From 01117ffa3659586ebb7e5023520f5a84b0329689 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Fri, 6 Mar 2026 12:05:07 -0800 Subject: [PATCH 01/14] feat(project): report generation --- .../alembic/versions/869cfd49ebd5_initial.py | 1 + .../ai_analysis/ai_analysis.py | 16 +- .../context_manager/database.py | 13 + .../context_manager/models.py | 1 + .../context_manager/question_categories.py | 51 ++ .../session_context_manager.py | 1 + .../context_manager/tests/test_database.py | 52 ++- .../interview_helper/context_manager/types.py | 1 + .../interview_helper/downloads/get_report.py | 441 ++++++++++++++++++ .../tests/test_report_generation.py | 186 ++++++++ backend/src/long_run_eval.py | 1 + backend/src/main.py | 30 ++ .../audio-sender/TranscriptView.tsx | 20 + frontend/src/lib/api.ts | 37 ++ frontend/src/lib/message.ts | 1 + 15 files changed, 848 insertions(+), 4 deletions(-) create mode 100644 backend/src/interview_helper/context_manager/question_categories.py create mode 100644 backend/src/interview_helper/downloads/get_report.py create mode 100644 backend/src/interview_helper/tests/test_report_generation.py diff --git a/backend/alembic/versions/869cfd49ebd5_initial.py b/backend/alembic/versions/869cfd49ebd5_initial.py index c67a692..597e0ae 100644 --- a/backend/alembic/versions/869cfd49ebd5_initial.py +++ b/backend/alembic/versions/869cfd49ebd5_initial.py @@ -115,6 +115,7 @@ def upgrade() -> None: sa.Column("analysis_id", sa.String(length=26), nullable=False), sa.Column("project_id", sa.String(length=26), nullable=False), sa.Column("text", sa.Text(), nullable=False), + sa.Column("category_code", sa.String(length=1), nullable=False), sa.Column("span", sa.Text(), nullable=True), sa.Column("transcript_span_id", sa.String(length=26), nullable=True), sa.Column("transcript_context_start", sa.String(length=26), nullable=False), diff --git a/backend/src/interview_helper/ai_analysis/ai_analysis.py b/backend/src/interview_helper/ai_analysis/ai_analysis.py index 
5b4aecd..bf13156 100644 --- a/backend/src/interview_helper/ai_analysis/ai_analysis.py +++ b/backend/src/interview_helper/ai_analysis/ai_analysis.py @@ -18,6 +18,10 @@ ProjectId, TranscriptId, ) +from interview_helper.context_manager.question_categories import ( + QUESTION_CATEGORIES, + normalize_question_category_code, +) from langchain_openai import AzureChatOpenAI from langchain.tools import ToolRuntime, tool # pyright: ignore[reportUnknownVariableType] from langchain.agents import create_agent # pyright: ignore[reportUnknownVariableType] @@ -40,6 +44,7 @@ class Question(BaseModel): question: str grounding_span: str + category_code: str class Analysis(BaseModel): @@ -55,7 +60,11 @@ class ProjectContext: class SimpleAnalyzer: """Simple LLM-based interview analyzer.""" - SYSTEM_PROMPT: str = dedent("""\ + CATEGORY_PROMPT_BLOCK: str = "\n".join( + [f" - {code}: {label}" for code, label in QUESTION_CATEGORIES] + ) + + SYSTEM_PROMPT: str = dedent(f"""\ ROLE: Interview Follow-Up Generator for SAR Profiles You will receive a chunk of transcript from an in-depth profile interview for a Search and Rescue operation. @@ -79,8 +88,12 @@ class SimpleAnalyzer: 7) Output: ONE to THREE questions, each with: - question (string) - grounding_span (short verbatim quote from the transcript) + - category_code (single letter in B-W from list below) As well as a brief summary of the entire situation so far, based on your knowledge. + Category codes: +{CATEGORY_PROMPT_BLOCK} + ALWAYS use the provided TOOLS to check for duplicates or gather more context from the transcript history before finalizing your questions. 
@@ -246,6 +259,7 @@ def clean_grounding_span(span: str) -> str: AIQuestion( question=q.question, grounding_span=clean_grounding_span(q.grounding_span), + category_code=normalize_question_category_code(q.category_code), ) for q in analysis.questions ] diff --git a/backend/src/interview_helper/context_manager/database.py b/backend/src/interview_helper/context_manager/database.py index 3a049b9..ebed376 100644 --- a/backend/src/interview_helper/context_manager/database.py +++ b/backend/src/interview_helper/context_manager/database.py @@ -9,6 +9,9 @@ SessionId, TranscriptId, ) +from interview_helper.context_manager.question_categories import ( + normalize_question_category_code, +) from interview_helper.context_manager.types import UserId from alembic.config import Config from alembic import command @@ -510,6 +513,7 @@ def get_project_creator_and_name( class AnalysisRow(BaseModel): analysis_id: str text: str + category_code: str span: str | None transcript_span_id: TranscriptId | None tag: Literal["starred", "dismissed", "starred_dismissed"] | None @@ -533,6 +537,7 @@ def get_all_ai_analyses( sa.select( models.AIAnalysis.analysis_id, models.AIAnalysis.text, + models.AIAnalysis.category_code, models.AIAnalysis.span, models.AIAnalysis.transcript_span_id, models.AIAnalysis.transcript_context_start, @@ -551,6 +556,7 @@ def get_all_ai_analyses( sa.select( subq.c.analysis_id, subq.c.text, + subq.c.category_code, subq.c.span, subq.c.transcript_span_id, subq.c.transcript_context_start, @@ -567,6 +573,7 @@ def get_all_ai_analyses( AnalysisRow( analysis_id=row.analysis_id, # pyright: ignore[reportAny] text=row.text, # pyright: ignore[reportAny] + category_code=row.category_code, # pyright: ignore[reportAny] span=row.span, # pyright: ignore[reportAny] transcript_span_id=TranscriptId.from_str(row.transcript_span_id) # pyright: ignore[reportAny] if row.transcript_span_id # pyright: ignore[reportAny] @@ -603,6 +610,7 @@ def get_analyses_by_ids( sa.select( 
models.AIAnalysis.analysis_id, models.AIAnalysis.text, + models.AIAnalysis.category_code, models.AIAnalysis.span, models.AIAnalysis.transcript_span_id, models.AIAnalysis.transcript_context_start, @@ -621,6 +629,7 @@ def get_analyses_by_ids( sa.select( subq.c.analysis_id, subq.c.text, + subq.c.category_code, subq.c.span, subq.c.transcript_span_id, subq.c.transcript_context_start, @@ -640,6 +649,7 @@ def get_analyses_by_ids( row.analysis_id: AnalysisRow( # pyright: ignore[reportAny] analysis_id=row.analysis_id, # pyright: ignore[reportAny] text=row.text, # pyright: ignore[reportAny] + category_code=row.category_code, # pyright: ignore[reportAny] span=row.span, # pyright: ignore[reportAny] tag=row.tag, # pyright: ignore[reportAny] transcript_context_start=TranscriptId.from_str( @@ -707,6 +717,7 @@ def add_ai_analysis( db: PersistentDatabase, project_id: ProjectId, text: str, + category_code: str, span: str | None, transcript_span_id: TranscriptId | None, transcript_context_start: TranscriptId, @@ -717,6 +728,7 @@ def add_ai_analysis( Adds a transcription result, returns the analysis ID """ analysis_id = str(ULID()).lower() + normalized_category_code = normalize_question_category_code(category_code) with db.begin() as conn: assert conn.execute( sa.insert(models.AIAnalysis), @@ -724,6 +736,7 @@ def add_ai_analysis( "analysis_id": analysis_id, "project_id": str(project_id), "text": text, + "category_code": normalized_category_code, "span": span, "transcript_span_id": str(transcript_span_id) if transcript_span_id diff --git a/backend/src/interview_helper/context_manager/models.py b/backend/src/interview_helper/context_manager/models.py index 704ef2c..1736be5 100644 --- a/backend/src/interview_helper/context_manager/models.py +++ b/backend/src/interview_helper/context_manager/models.py @@ -80,6 +80,7 @@ class AIAnalysis(Base): ) text: Mapped[str] = mapped_column(sa.Text, nullable=False) + category_code: Mapped[str] = mapped_column(sa.String(1), nullable=False) span: 
Mapped[str] = mapped_column(sa.Text, nullable=True) diff --git a/backend/src/interview_helper/context_manager/question_categories.py b/backend/src/interview_helper/context_manager/question_categories.py new file mode 100644 index 0000000..7180ea6 --- /dev/null +++ b/backend/src/interview_helper/context_manager/question_categories.py @@ -0,0 +1,51 @@ +from typing import Final + +# Missing "A" is intentional; category list starts at "B". +QUESTION_CATEGORIES: Final[tuple[tuple[str, str], ...]] = ( + ("B", "Source Information (person being interviewed)"), + ("C", "General Missing Person Information"), + ("D", "Physical Description"), + ("E", "Clothing"), + ("F", "Health / General & Emotional Condition"), + ("G", "Last Known location / Point last seen"), + ( + "H", + "Summary of Events leading up to and following MP's Disappearance", + ), + ("I", "Trip plans of Subject"), + ("J", "Outdoor Experience"), + ("K", "Habits / Personality / Behavior Preferences"), + ("L", "Outdoor Equipment"), + ("M", "Contacts Person Might Make Upon Reaching Civilization"), + ("N", "Electronic Devices"), + ("O", "Family, Friends, and Press Relations"), + ("P", "Other Information"), + ("Q", "Groups Overdue / Dynamics"), + ("R", "Child / Adolescent"), + ("S", "Autistic Spectrum"), + ("T", "Cognitively Impaired / Intellectual Disability"), + ("U", "Depressed / Despondent / Possible Suicidal"), + ("V", "Exhibiting Psychotic Behavior"), + ("W", "Exhibiting Signs of Dementia or Alzheimer's"), +) + +VALID_QUESTION_CATEGORY_CODES: Final[frozenset[str]] = frozenset( + code for code, _ in QUESTION_CATEGORIES +) +DEFAULT_QUESTION_CATEGORY_CODE: Final[str] = "P" + +QUESTION_CATEGORY_LABELS: Final[dict[str, str]] = dict(QUESTION_CATEGORIES) +QUESTION_CATEGORY_ORDER: Final[tuple[str, ...]] = tuple( + code for code, _ in QUESTION_CATEGORIES +) + + +def normalize_question_category_code(category_code: str | None) -> str: + if category_code is None: + return DEFAULT_QUESTION_CATEGORY_CODE + + normalized = 
category_code.strip().upper() + if normalized in VALID_QUESTION_CATEGORY_CODES: + return normalized + + return DEFAULT_QUESTION_CATEGORY_CODE diff --git a/backend/src/interview_helper/context_manager/session_context_manager.py b/backend/src/interview_helper/context_manager/session_context_manager.py index 158d170..23f953e 100644 --- a/backend/src/interview_helper/context_manager/session_context_manager.py +++ b/backend/src/interview_helper/context_manager/session_context_manager.py @@ -557,6 +557,7 @@ async def _worker( self.db, project_id=job.project_id, text=result.question, + category_code=result.category_code, span=result.grounding_span, transcript_span_id=transcript_span_id, transcript_context_start=results.transcript_context_start, diff --git a/backend/src/interview_helper/context_manager/tests/test_database.py b/backend/src/interview_helper/context_manager/tests/test_database.py index 9aa0a33..d6cce4d 100644 --- a/backend/src/interview_helper/context_manager/tests/test_database.py +++ b/backend/src/interview_helper/context_manager/tests/test_database.py @@ -1,6 +1,15 @@ -from interview_helper.context_manager.database import get_user_by_id -from interview_helper.context_manager.database import get_or_add_user_by_oidc_id -from interview_helper.context_manager.database import PersistentDatabase +from ulid import ULID +from interview_helper.context_manager.database import ( + PersistentDatabase, + add_ai_analysis, + add_transcription, + create_new_project, + create_session, + get_all_ai_analyses, + get_or_add_user_by_oidc_id, + get_user_by_id, +) +from interview_helper.context_manager.types import ProjectId, SessionId, TranscriptId import sqlalchemy as sa import pytest @@ -46,3 +55,40 @@ def test_user_addition(): assert added_user.user_id == added_user2.user_id == added_user3.user_id assert added_user.oidc_id == added_user2.oidc_id == added_user3.oidc_id + + +def test_add_ai_analysis_normalizes_invalid_category_code_to_default(): + db = 
PersistentDatabase.new_in_memory() + user = get_or_add_user_by_oidc_id(db, "oidc-1", "User One") + + project = create_new_project(db, user.user_id, "P1") + project_id = ProjectId.from_str(project["id"]) + session_id = SessionId(ULID()) + create_session(db, session_id, project_id, user.user_id) + + transcript_id = TranscriptId.from_str( + add_transcription( + db=db, + user_id=user.user_id, + session_id=session_id, + project_id=project_id, + text="Sample transcript", + speaker="Speaker-1", + ) + ) + + _ = add_ai_analysis( + db=db, + project_id=project_id, + text="What time did they leave?", + category_code="INVALID", + span="they left at sunrise", + transcript_span_id=transcript_id, + transcript_context_start=transcript_id, + transcript_context_end=transcript_id, + summary="Summary", + ) + + rows = get_all_ai_analyses(db, project_id) + assert len(rows) == 1 + assert rows[0].category_code == "P" diff --git a/backend/src/interview_helper/context_manager/types.py b/backend/src/interview_helper/context_manager/types.py index e1e82f5..926705f 100644 --- a/backend/src/interview_helper/context_manager/types.py +++ b/backend/src/interview_helper/context_manager/types.py @@ -121,6 +121,7 @@ class AIJob: class AIQuestion: question: str grounding_span: str + category_code: str @dataclass(frozen=True) diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py new file mode 100644 index 0000000..6936d2d --- /dev/null +++ b/backend/src/interview_helper/downloads/get_report.py @@ -0,0 +1,441 @@ +from __future__ import annotations + +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime, timedelta, timezone +from io import BytesIO +from collections.abc import Sequence +from typing import cast +from xml.sax.saxutils import escape + +from reportlab.lib.pagesizes import letter +from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet +from reportlab.lib.units 
import inch +from reportlab.platypus import PageBreak, Paragraph, SimpleDocTemplate, Spacer +from reportlab.platypus.flowables import Flowable + +from interview_helper.context_manager.database import ( + AnalysisRow, + PersistentDatabase, + TranscriptionWithProjectDetails, + get_all_ai_analyses, + get_all_transcriptions_for_project, +) +from interview_helper.context_manager.question_categories import ( + QUESTION_CATEGORY_LABELS, + QUESTION_CATEGORY_ORDER, + normalize_question_category_code, +) +from interview_helper.context_manager.types import ProjectId, TranscriptId +from interview_helper.downloads.util import extract_timestamp_from_ulid + + +@dataclass +class ReportQuestionEntry: + analysis_id: str + ordinal: int + text: str + category_code: str + span: str | None + question_anchor: str + context_anchor: str | None + answered_at_anchor: str | None + answered_at_text: str | None + + +@dataclass +class ReportTranscriptSection: + anchor: str + speaker: str + text: str + started_at: datetime + chunk_ids: list[str] = field(default_factory=list) + answered_question_refs: list[tuple[int, str]] = field(default_factory=list) + + +@dataclass +class ReportData: + project_name: str + start_time: datetime + total_duration: timedelta + answered_by_category: dict[str, list[ReportQuestionEntry]] + unanswered_by_category: dict[str, list[ReportQuestionEntry]] + transcript_sections: list[ReportTranscriptSection] + + +@dataclass +class _TranscriptAnchorIndex: + sections: list[ReportTranscriptSection] + chunk_to_section_anchor: dict[str, str] + section_by_anchor: dict[str, ReportTranscriptSection] + + +def _format_utc(ts: datetime) -> str: + return ts.astimezone(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC") + + +def _format_duration_hms(duration: timedelta) -> str: + total_seconds = int(max(duration.total_seconds(), 0)) + hours, rem = divmod(total_seconds, 3600) + minutes, seconds = divmod(rem, 60) + return f"{hours}h {minutes}m {seconds}s" + + +def _ordered_category_items( + 
grouped: dict[str, list[ReportQuestionEntry]], +) -> list[tuple[str, list[ReportQuestionEntry]]]: + items: list[tuple[str, list[ReportQuestionEntry]]] = [] + for code in QUESTION_CATEGORY_ORDER: + rows = grouped.get(code, []) + if rows: + items.append((code, rows)) + return items + + +def _build_transcript_anchor_index( + transcript_rows: Sequence[TranscriptionWithProjectDetails], +) -> _TranscriptAnchorIndex: + sections: list[ReportTranscriptSection] = [] + chunk_to_section_anchor: dict[str, str] = {} + + current_speaker: str | None = None + current_texts: list[str] = [] + current_chunk_ids: list[str] = [] + current_started_at: datetime | None = None + + def flush_current() -> None: + nonlocal current_speaker, current_texts, current_chunk_ids, current_started_at + + if ( + current_speaker is None + or current_started_at is None + or len(current_chunk_ids) == 0 + ): + return + + anchor = f"transcript-{len(sections) + 1}" + section = ReportTranscriptSection( + anchor=anchor, + speaker=current_speaker, + text=" ".join(current_texts).strip(), + started_at=current_started_at, + chunk_ids=[*current_chunk_ids], + ) + sections.append(section) + + for chunk_id in current_chunk_ids: + chunk_to_section_anchor[chunk_id] = anchor + + current_speaker = None + current_texts = [] + current_chunk_ids = [] + current_started_at = None + + for row in transcript_rows: + transcription_id = str(row["transcription_id"]) + speaker = str(row["speaker"] or "Unknown Speaker") + text = str(row["text_output"] or "").strip() + timestamp = extract_timestamp_from_ulid(transcription_id) + + if current_speaker is None: + current_speaker = speaker + current_started_at = timestamp + current_chunk_ids = [transcription_id] + current_texts = [text] + continue + + if speaker == current_speaker: + current_chunk_ids.append(transcription_id) + current_texts.append(text) + continue + + flush_current() + current_speaker = speaker + current_started_at = timestamp + current_chunk_ids = [transcription_id] + 
current_texts = [text] + + flush_current() + + section_by_anchor = {section.anchor: section for section in sections} + return _TranscriptAnchorIndex( + sections=sections, + chunk_to_section_anchor=chunk_to_section_anchor, + section_by_anchor=section_by_anchor, + ) + + +def _compute_total_duration( + transcript_rows: Sequence[TranscriptionWithProjectDetails], +) -> timedelta: + if not transcript_rows: + return timedelta(0) + + per_session_bounds: dict[str, tuple[datetime, datetime]] = {} + + for row in transcript_rows: + session_id = str(row["session_id"]) + timestamp = extract_timestamp_from_ulid(str(row["transcription_id"])) + + previous = per_session_bounds.get(session_id) + if previous is None: + per_session_bounds[session_id] = (timestamp, timestamp) + continue + + min_ts, max_ts = previous + if timestamp < min_ts: + min_ts = timestamp + if timestamp > max_ts: + max_ts = timestamp + per_session_bounds[session_id] = (min_ts, max_ts) + + total = timedelta(0) + for min_ts, max_ts in per_session_bounds.values(): + total += max_ts - min_ts + + return total + + +def _analysis_context_anchor( + analysis: AnalysisRow, chunk_to_section_anchor: dict[str, str] +) -> str | None: + if analysis.transcript_span_id is not None: + span_anchor = chunk_to_section_anchor.get(str(analysis.transcript_span_id)) + if span_anchor is not None: + return span_anchor + + start_anchor = chunk_to_section_anchor.get(str(analysis.transcript_context_start)) + if start_anchor is not None: + return start_anchor + + return chunk_to_section_anchor.get(str(analysis.transcript_context_end)) + + +def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | None: + typed_project_id = ProjectId.from_str(project_id) + + transcript_rows = get_all_transcriptions_for_project(db, typed_project_id) + if not transcript_rows: + return None + + anchor_index = _build_transcript_anchor_index(transcript_rows) + analyses = get_all_ai_analyses(db, typed_project_id) + + answered_by_category: 
dict[str, list[ReportQuestionEntry]] = defaultdict(list) + unanswered_by_category: dict[str, list[ReportQuestionEntry]] = defaultdict(list) + + for analysis in analyses: + normalized_category = normalize_question_category_code(analysis.category_code) + + answered_anchor: str | None = None + answered_at_text: str | None = None + + if analysis.asked_at_transcript_id is not None: + asked_at_id = analysis.asked_at_transcript_id.lower() + answered_anchor = anchor_index.chunk_to_section_anchor.get(asked_at_id) + asked_at_timestamp = TranscriptId.from_str(asked_at_id).get_datetime() + answered_at_text = _format_utc(asked_at_timestamp) + + question_anchor = f"question-{analysis.ordinal}" + + entry = ReportQuestionEntry( + analysis_id=analysis.analysis_id, + ordinal=analysis.ordinal, + text=analysis.text, + category_code=normalized_category, + span=analysis.span, + question_anchor=question_anchor, + context_anchor=_analysis_context_anchor( + analysis, anchor_index.chunk_to_section_anchor + ), + answered_at_anchor=answered_anchor, + answered_at_text=answered_at_text, + ) + + if analysis.was_asked is True: + answered_by_category[normalized_category].append(entry) + if answered_anchor is not None: + section = anchor_index.section_by_anchor.get(answered_anchor) + if section is not None: + section.answered_question_refs.append( + (entry.ordinal, entry.question_anchor) + ) + else: + unanswered_by_category[normalized_category].append(entry) + + for section in anchor_index.sections: + section.answered_question_refs.sort(key=lambda item: item[0]) + + project_name = str(transcript_rows[0]["project_name"] or "Untitled Project") + first_timestamp = extract_timestamp_from_ulid( + str(transcript_rows[0]["transcription_id"]) + ) + + return ReportData( + project_name=project_name, + start_time=first_timestamp, + total_duration=_compute_total_duration(transcript_rows), + answered_by_category=dict(answered_by_category), + unanswered_by_category=dict(unanswered_by_category), + 
transcript_sections=anchor_index.sections, + ) + + +def _render_question_sections( + story: list[Flowable], + title: str, + grouped_questions: dict[str, list[ReportQuestionEntry]], + normal_style: ParagraphStyle, + heading_style: ParagraphStyle, +) -> None: + story.append(Paragraph(escape(title), heading_style)) + story.append(Spacer(1, 0.15 * inch)) + + ordered_groups = _ordered_category_items(grouped_questions) + if len(ordered_groups) == 0: + story.append(Paragraph("No questions available.", normal_style)) + return + + for category_code, entries in ordered_groups: + category_label = QUESTION_CATEGORY_LABELS.get(category_code, "Unknown") + story.append( + Paragraph( + f"{escape(category_code)}. {escape(category_label)}", + normal_style, + ) + ) + story.append(Spacer(1, 0.08 * inch)) + + for entry in entries: + story.append( + Paragraph( + f'Q{entry.ordinal}. {escape(entry.text)}', + normal_style, + ) + ) + + if entry.span: + escaped_span = escape(entry.span) + if entry.context_anchor: + story.append( + Paragraph( + f'Context: "{escaped_span}"', + normal_style, + ) + ) + else: + story.append(Paragraph(f'Context: "{escaped_span}"', normal_style)) + + if entry.answered_at_text is not None: + if entry.answered_at_anchor: + story.append( + Paragraph( + f'Answered At: {escape(entry.answered_at_text)}', + normal_style, + ) + ) + else: + story.append( + Paragraph( + f"Answered At: {escape(entry.answered_at_text)}", + normal_style, + ) + ) + + story.append(Spacer(1, 0.08 * inch)) + + +def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None: + report_data = build_report_data(project_id, db) + if report_data is None: + return None + + buffer = BytesIO() + document = SimpleDocTemplate( + buffer, + pagesize=letter, + title=f"Interview Report - {report_data.project_name}", + leftMargin=0.75 * inch, + rightMargin=0.75 * inch, + topMargin=0.75 * inch, + bottomMargin=0.75 * inch, + ) + + styles = getSampleStyleSheet() + title_style = cast(ParagraphStyle, 
styles["Title"]) + heading_style = cast(ParagraphStyle, styles["Heading2"]) + normal_style = cast(ParagraphStyle, styles["BodyText"]) + + story: list[Flowable] = [] + + # Cover page + story.append(Paragraph(escape(report_data.project_name), title_style)) + story.append(Spacer(1, 0.35 * inch)) + story.append( + Paragraph( + f"Interview Start: {_format_utc(report_data.start_time)}", + normal_style, + ) + ) + story.append( + Paragraph( + f"Total Interview Length: {_format_duration_hms(report_data.total_duration)}", + normal_style, + ) + ) + story.append(PageBreak()) + + # Answered questions + _render_question_sections( + story, + "Answered Questions (Categorized)", + report_data.answered_by_category, + normal_style, + heading_style, + ) + story.append(PageBreak()) + + # Transcript + story.append(Paragraph("Transcript", heading_style)) + story.append(Spacer(1, 0.15 * inch)) + + for section in report_data.transcript_sections: + speaker = section.speaker if section.speaker else "Unknown Speaker" + transcript_heading = f"[{_format_utc(section.started_at)}] {speaker}" + story.append( + Paragraph( + f'{escape(transcript_heading)}', + normal_style, + ) + ) + story.append( + Paragraph( + escape(section.text if section.text else "(No transcript text)"), + normal_style, + ) + ) + + if section.answered_question_refs: + links = ", ".join( + [ + f'Q{ordinal}' + for ordinal, question_anchor in section.answered_question_refs + ] + ) + story.append(Paragraph(f"Answered Here: {links}", normal_style)) + + story.append(Spacer(1, 0.1 * inch)) + + story.append(PageBreak()) + + # Unanswered questions + _render_question_sections( + story, + "Unanswered Questions", + report_data.unanswered_by_category, + normal_style, + heading_style, + ) + + document.build(story) + return buffer.getvalue() diff --git a/backend/src/interview_helper/tests/test_report_generation.py b/backend/src/interview_helper/tests/test_report_generation.py new file mode 100644 index 0000000..bf63e46 --- /dev/null +++ 
b/backend/src/interview_helper/tests/test_report_generation.py @@ -0,0 +1,186 @@ +from datetime import datetime, timedelta, timezone +from typing import cast + +import pytest +import sqlalchemy as sa +from ulid import ULID + +from interview_helper.context_manager import models +from interview_helper.context_manager.database import ( + PersistentDatabase, + add_ai_analysis, + create_new_project, + create_session, + get_or_add_user_by_oidc_id, + update_ai_analysis_tag, +) +from interview_helper.context_manager.types import ( + ProjectId, + SessionId, + TranscriptId, + UserId, +) +from interview_helper.downloads.get_report import build_report_data, generate_report_pdf + + +pytestmark = pytest.mark.anyio + + +def _ulid_at(ts: datetime) -> str: + ulid_value = cast(ULID, ULID.from_datetime(ts)) + return str(ulid_value).lower() + + +def _insert_transcription( + db: PersistentDatabase, + *, + transcription_id: str, + project_id: ProjectId, + user_id: UserId, + session_id: SessionId, + speaker: str, + text: str, +) -> None: + with db.begin() as conn: + _ = conn.execute( + sa.insert(models.Transcription), + { + "transcription_id": transcription_id, + "project_id": str(project_id), + "user_id": str(user_id), + "session_id": str(session_id), + "speaker": speaker, + "text_output": text, + }, + ) + + +def test_build_report_data_groups_questions_and_creates_bidirectional_anchors(): + db = PersistentDatabase.new_in_memory() + user = get_or_add_user_by_oidc_id(db, "oidc-report-user", "Report User") + + project = create_new_project(db, user.user_id, "Mission Report") + project_id = ProjectId.from_str(project["id"]) + + session_1 = SessionId(ULID()) + session_2 = SessionId(ULID()) + create_session(db, session_1, project_id, user.user_id) + create_session(db, session_2, project_id, user.user_id) + + t0 = datetime(2026, 1, 1, 10, 0, 0, tzinfo=timezone.utc) + t1 = t0 + timedelta(seconds=120) + t2 = t0 + timedelta(seconds=600) + + transcript_1 = _ulid_at(t0) + transcript_2 = _ulid_at(t1) 
+ transcript_3 = _ulid_at(t2) + + _insert_transcription( + db, + transcription_id=transcript_1, + project_id=project_id, + user_id=user.user_id, + session_id=session_1, + speaker="Speaker-A", + text="We last saw him near the trailhead.", + ) + _insert_transcription( + db, + transcription_id=transcript_2, + project_id=project_id, + user_id=user.user_id, + session_id=session_1, + speaker="Speaker-A", + text="He was carrying a blue jacket.", + ) + _insert_transcription( + db, + transcription_id=transcript_3, + project_id=project_id, + user_id=user.user_id, + session_id=session_2, + speaker="Speaker-B", + text="He usually checks in every night.", + ) + + answered_analysis_id = add_ai_analysis( + db=db, + project_id=project_id, + text="What route did he usually take from the trailhead?", + category_code="B", + span="last saw him near the trailhead", + transcript_span_id=TranscriptId.from_str(transcript_1), + transcript_context_start=TranscriptId.from_str(transcript_1), + transcript_context_end=TranscriptId.from_str(transcript_2), + summary="Summary", + ) + + unanswered_analysis_id = add_ai_analysis( + db=db, + project_id=project_id, + text="What medication does he take?", + category_code="C", + span="", + transcript_span_id=TranscriptId.from_str(transcript_2), + transcript_context_start=TranscriptId.from_str(transcript_2), + transcript_context_end=TranscriptId.from_str(transcript_2), + summary="Summary", + ) + _ = unanswered_analysis_id + + update_ai_analysis_tag( + db, + analysis_id=str(answered_analysis_id), + tag="dismissed", + _user_id=user.user_id, + was_asked=True, + asked_at_transcript_id=transcript_3, + ) + + report = build_report_data(project["id"], db) + assert report is not None + + assert report.project_name == "Mission Report" + assert report.start_time == t0 + assert report.total_duration == timedelta(seconds=120) + + answered = report.answered_by_category.get("B", []) + unanswered = report.unanswered_by_category.get("C", []) + + assert len(answered) == 1 
+ assert len(unanswered) == 1 + + answered_entry = answered[0] + assert answered_entry.context_anchor == "transcript-1" + assert answered_entry.answered_at_anchor == "transcript-2" + + transcript_section = report.transcript_sections[1] + assert len(transcript_section.answered_question_refs) == 1 + _, question_anchor = transcript_section.answered_question_refs[0] + assert question_anchor == answered_entry.question_anchor + + +def test_generate_report_pdf_returns_pdf_bytes(): + db = PersistentDatabase.new_in_memory() + user = get_or_add_user_by_oidc_id(db, "oidc-pdf-user", "PDF User") + + project = create_new_project(db, user.user_id, "PDF Project") + project_id = ProjectId.from_str(project["id"]) + + session = SessionId(ULID()) + create_session(db, session, project_id, user.user_id) + + transcript_id = _ulid_at(datetime(2026, 1, 1, 12, 0, 0, tzinfo=timezone.utc)) + _insert_transcription( + db, + transcription_id=transcript_id, + project_id=project_id, + user_id=user.user_id, + session_id=session, + speaker="Speaker-A", + text="Sample text", + ) + + pdf_bytes = generate_report_pdf(project["id"], db) + assert pdf_bytes is not None + assert pdf_bytes.startswith(b"%PDF") diff --git a/backend/src/long_run_eval.py b/backend/src/long_run_eval.py index 8fa76fd..f4c3ed5 100644 --- a/backend/src/long_run_eval.py +++ b/backend/src/long_run_eval.py @@ -163,6 +163,7 @@ async def run_analysis( db, project_id=project, text=result.question, + category_code=result.category_code, span=result.grounding_span, transcript_span_id=None, transcript_context_start=analysis_results.transcript_context_start, diff --git a/backend/src/main.py b/backend/src/main.py index 2ca9a59..3cd434a 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -70,6 +70,7 @@ import tempfile import sqlalchemy as sa from interview_helper.downloads.get_transcript import generate_transcript +from interview_helper.downloads.get_report import generate_report_pdf from interview_helper.context_manager import models 
# Configure logging @@ -653,6 +654,35 @@ async def download_questions( ) +@app.get("/project/{project_id}/download/report") +async def download_report(project_id: str, token: Annotated[str, Depends(oidc_scheme)]): + """ + Download a unified interview report for a project as a PDF + """ + clean_token = token.removeprefix("Bearer ") + _ = verify_jwt_token(clean_token, jwks_client, CLIENT_ID, signing_algos) + + project_id_typed = ProjectId.from_str(project_id) + project = get_project_by_id(session_manager.db, project_id_typed) + if not project: + raise HTTPException(status_code=404, detail="Project not found") + + report_pdf = generate_report_pdf(project_id=project_id, db=session_manager.db) + if report_pdf is None: + raise HTTPException( + status_code=404, detail="No transcriptions found for this project" + ) + + project_name = project["name"] or "report" + safe_filename = sanitize_filename(project_name, "report") + "_report.pdf" + + return Response( + content=report_pdf, + media_type="application/pdf", + headers={"Content-Disposition": f'attachment; filename="{safe_filename}"'}, + ) + + @app.get("/project/{project_id}/download/audio") async def download_audio(project_id: str, token: Annotated[str, Depends(oidc_scheme)]): """ diff --git a/frontend/src/components/audio-sender/TranscriptView.tsx b/frontend/src/components/audio-sender/TranscriptView.tsx index d772b4b..fb4636b 100644 --- a/frontend/src/components/audio-sender/TranscriptView.tsx +++ b/frontend/src/components/audio-sender/TranscriptView.tsx @@ -18,6 +18,7 @@ import { downloadTranscript, downloadQuestions, downloadAudio, + downloadReport, } from "../../lib/api"; import { useState } from "react"; import { TranscriptSection } from "./TranscriptSection"; @@ -109,6 +110,18 @@ export function TranscriptView({ } }; + const handleDownloadReport = async () => { + if (!projectId || !auth.user?.access_token) return; + try { + setDownloading("report"); + await downloadReport(projectId, auth.user.access_token); + } 
catch (error) { + console.error("Failed to download report:", error); + } finally { + setDownloading(null); + } + }; + return ( @@ -149,6 +162,13 @@ export function TranscriptView({ Downloads + } + onClick={handleDownloadReport} + disabled={downloading !== null} + > + Download Report + } onClick={handleDownloadAudio} diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index 34d976d..1ff0691 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -182,6 +182,43 @@ export async function downloadAudio( document.body.removeChild(a); } +/** + * Download unified report for a project + */ +export async function downloadReport( + projectId: string, + token: string, +): Promise { + const response = await fetch( + `${BACKEND_URL}/project/${projectId}/download/report`, + { + headers: { + Authorization: `Bearer ${token}`, + }, + }, + ); + + if (!response.ok) { + throw new Error( + `Failed to download report: ${response.status} ${response.statusText}`, + ); + } + + const blob = await response.blob(); + const url = window.URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = + response.headers + .get("content-disposition") + ?.split("filename=")[1] + ?.replace(/"/g, "") || "report.pdf"; + document.body.appendChild(a); + a.click(); + window.URL.revokeObjectURL(url); + document.body.removeChild(a); +} + /** * Get project info including session count */ diff --git a/frontend/src/lib/message.ts b/frontend/src/lib/message.ts index 2600022..d174c76 100644 --- a/frontend/src/lib/message.ts +++ b/frontend/src/lib/message.ts @@ -59,6 +59,7 @@ export interface AIResultMessage { export interface AnalysisRow { analysis_id: string; text: string; + category_code: string; span: string | null; transcript_span_id: string | null; is_dismissed: boolean; From 34933c6ec108cd9027cc389bd4eff8a1f034a7d3 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Sun, 8 Mar 2026 17:51:46 -0700 Subject: [PATCH 02/14] feat(backend): style report 
--- .../interview_helper/downloads/get_report.py | 113 ++++++++++++++---- 1 file changed, 91 insertions(+), 22 deletions(-) diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py index 6936d2d..b02b68e 100644 --- a/backend/src/interview_helper/downloads/get_report.py +++ b/backend/src/interview_helper/downloads/get_report.py @@ -8,6 +8,7 @@ from typing import cast from xml.sax.saxutils import escape +from reportlab.lib import colors from reportlab.lib.pagesizes import letter from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet from reportlab.lib.units import inch @@ -287,6 +288,8 @@ def _render_question_sections( grouped_questions: dict[str, list[ReportQuestionEntry]], normal_style: ParagraphStyle, heading_style: ParagraphStyle, + category_style: ParagraphStyle, + question_style: ParagraphStyle, ) -> None: story.append(Paragraph(escape(title), heading_style)) story.append(Spacer(1, 0.15 * inch)) @@ -300,17 +303,17 @@ def _render_question_sections( category_label = QUESTION_CATEGORY_LABELS.get(category_code, "Unknown") story.append( Paragraph( - f"{escape(category_code)}. {escape(category_label)}", - normal_style, + f'{escape(category_code)}. {escape(category_label)}', + category_style, ) ) - story.append(Spacer(1, 0.08 * inch)) + story.append(Spacer(1, 0.12 * inch)) for entry in entries: story.append( Paragraph( - f'Q{entry.ordinal}. {escape(entry.text)}', - normal_style, + f'Q{entry.ordinal}. 
{escape(entry.text)}', + question_style, ) ) @@ -319,30 +322,37 @@ def _render_question_sections( if entry.context_anchor: story.append( Paragraph( - f'Context: "{escaped_span}"', - normal_style, + f'Context: "{escaped_span}"', + question_style, ) ) else: - story.append(Paragraph(f'Context: "{escaped_span}"', normal_style)) + story.append( + Paragraph( + f'Context: "{escaped_span}"', + question_style, + ) + ) if entry.answered_at_text is not None: if entry.answered_at_anchor: story.append( Paragraph( - f'Answered At: {escape(entry.answered_at_text)}', - normal_style, + f'Answered At: {escape(entry.answered_at_text)}', + question_style, ) ) else: story.append( Paragraph( - f"Answered At: {escape(entry.answered_at_text)}", - normal_style, + f'Answered At: {escape(entry.answered_at_text)}', + question_style, ) ) - story.append(Spacer(1, 0.08 * inch)) + story.append(Spacer(1, 0.12 * inch)) + + story.append(Spacer(1, 0.08 * inch)) def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None: @@ -363,24 +373,74 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None styles = getSampleStyleSheet() title_style = cast(ParagraphStyle, styles["Title"]) + title_style.textColor = colors.HexColor("#1a472a") + title_style.fontSize = 36 + title_style.leading = 42 + heading_style = cast(ParagraphStyle, styles["Heading2"]) + heading_style.textColor = colors.HexColor("#2E5090") + heading_style.fontSize = 18 + heading_style.spaceAfter = 6 + normal_style = cast(ParagraphStyle, styles["BodyText"]) + normal_style.fontSize = 11 + + # Custom styles for categories and questions + category_style = ParagraphStyle( + "CategoryStyle", + parent=normal_style, + fontSize=13, + textColor=colors.HexColor("#2E5090"), + spaceAfter=8, + spaceBefore=4, + ) + + question_style = ParagraphStyle( + "QuestionStyle", + parent=normal_style, + fontSize=11, + leftIndent=20, + spaceAfter=4, + ) + + subtitle_style = ParagraphStyle( + "SubtitleStyle", + 
parent=normal_style, + fontSize=13, + textColor=colors.HexColor("#555555"), + spaceAfter=6, + ) story: list[Flowable] = [] # Cover page - story.append(Paragraph(escape(report_data.project_name), title_style)) - story.append(Spacer(1, 0.35 * inch)) + story.append(Spacer(1, 1.5 * inch)) + story.append( + Paragraph('Interview Prep', title_style) + ) + story.append(Spacer(1, 0.1 * inch)) + story.append( + Paragraph( + f'{escape(report_data.project_name)}', + ParagraphStyle( + "ProjectTitle", + parent=title_style, + fontSize=24, + textColor=colors.HexColor("#2E5090"), + ), + ) + ) + story.append(Spacer(1, 0.5 * inch)) story.append( Paragraph( - f"Interview Start: {_format_utc(report_data.start_time)}", - normal_style, + f'Interview Start: {_format_utc(report_data.start_time)}', + subtitle_style, ) ) story.append( Paragraph( - f"Total Interview Length: {_format_duration_hms(report_data.total_duration)}", - normal_style, + f'Total Interview Length: {_format_duration_hms(report_data.total_duration)}', + subtitle_style, ) ) story.append(PageBreak()) @@ -392,6 +452,8 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None report_data.answered_by_category, normal_style, heading_style, + category_style, + question_style, ) story.append(PageBreak()) @@ -404,7 +466,7 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None transcript_heading = f"[{_format_utc(section.started_at)}] {speaker}" story.append( Paragraph( - f'{escape(transcript_heading)}', + f'{escape(transcript_heading)}', normal_style, ) ) @@ -418,11 +480,16 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None if section.answered_question_refs: links = ", ".join( [ - f'Q{ordinal}' + f'Q{ordinal}' for ordinal, question_anchor in section.answered_question_refs ] ) - story.append(Paragraph(f"Answered Here: {links}", normal_style)) + story.append( + Paragraph( + f'Answered Here: {links}', + normal_style, + ) + ) story.append(Spacer(1, 
0.1 * inch)) @@ -435,6 +502,8 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None report_data.unanswered_by_category, normal_style, heading_style, + category_style, + question_style, ) document.build(story) From ad95e3f0e5afee15893870d6f9f60b712792ed74 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Sun, 8 Mar 2026 18:19:26 -0700 Subject: [PATCH 03/14] feat(backend): report formatting + excerpts --- .../context_manager/question_categories.py | 2 +- .../interview_helper/downloads/get_report.py | 35 +++++++++++++++++-- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/backend/src/interview_helper/context_manager/question_categories.py b/backend/src/interview_helper/context_manager/question_categories.py index 7180ea6..d3068b9 100644 --- a/backend/src/interview_helper/context_manager/question_categories.py +++ b/backend/src/interview_helper/context_manager/question_categories.py @@ -2,7 +2,7 @@ # Missing "A" is intentional; category list starts at "B". 
QUESTION_CATEGORIES: Final[tuple[tuple[str, str], ...]] = ( - ("B", "Source Information (person being interviewed)"), + ("B", "Source Information"), ("C", "General Missing Person Information"), ("D", "Physical Description"), ("E", "Clothing"), diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py index b02b68e..217c8d1 100644 --- a/backend/src/interview_helper/downloads/get_report.py +++ b/backend/src/interview_helper/downloads/get_report.py @@ -30,6 +30,9 @@ from interview_helper.context_manager.types import ProjectId, TranscriptId from interview_helper.downloads.util import extract_timestamp_from_ulid +# Time window for transcript excerpts before answered questions +TRANSCRIPT_EXCERPT_WINDOW = timedelta(minutes=1) + @dataclass class ReportQuestionEntry: @@ -42,6 +45,7 @@ class ReportQuestionEntry: context_anchor: str | None answered_at_anchor: str | None answered_at_text: str | None + transcript_excerpt: str | None = None @dataclass @@ -230,6 +234,7 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N answered_anchor: str | None = None answered_at_text: str | None = None + transcript_excerpt: str | None = None if analysis.asked_at_transcript_id is not None: asked_at_id = analysis.asked_at_transcript_id.lower() @@ -237,6 +242,23 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N asked_at_timestamp = TranscriptId.from_str(asked_at_id).get_datetime() answered_at_text = _format_utc(asked_at_timestamp) + # Build transcript excerpt from past minute before question was answered + excerpt_parts: list[str] = [] + excerpt_start_time = asked_at_timestamp - TRANSCRIPT_EXCERPT_WINDOW + + for row in transcript_rows: + row_id = str(row["transcription_id"]) + row_timestamp = extract_timestamp_from_ulid(row_id) + + if excerpt_start_time <= row_timestamp < asked_at_timestamp: + speaker = str(row["speaker"] or "Unknown") + text = str(row["text_output"] 
or "").strip() + if text: + excerpt_parts.append(f"{speaker}: {text}") + + if excerpt_parts: + transcript_excerpt = " ".join(excerpt_parts) + question_anchor = f"question-{analysis.ordinal}" entry = ReportQuestionEntry( @@ -251,6 +273,7 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N ), answered_at_anchor=answered_anchor, answered_at_text=answered_at_text, + transcript_excerpt=transcript_excerpt, ) if analysis.was_asked is True: @@ -350,6 +373,14 @@ def _render_question_sections( ) ) + if entry.transcript_excerpt: + story.append( + Paragraph( + f'Transcript Excerpt: {escape(entry.transcript_excerpt)}', + question_style, + ) + ) + story.append(Spacer(1, 0.12 * inch)) story.append(Spacer(1, 0.08 * inch)) @@ -416,12 +447,12 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None # Cover page story.append(Spacer(1, 1.5 * inch)) story.append( - Paragraph('Interview Prep', title_style) + Paragraph('Interview Report', title_style) ) story.append(Spacer(1, 0.1 * inch)) story.append( Paragraph( - f'{escape(report_data.project_name)}', + f'Project: {escape(report_data.project_name)}', ParagraphStyle( "ProjectTitle", parent=title_style, From f96019b0b5135bc74bfb89cf0a5ec864b7b7a1b4 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Sun, 8 Mar 2026 18:29:55 -0700 Subject: [PATCH 04/14] feat(frontend): resizable on desktop --- .../components/audio-sender/DesktopLayout.tsx | 139 +++++++++++++++++- 1 file changed, 135 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/audio-sender/DesktopLayout.tsx b/frontend/src/components/audio-sender/DesktopLayout.tsx index 43ebd51..2ae8146 100644 --- a/frontend/src/components/audio-sender/DesktopLayout.tsx +++ b/frontend/src/components/audio-sender/DesktopLayout.tsx @@ -1,4 +1,5 @@ import { Box } from "@mantine/core"; +import { useCallback, useEffect, useRef, useState } from "react"; import type { AnalysisRow } from "../../lib/message"; import { InsightsPanel } 
from "./InsightsPanel"; import { RecordingControls } from "./RecordingControls"; @@ -71,15 +72,110 @@ export function DesktopLayout({ onStopRecording, }: DesktopLayoutProps) { const isConnected = connectionState === "connected"; + const DEFAULT_INSIGHTS_WIDTH = 340; + const MIN_TRANSCRIPT_WIDTH = 420; + const MIN_INSIGHTS_WIDTH = 260; + const RESIZE_HANDLE_WIDTH = 10; + const [insightsWidth, setInsightsWidth] = useState(DEFAULT_INSIGHTS_WIDTH); + const [isResizing, setIsResizing] = useState(false); + const resizeHandleRef = useRef(null); + const dragStateRef = useRef<{ + startX: number; + startWidth: number; + containerWidth: number; + } | null>(null); + + const clampInsightsWidth = useCallback((nextWidth: number) => { + const containerWidth = + resizeHandleRef.current?.parentElement?.clientWidth ?? 0; + + if (containerWidth <= 0) return nextWidth; + + const maxInsightsWidth = Math.max( + MIN_INSIGHTS_WIDTH, + containerWidth - MIN_TRANSCRIPT_WIDTH - RESIZE_HANDLE_WIDTH, + ); + + return Math.max( + MIN_INSIGHTS_WIDTH, + Math.min(nextWidth, maxInsightsWidth), + ); + }, []); + + useEffect(() => { + const handleWindowResize = () => { + setInsightsWidth((prevWidth) => clampInsightsWidth(prevWidth)); + }; + + handleWindowResize(); + window.addEventListener("resize", handleWindowResize); + return () => window.removeEventListener("resize", handleWindowResize); + }, [clampInsightsWidth]); + + useEffect(() => { + if (!isResizing) return; + + const handleMouseMove = (event: MouseEvent) => { + const dragState = dragStateRef.current; + if (!dragState) return; + + const deltaX = event.clientX - dragState.startX; + const nextWidth = dragState.startWidth - deltaX; + setInsightsWidth(clampInsightsWidth(nextWidth)); + }; + + const handleMouseUp = () => { + dragStateRef.current = null; + setIsResizing(false); + }; + + const previousUserSelect = document.body.style.userSelect; + const previousCursor = document.body.style.cursor; + + document.body.style.userSelect = "none"; + 
document.body.style.cursor = "col-resize"; + + window.addEventListener("mousemove", handleMouseMove); + window.addEventListener("mouseup", handleMouseUp); + + return () => { + document.body.style.userSelect = previousUserSelect; + document.body.style.cursor = previousCursor; + window.removeEventListener("mousemove", handleMouseMove); + window.removeEventListener("mouseup", handleMouseUp); + }; + }, [clampInsightsWidth, isResizing]); + + const handleResizeStart = (event: React.MouseEvent) => { + event.preventDefault(); + + const containerWidth = + resizeHandleRef.current?.parentElement?.clientWidth ?? 0; + if (containerWidth <= 0) return; + + dragStateRef.current = { + startX: event.clientX, + startWidth: insightsWidth, + containerWidth, + }; + setIsResizing(true); + }; return ( - <> + {/* Transcript area fills the rest */} @@ -108,8 +204,43 @@ export function DesktopLayout({ /> + + + + {/* Insights Panel */} - + - + ); } From 31f8ebf7efcb5626b569d7d04ca32d6c5291c9b9 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Sun, 8 Mar 2026 18:57:36 -0700 Subject: [PATCH 05/14] fix(backend): better transcript excerpts --- .../interview_helper/downloads/get_report.py | 96 +++++++++++++++---- 1 file changed, 77 insertions(+), 19 deletions(-) diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py index 217c8d1..fa71010 100644 --- a/backend/src/interview_helper/downloads/get_report.py +++ b/backend/src/interview_helper/downloads/get_report.py @@ -216,6 +216,56 @@ def _analysis_context_anchor( return chunk_to_section_anchor.get(str(analysis.transcript_context_end)) +def _build_transcript_excerpt( + transcript_rows: Sequence[TranscriptionWithProjectDetails], + asked_at_timestamp: datetime, +) -> str | None: + excerpt_start_time = asked_at_timestamp - TRANSCRIPT_EXCERPT_WINDOW + excerpt_rows: list[tuple[str, str]] = [] + + for row in transcript_rows: + row_timestamp = 
extract_timestamp_from_ulid(str(row["transcription_id"])) + if not (excerpt_start_time <= row_timestamp < asked_at_timestamp): + continue + + speaker = str(row["speaker"] or "Unknown Speaker") + text = str(row["text_output"] or "").strip() + if text: + excerpt_rows.append((speaker, text)) + + if len(excerpt_rows) == 0: + return None + + grouped_lines: list[str] = [] + current_speaker: str | None = None + current_texts: list[str] = [] + + def flush_current() -> None: + nonlocal current_speaker, current_texts + if current_speaker is None or len(current_texts) == 0: + return + grouped_lines.append(f"{current_speaker}: {' '.join(current_texts)}") + current_speaker = None + current_texts = [] + + for speaker, text in excerpt_rows: + if current_speaker is None: + current_speaker = speaker + current_texts = [text] + continue + + if speaker == current_speaker: + current_texts.append(text) + continue + + flush_current() + current_speaker = speaker + current_texts = [text] + + flush_current() + return "\n".join(grouped_lines) if grouped_lines else None + + def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | None: typed_project_id = ProjectId.from_str(project_id) @@ -241,23 +291,9 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N answered_anchor = anchor_index.chunk_to_section_anchor.get(asked_at_id) asked_at_timestamp = TranscriptId.from_str(asked_at_id).get_datetime() answered_at_text = _format_utc(asked_at_timestamp) - - # Build transcript excerpt from past minute before question was answered - excerpt_parts: list[str] = [] - excerpt_start_time = asked_at_timestamp - TRANSCRIPT_EXCERPT_WINDOW - - for row in transcript_rows: - row_id = str(row["transcription_id"]) - row_timestamp = extract_timestamp_from_ulid(row_id) - - if excerpt_start_time <= row_timestamp < asked_at_timestamp: - speaker = str(row["speaker"] or "Unknown") - text = str(row["text_output"] or "").strip() - if text: - 
excerpt_parts.append(f"{speaker}: {text}") - - if excerpt_parts: - transcript_excerpt = " ".join(excerpt_parts) + transcript_excerpt = _build_transcript_excerpt( + transcript_rows, asked_at_timestamp + ) question_anchor = f"question-{analysis.ordinal}" @@ -313,6 +349,7 @@ def _render_question_sections( heading_style: ParagraphStyle, category_style: ParagraphStyle, question_style: ParagraphStyle, + excerpt_style: ParagraphStyle, ) -> None: story.append(Paragraph(escape(title), heading_style)) story.append(Spacer(1, 0.15 * inch)) @@ -374,10 +411,18 @@ def _render_question_sections( ) if entry.transcript_excerpt: + formatted_excerpt = "... " + escape(entry.transcript_excerpt).replace( + "\n", "
" + ) + excerpt_label = ( + f'Transcript Excerpt' + if entry.answered_at_anchor + else "Transcript Excerpt" + ) story.append( Paragraph( - f'Transcript Excerpt: {escape(entry.transcript_excerpt)}', - question_style, + f'{excerpt_label}: {formatted_excerpt}', + excerpt_style, ) ) @@ -441,6 +486,17 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None textColor=colors.HexColor("#555555"), spaceAfter=6, ) + excerpt_style = ParagraphStyle( + "ExcerptStyle", + parent=normal_style, + fontSize=10, + leading=12, + leftIndent=36, + rightIndent=12, + textColor=colors.HexColor("#666666"), + spaceBefore=6, + spaceAfter=4, + ) story: list[Flowable] = [] @@ -485,6 +541,7 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None heading_style, category_style, question_style, + excerpt_style, ) story.append(PageBreak()) @@ -535,6 +592,7 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None heading_style, category_style, question_style, + excerpt_style, ) document.build(story) From 766562082a1ec8b02283e471a9d9a78aa7a77239 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Tue, 10 Mar 2026 10:31:18 -0700 Subject: [PATCH 06/14] feat(project): clarify AI analysis API --- .../alembic/versions/869cfd49ebd5_initial.py | 17 +- .../context_manager/database.py | 194 +++++++++++++++--- .../context_manager/messages.py | 38 ++++ .../context_manager/models.py | 21 +- .../context_manager/tests/test_database.py | 128 ++++++++++++ .../interview_helper/downloads/get_report.py | 12 +- .../tests/test_report_generation.py | 19 +- backend/src/main.py | 102 ++++++--- frontend/src/components/AudioSender.tsx | 116 ++--------- frontend/src/lib/message.ts | 43 ++++ 10 files changed, 512 insertions(+), 178 deletions(-) diff --git a/backend/alembic/versions/869cfd49ebd5_initial.py b/backend/alembic/versions/869cfd49ebd5_initial.py index 597e0ae..35535d7 100644 --- a/backend/alembic/versions/869cfd49ebd5_initial.py +++ 
b/backend/alembic/versions/869cfd49ebd5_initial.py @@ -27,7 +27,7 @@ def upgrade() -> None: sa.Column("oidc_id", sa.String(length=255), nullable=False), sa.Column( "updated_at", - sa.DateTime(), + sa.DateTime(timezone=True), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False, ), @@ -42,13 +42,13 @@ def upgrade() -> None: sa.Column("creator_user_id", sa.String(length=26), nullable=False), sa.Column( "created_at", - sa.DateTime(), + sa.DateTime(timezone=True), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False, ), sa.Column( "updated_at", - sa.DateTime(), + sa.DateTime(timezone=True), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False, ), @@ -65,11 +65,11 @@ def upgrade() -> None: sa.Column("user_id", sa.String(length=26), nullable=False), sa.Column( "started_at", - sa.DateTime(), + sa.DateTime(timezone=True), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False, ), - sa.Column("ended_at", sa.DateTime(), nullable=True), + sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True), sa.ForeignKeyConstraint( ["project_id"], ["project.project_id"], @@ -90,13 +90,13 @@ def upgrade() -> None: sa.Column("speaker", sa.String(length=100), nullable=True), sa.Column( "created_at", - sa.DateTime(), + sa.DateTime(timezone=True), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False, ), sa.Column( "updated_at", - sa.DateTime(), + sa.DateTime(timezone=True), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False, ), @@ -122,9 +122,10 @@ def upgrade() -> None: sa.Column("transcript_context_end", sa.String(length=26), nullable=False), sa.Column("summary", sa.Text(), nullable=False), sa.Column("tag", sa.String(length=50), nullable=True), - sa.Column("time_tag_changed", sa.DateTime(), nullable=True), + sa.Column("time_tag_changed", sa.DateTime(timezone=True), nullable=True), sa.Column("was_asked", sa.Boolean(), nullable=True), sa.Column("asked_at_transcript_id", sa.String(length=26), nullable=True), + sa.Column("asked_at", 
sa.DateTime(timezone=True), nullable=True), sa.ForeignKeyConstraint( ["project_id"], ["project.project_id"], diff --git a/backend/src/interview_helper/context_manager/database.py b/backend/src/interview_helper/context_manager/database.py index ebed376..060beeb 100644 --- a/backend/src/interview_helper/context_manager/database.py +++ b/backend/src/interview_helper/context_manager/database.py @@ -1,5 +1,5 @@ from collections.abc import Sequence -from datetime import datetime +from datetime import datetime, timezone from pydantic import BaseModel from sqlalchemy.sql.sqltypes import DateTime from typing import Literal, TypedDict @@ -523,6 +523,64 @@ class AnalysisRow(BaseModel): ordinal: int was_asked: bool | None = None asked_at_transcript_id: str | None = None + asked_at: datetime | None = None + + +type AnalysisTag = Literal["starred", "dismissed", "starred_dismissed"] | None + + +@dataclass +class AnalysisTagUpdateResult: + analysis_id: str + tag: AnalysisTag + was_asked: bool | None + asked_at_transcript_id: str | None + + +def _get_ai_analysis_state_for_update( + conn: sa.Connection, analysis_id: str +) -> tuple[AnalysisTag, bool | None, str | None]: + row = conn.execute( + sa.select( + models.AIAnalysis.tag, + models.AIAnalysis.was_asked, + models.AIAnalysis.asked_at_transcript_id, + ).where(models.AIAnalysis.analysis_id == analysis_id) + ).one_or_none() + + if row is None: + raise ValueError(f"analysis_id {analysis_id} was not found") + + return row.tag, row.was_asked, row.asked_at_transcript_id # pyright: ignore[reportAny] + + +def _persist_ai_analysis_state( + conn: sa.Connection, + *, + analysis_id: str, + tag: AnalysisTag, + was_asked: bool | None, + asked_at_transcript_id: str | None, + asked_at: datetime | None, +) -> AnalysisTagUpdateResult: + _ = conn.execute( + sa.update(models.AIAnalysis) + .where(models.AIAnalysis.analysis_id == analysis_id) + .values( + tag=tag, + time_tag_changed=sa.func.now(), + was_asked=was_asked, + 
asked_at_transcript_id=asked_at_transcript_id, + asked_at=asked_at, + ) + ) + + return AnalysisTagUpdateResult( + analysis_id=analysis_id, + tag=tag, + was_asked=was_asked, + asked_at_transcript_id=asked_at_transcript_id, + ) def get_all_ai_analyses( @@ -546,6 +604,7 @@ def get_all_ai_analyses( models.AIAnalysis.tag, models.AIAnalysis.was_asked, models.AIAnalysis.asked_at_transcript_id, + models.AIAnalysis.asked_at, sa.func.row_number() .over(order_by=models.AIAnalysis.analysis_id.asc()) .label("ordinal"), @@ -565,6 +624,7 @@ def get_all_ai_analyses( subq.c.tag, subq.c.was_asked, subq.c.asked_at_transcript_id, + subq.c.asked_at, subq.c.ordinal, ).order_by(subq.c.analysis_id.asc()) ).all() @@ -587,6 +647,9 @@ def get_all_ai_analyses( ordinal=row.ordinal, # pyright: ignore[reportAny] was_asked=row.was_asked, # pyright: ignore[reportAny] asked_at_transcript_id=row.asked_at_transcript_id, # pyright: ignore[reportAny] + asked_at=row.asked_at.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] + if row.asked_at # pyright: ignore[reportAny] + else None, ) for row in rows ] @@ -619,6 +682,7 @@ def get_analyses_by_ids( models.AIAnalysis.tag, models.AIAnalysis.was_asked, models.AIAnalysis.asked_at_transcript_id, + models.AIAnalysis.asked_at, sa.func.row_number() .over(order_by=models.AIAnalysis.analysis_id.asc()) .label("ordinal"), @@ -638,6 +702,7 @@ def get_analyses_by_ids( subq.c.tag, subq.c.was_asked, subq.c.asked_at_transcript_id, + subq.c.asked_at, subq.c.ordinal, ) .where(subq.c.analysis_id.in_(analysis_id_strs)) @@ -663,6 +728,7 @@ def get_analyses_by_ids( ordinal=row.ordinal, # pyright: ignore[reportAny] was_asked=row.was_asked, # pyright: ignore[reportAny] asked_at_transcript_id=row.asked_at_transcript_id, # pyright: ignore[reportAny] + asked_at=row.asked_at, # pyright: ignore[reportAny] ) for row in rows } @@ -673,43 +739,109 @@ def get_analyses_by_ids( ] -def update_ai_analysis_tag( +def mark_ai_analysis_asked( db: PersistentDatabase, analysis_id: str, - 
tag: str | None, - _user_id: UserId, - was_asked: bool | None = None, - asked_at_transcript_id: str | None = None, -): - """ - Update the tag for an AI analysis. + asked_at_transcript_id: str, +) -> AnalysisTagUpdateResult: + with db.begin() as conn: + current_tag, _, _ = _get_ai_analysis_state_for_update(conn, analysis_id) + if current_tag not in (None, "starred"): + raise ValueError("mark_asked is only valid for active or starred analyses") - Args: - analysis_id: The ID of the analysis to update - tag: The new tag value ("starred", "dismissed", "starred_dismissed", or None to clear) - _user_id: User ID (kept for API compatibility, not used as tags are project-wide) - was_asked: Whether the question was asked (only relevant when dismissing) - asked_at_transcript_id: The transcript ID where the question was asked - """ + asked_at = datetime.now(timezone.utc) + new_tag: AnalysisTag = ( + "starred_dismissed" if current_tag == "starred" else "dismissed" + ) + return _persist_ai_analysis_state( + conn, + analysis_id=analysis_id, + tag=new_tag, + was_asked=True, + asked_at_transcript_id=asked_at_transcript_id, + asked_at=asked_at, + ) + + +def mark_ai_analysis_dismissed_not_asked( + db: PersistentDatabase, + analysis_id: str, +) -> AnalysisTagUpdateResult: with db.begin() as conn: - update_values: dict[str, object] = { - "tag": tag, - "time_tag_changed": sa.func.now(), - } + current_tag, _, _ = _get_ai_analysis_state_for_update(conn, analysis_id) + if current_tag not in (None, "starred"): + raise ValueError( + "mark_dismissed_not_asked is only valid for active or starred analyses" + ) - # Only update answered fields if they are provided - if was_asked is not None: - update_values["was_asked"] = was_asked + new_tag: AnalysisTag = ( + "starred_dismissed" if current_tag == "starred" else "dismissed" + ) + return _persist_ai_analysis_state( + conn, + analysis_id=analysis_id, + tag=new_tag, + was_asked=False, + asked_at_transcript_id=None, + asked_at=None, + ) - if 
asked_at_transcript_id is None: - update_values["asked_at_transcript_id"] = None - else: - update_values["asked_at_transcript_id"] = asked_at_transcript_id - _ = conn.execute( - sa.update(models.AIAnalysis) - .where(models.AIAnalysis.analysis_id == analysis_id) - .values(**update_values) +def undo_ai_analysis_dismissal( + db: PersistentDatabase, + analysis_id: str, +) -> AnalysisTagUpdateResult: + with db.begin() as conn: + current_tag, _, _ = _get_ai_analysis_state_for_update(conn, analysis_id) + if current_tag not in ("dismissed", "starred_dismissed"): + raise ValueError("undo is only valid for dismissed analyses") + + new_tag: AnalysisTag = "starred" if current_tag == "starred_dismissed" else None + return _persist_ai_analysis_state( + conn, + analysis_id=analysis_id, + tag=new_tag, + was_asked=None, + asked_at_transcript_id=None, + asked_at=None, + ) + + +def star_ai_analysis( + db: PersistentDatabase, + analysis_id: str, +) -> AnalysisTagUpdateResult: + with db.begin() as conn: + current_tag, _, _ = _get_ai_analysis_state_for_update(conn, analysis_id) + if current_tag is not None: + raise ValueError("star is only valid for active analyses") + + return _persist_ai_analysis_state( + conn, + analysis_id=analysis_id, + tag="starred", + was_asked=None, + asked_at_transcript_id=None, + asked_at=None, + ) + + +def unstar_ai_analysis( + db: PersistentDatabase, + analysis_id: str, +) -> AnalysisTagUpdateResult: + with db.begin() as conn: + current_tag, _, _ = _get_ai_analysis_state_for_update(conn, analysis_id) + if current_tag != "starred": + raise ValueError("unstar is only valid for starred analyses") + + return _persist_ai_analysis_state( + conn, + analysis_id=analysis_id, + tag=None, + was_asked=None, + asked_at_transcript_id=None, + asked_at=None, ) diff --git a/backend/src/interview_helper/context_manager/messages.py b/backend/src/interview_helper/context_manager/messages.py index 82045c2..d4aaa09 100644 --- 
a/backend/src/interview_helper/context_manager/messages.py +++ b/backend/src/interview_helper/context_manager/messages.py @@ -74,6 +74,39 @@ class UpdateAIAnalysisTag(BaseModel): asked_at_transcript_id: str | None = None +class MarkAIAnalysisAsked(BaseModel): + type: Literal["mark_ai_analysis_asked"] = "mark_ai_analysis_asked" + timestamp: datetime = Field(default_factory=datetime.now) + analysis_id: str + asked_at_transcript_id: str + + +class UndoAIAnalysisDismissal(BaseModel): + type: Literal["undo_ai_analysis_dismissal"] = "undo_ai_analysis_dismissal" + timestamp: datetime = Field(default_factory=datetime.now) + analysis_id: str + + +class MarkAIAnalysisDismissedNotAsked(BaseModel): + type: Literal["mark_ai_analysis_dismissed_not_asked"] = ( + "mark_ai_analysis_dismissed_not_asked" + ) + timestamp: datetime = Field(default_factory=datetime.now) + analysis_id: str + + +class StarAIAnalysis(BaseModel): + type: Literal["star_ai_analysis"] = "star_ai_analysis" + timestamp: datetime = Field(default_factory=datetime.now) + analysis_id: str + + +class UnstarAIAnalysis(BaseModel): + type: Literal["unstar_ai_analysis"] = "unstar_ai_analysis" + timestamp: datetime = Field(default_factory=datetime.now) + analysis_id: str + + class RecordingStateMessage(BaseModel): type: Literal["recording_state"] = "recording_state" timestamp: datetime = Field(default_factory=datetime.now) @@ -89,6 +122,11 @@ class RecordingStateMessage(BaseModel): | AIResultMessage | CatchupMessage | ProjectMetadataMessage + | MarkAIAnalysisAsked + | UndoAIAnalysisDismissal + | MarkAIAnalysisDismissedNotAsked + | StarAIAnalysis + | UnstarAIAnalysis | UpdateAIAnalysisTag | RecordingStateMessage ) diff --git a/backend/src/interview_helper/context_manager/models.py b/backend/src/interview_helper/context_manager/models.py index 1736be5..78157a7 100644 --- a/backend/src/interview_helper/context_manager/models.py +++ b/backend/src/interview_helper/context_manager/models.py @@ -19,7 +19,7 @@ class User(Base): 
full_name: Mapped[str] = mapped_column(sa.String(100), nullable=False, unique=True) oidc_id: Mapped[str] = mapped_column(sa.String(255), nullable=False, unique=True) updated_at: Mapped[DateTime] = mapped_column( - sa.DateTime, + sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now(), onupdate=sa.func.now(), @@ -42,10 +42,10 @@ class Transcription(Base): text_output: Mapped[str] = mapped_column(sa.Text, nullable=True) speaker: Mapped[str] = mapped_column(sa.String(100), nullable=True) created_at: Mapped[DateTime] = mapped_column( - sa.DateTime, nullable=False, server_default=sa.func.now() + sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() ) updated_at: Mapped[DateTime] = mapped_column( - sa.DateTime, + sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now(), onupdate=sa.func.now(), @@ -65,9 +65,11 @@ class Session(Base): sa.String(26), ForeignKey("users.user_id"), nullable=False ) started_at: Mapped[DateTime] = mapped_column( - sa.DateTime, nullable=False, server_default=sa.func.now() + sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() + ) + ended_at: Mapped[DateTime | None] = mapped_column( + sa.DateTime(timezone=True), nullable=True ) - ended_at: Mapped[DateTime | None] = mapped_column(sa.DateTime, nullable=True) class AIAnalysis(Base): @@ -100,7 +102,7 @@ class AIAnalysis(Base): tag: Mapped[str | None] = mapped_column(sa.String(50), nullable=True) time_tag_changed: Mapped[DateTime | None] = mapped_column( - sa.DateTime, nullable=True + sa.DateTime(timezone=True), nullable=True ) # Fields for tracking if the question was asked @@ -108,6 +110,9 @@ class AIAnalysis(Base): asked_at_transcript_id: Mapped[str | None] = mapped_column( sa.String(26), ForeignKey("transcriptions.transcription_id"), nullable=True ) + asked_at: Mapped[DateTime | None] = mapped_column( + sa.DateTime(timezone=True), nullable=True + ) class Project(Base): @@ -122,11 +127,11 @@ class Project(Base): ) created_at: 
Mapped[DateTime] = mapped_column( - sa.DateTime, nullable=False, server_default=sa.func.now() + sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now() ) updated_at: Mapped[DateTime] = mapped_column( - sa.DateTime, + sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now(), onupdate=sa.func.now(), diff --git a/backend/src/interview_helper/context_manager/tests/test_database.py b/backend/src/interview_helper/context_manager/tests/test_database.py index d6cce4d..73dd71a 100644 --- a/backend/src/interview_helper/context_manager/tests/test_database.py +++ b/backend/src/interview_helper/context_manager/tests/test_database.py @@ -8,8 +8,14 @@ get_all_ai_analyses, get_or_add_user_by_oidc_id, get_user_by_id, + mark_ai_analysis_asked, + mark_ai_analysis_dismissed_not_asked, + star_ai_analysis, + unstar_ai_analysis, + undo_ai_analysis_dismissal, ) from interview_helper.context_manager.types import ProjectId, SessionId, TranscriptId +from datetime import datetime, timezone import sqlalchemy as sa import pytest @@ -92,3 +98,125 @@ def test_add_ai_analysis_normalizes_invalid_category_code_to_default(): rows = get_all_ai_analyses(db, project_id) assert len(rows) == 1 assert rows[0].category_code == "P" + + +def test_mark_ai_analysis_actions_update_tag_and_asked_fields(): + db = PersistentDatabase.new_in_memory() + user = get_or_add_user_by_oidc_id(db, "oidc-asked-at", "Asked At User") + + project = create_new_project(db, user.user_id, "P2") + project_id = ProjectId.from_str(project["id"]) + session_id = SessionId(ULID()) + create_session(db, session_id, project_id, user.user_id) + + transcript_id = add_transcription( + db=db, + user_id=user.user_id, + session_id=session_id, + project_id=project_id, + text="Transcript chunk", + speaker="Speaker-1", + ) + + analysis_id = add_ai_analysis( + db=db, + project_id=project_id, + text="Question?", + category_code="P", + span=None, + transcript_span_id=TranscriptId.from_str(transcript_id), + 
transcript_context_start=TranscriptId.from_str(transcript_id), + transcript_context_end=TranscriptId.from_str(transcript_id), + summary="Summary", + ) + + current_datetime = datetime.now(timezone.utc) + + _ = mark_ai_analysis_asked( + db=db, analysis_id=str(analysis_id), asked_at_transcript_id=transcript_id + ) + + rows = get_all_ai_analyses(db, project_id) + assert len(rows) == 1 + asked_row = rows[0] + assert asked_row.was_asked is True + assert asked_row.asked_at_transcript_id == transcript_id + assert asked_row.asked_at is not None, "asked_at should be set when marked as asked" + assert asked_row.asked_at >= current_datetime, ( + "asked_at should be at least the time before the 'mark as asked'" + ) + + _ = undo_ai_analysis_dismissal(db=db, analysis_id=str(analysis_id)) + _ = star_ai_analysis(db=db, analysis_id=str(analysis_id)) + _ = mark_ai_analysis_dismissed_not_asked(db=db, analysis_id=str(analysis_id)) + rows_after_clear = get_all_ai_analyses(db, project_id) + assert len(rows_after_clear) == 1 + cleared_row = rows_after_clear[0] + assert cleared_row.tag == "starred_dismissed" + assert cleared_row.was_asked is False + assert cleared_row.asked_at_transcript_id is None + assert cleared_row.asked_at is None + + _ = undo_ai_analysis_dismissal(db=db, analysis_id=str(analysis_id)) + rows_after_undo = get_all_ai_analyses(db, project_id) + assert len(rows_after_undo) == 1 + undone_row = rows_after_undo[0] + assert undone_row.tag == "starred" + assert undone_row.was_asked is None + assert undone_row.asked_at_transcript_id is None + assert undone_row.asked_at is None + + _ = unstar_ai_analysis(db=db, analysis_id=str(analysis_id)) + rows_after_unstar = get_all_ai_analyses(db, project_id) + assert len(rows_after_unstar) == 1 + unstarred_row = rows_after_unstar[0] + assert unstarred_row.tag is None + + +def test_mark_ai_analysis_actions_validate_invalid_transitions(): + db = PersistentDatabase.new_in_memory() + user = get_or_add_user_by_oidc_id(db, "oidc-validate-tags", 
"Validate User") + + project = create_new_project(db, user.user_id, "P3") + project_id = ProjectId.from_str(project["id"]) + session_id = SessionId(ULID()) + create_session(db, session_id, project_id, user.user_id) + + transcript_id = add_transcription( + db=db, + user_id=user.user_id, + session_id=session_id, + project_id=project_id, + text="Transcript chunk", + speaker="Speaker-1", + ) + + analysis_id = add_ai_analysis( + db=db, + project_id=project_id, + text="Question?", + category_code="P", + span=None, + transcript_span_id=TranscriptId.from_str(transcript_id), + transcript_context_start=TranscriptId.from_str(transcript_id), + transcript_context_end=TranscriptId.from_str(transcript_id), + summary="Summary", + ) + + with pytest.raises(ValueError): + _ = unstar_ai_analysis(db=db, analysis_id=str(analysis_id)) + + with pytest.raises(ValueError): + _ = undo_ai_analysis_dismissal(db=db, analysis_id=str(analysis_id)) + + _ = mark_ai_analysis_dismissed_not_asked(db=db, analysis_id=str(analysis_id)) + + with pytest.raises(ValueError): + _ = star_ai_analysis(db=db, analysis_id=str(analysis_id)) + + with pytest.raises(ValueError): + _ = mark_ai_analysis_asked( + db=db, + analysis_id=str(analysis_id), + asked_at_transcript_id=transcript_id, + ) diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py index fa71010..7752fb3 100644 --- a/backend/src/interview_helper/downloads/get_report.py +++ b/backend/src/interview_helper/downloads/get_report.py @@ -27,7 +27,7 @@ QUESTION_CATEGORY_ORDER, normalize_question_category_code, ) -from interview_helper.context_manager.types import ProjectId, TranscriptId +from interview_helper.context_manager.types import ProjectId from interview_helper.downloads.util import extract_timestamp_from_ulid # Time window for transcript excerpts before answered questions @@ -286,13 +286,15 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N 
answered_at_text: str | None = None transcript_excerpt: str | None = None - if analysis.asked_at_transcript_id is not None: + if analysis.asked_at is not None: + assert analysis.asked_at_transcript_id, ( + "asked_at_transcript_id should be set if asked_at is set" + ) asked_at_id = analysis.asked_at_transcript_id.lower() answered_anchor = anchor_index.chunk_to_section_anchor.get(asked_at_id) - asked_at_timestamp = TranscriptId.from_str(asked_at_id).get_datetime() - answered_at_text = _format_utc(asked_at_timestamp) + answered_at_text = _format_utc(analysis.asked_at) transcript_excerpt = _build_transcript_excerpt( - transcript_rows, asked_at_timestamp + transcript_rows, analysis.asked_at ) question_anchor = f"question-{analysis.ordinal}" diff --git a/backend/src/interview_helper/tests/test_report_generation.py b/backend/src/interview_helper/tests/test_report_generation.py index bf63e46..52607cd 100644 --- a/backend/src/interview_helper/tests/test_report_generation.py +++ b/backend/src/interview_helper/tests/test_report_generation.py @@ -12,7 +12,7 @@ create_new_project, create_session, get_or_add_user_by_oidc_id, - update_ai_analysis_tag, + mark_ai_analysis_asked, ) from interview_helper.context_manager.types import ( ProjectId, @@ -128,14 +128,18 @@ def test_build_report_data_groups_questions_and_creates_bidirectional_anchors(): ) _ = unanswered_analysis_id - update_ai_analysis_tag( + _ = mark_ai_analysis_asked( db, analysis_id=str(answered_analysis_id), - tag="dismissed", - _user_id=user.user_id, - was_asked=True, asked_at_transcript_id=transcript_3, ) + explicit_asked_at = t1 + timedelta(seconds=30) + with db.begin() as conn: + _ = conn.execute( + sa.update(models.AIAnalysis) + .where(models.AIAnalysis.analysis_id == str(answered_analysis_id)) + .values(asked_at=explicit_asked_at) + ) report = build_report_data(project["id"], db) assert report is not None @@ -153,6 +157,11 @@ def test_build_report_data_groups_questions_and_creates_bidirectional_anchors(): 
answered_entry = answered[0] assert answered_entry.context_anchor == "transcript-1" assert answered_entry.answered_at_anchor == "transcript-2" + assert answered_entry.answered_at_text == "2026-01-01 10:02:30 UTC" + assert answered_entry.transcript_excerpt is not None + assert ( + "Speaker-A: He was carrying a blue jacket." in answered_entry.transcript_excerpt + ) transcript_section = report.transcript_sections[1] assert len(transcript_section.answered_question_refs) == 1 diff --git a/backend/src/main.py b/backend/src/main.py index 3cd434a..2446780 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -10,12 +10,18 @@ vosk_transcriber_consumer_pair, ) from interview_helper.context_manager.messages import ( + MarkAIAnalysisAsked, + MarkAIAnalysisDismissedNotAsked, UpdateAIAnalysisTag, + StarAIAnalysis, + UndoAIAnalysisDismissal, + UnstarAIAnalysis, PingMessage, CatchupMessage, ProjectMetadataMessage, TranscriptChunkToSend, RecordingStateMessage, + ErrorMessage, ) from interview_helper.security.http import ( verify_jwt_token, @@ -48,7 +54,11 @@ from interview_helper.context_manager.database import ( ProjectListing, create_new_project, - update_ai_analysis_tag, + mark_ai_analysis_asked, + mark_ai_analysis_dismissed_not_asked, + undo_ai_analysis_dismissal, + star_ai_analysis, + unstar_ai_analysis, get_all_projects, get_or_add_user_by_oidc_id, get_project_by_id, @@ -58,7 +68,7 @@ delete_project, get_project_creator_and_name, ) -from interview_helper.context_manager.types import ProjectId, TranscriptId +from interview_helper.context_manager.types import ProjectId from fastapi.security import OpenIdConnect from fastapi import FastAPI, WebSocket, Depends, HTTPException, status @@ -399,19 +409,68 @@ async def websocket_endpoint( await handle_webrtc_message(context, message) elif isinstance(message, PingMessage): await cws.send_message(PingMessage()) - elif isinstance(message, UpdateAIAnalysisTag): - update_ai_analysis_tag( - session_manager.db, - message.analysis_id, 
- message.tag, - ticket.user_id, - was_asked=message.was_asked, - asked_at_transcript_id=message.asked_at_transcript_id, - ) - # Broadcast the tag update to all sessions in this project - await session_manager.broadcast_to_project( - context.project_id, message - ) + elif isinstance( + message, + ( + MarkAIAnalysisAsked, + UndoAIAnalysisDismissal, + MarkAIAnalysisDismissedNotAsked, + StarAIAnalysis, + UnstarAIAnalysis, + ), + ): + try: + if isinstance(message, MarkAIAnalysisAsked): + update = mark_ai_analysis_asked( + session_manager.db, + message.analysis_id, + message.asked_at_transcript_id, + ) + elif isinstance(message, UndoAIAnalysisDismissal): + update = undo_ai_analysis_dismissal( + session_manager.db, + message.analysis_id, + ) + elif isinstance( + message, MarkAIAnalysisDismissedNotAsked + ): + update = mark_ai_analysis_dismissed_not_asked( + session_manager.db, + message.analysis_id, + ) + elif isinstance(message, StarAIAnalysis): + update = star_ai_analysis( + session_manager.db, + message.analysis_id, + ) + else: + update = unstar_ai_analysis( + session_manager.db, + message.analysis_id, + ) + + update_message = UpdateAIAnalysisTag( + analysis_id=update.analysis_id, + tag=update.tag, + was_asked=update.was_asked, + asked_at_transcript_id=update.asked_at_transcript_id, + ) + await session_manager.broadcast_to_project( + context.project_id, update_message + ) + except ValueError as e: + logger.warning( + "Rejected invalid AI analysis action for %s: %s", + message.analysis_id, + e, + ) + await cws.send_message( + ErrorMessage( + error_code="invalid_ai_analysis_action", + message=str(e), + session_id=str(context.session_id), + ) + ) # handle other message types... 
except WebSocketDisconnect: logger.info( @@ -632,14 +691,11 @@ async def download_questions( transcript_lines.append("\tStarred") if analysis.was_asked is True: - if analysis.asked_at_transcript_id: - transcript_id = TranscriptId.from_str(analysis.asked_at_transcript_id) - timestamp = transcript_id.get_datetime().strftime( - "%Y-%m-%d %H:%M:%S %Z" - ) - transcript_lines.append(f"\tAsked at {timestamp}") - else: - transcript_lines.append("\tAsked at unknown") + assert analysis.asked_at is not None, ( + "asked_at should be set if was_asked is True" + ) + timestamp = analysis.asked_at.strftime("%Y-%m-%d %H:%M:%S %Z") + transcript_lines.append(f"\tAsked at {timestamp}") elif analysis.was_asked is False: transcript_lines.append("\tNot Asked") diff --git a/frontend/src/components/AudioSender.tsx b/frontend/src/components/AudioSender.tsx index ce9df89..081ab12 100644 --- a/frontend/src/components/AudioSender.tsx +++ b/frontend/src/components/AudioSender.tsx @@ -93,21 +93,10 @@ export function AudioSender({ projectId }: AudioSenderProps) { // Handle starring an insight const handleStarInsight = useCallback( (analysisId: string) => { - // Update local state immediately - setInsights((prevState) => - prevState.map((insight) => - insight.analysis_id === analysisId - ? { ...insight, tag: "starred" } - : insight, - ), - ); - - // Send update tag message to backend ws.sendMessage({ - type: MessageType.UPDATE_AI_ANALYSIS_TAG, + type: MessageType.STAR_AI_ANALYSIS, timestamp: new Date().toISOString(), analysis_id: analysisId, - tag: "starred", }); }, [ws], @@ -116,21 +105,10 @@ export function AudioSender({ projectId }: AudioSenderProps) { // Handle unstarring an insight const handleUnstarInsight = useCallback( (analysisId: string) => { - // Update local state immediately - setInsights((prevState) => - prevState.map((insight) => - insight.analysis_id === analysisId - ? 
{ ...insight, tag: null } - : insight, - ), - ); - - // Send update tag message to backend ws.sendMessage({ - type: MessageType.UPDATE_AI_ANALYSIS_TAG, + type: MessageType.UNSTAR_AI_ANALYSIS, timestamp: new Date().toISOString(), analysis_id: analysisId, - tag: null, }); }, [ws], @@ -139,110 +117,47 @@ export function AudioSender({ projectId }: AudioSenderProps) { // Handle dismissing an insight as answered const handleDismissAsAnswered = useCallback( (analysisId: string) => { - // Get the latest transcript ID const latestTranscriptId = transcriptChunks.length > 0 ? transcriptChunks[transcriptChunks.length - 1] .transcription_id : null; - - // Send update tag message to backend - const insight = insights.find((i) => i.analysis_id === analysisId); - const newTag = - insight?.tag === "starred" ? "starred_dismissed" : "dismissed"; - - // Update local state immediately - setInsights((prevState) => - prevState.map((insight) => { - if (insight.analysis_id === analysisId) { - return { - ...insight, - tag: newTag, - was_asked: true, - asked_at_transcript_id: latestTranscriptId, - }; - } - return insight; - }), - ); + if (!latestTranscriptId) { + return; + } ws.sendMessage({ - type: MessageType.UPDATE_AI_ANALYSIS_TAG, + type: MessageType.MARK_AI_ANALYSIS_ASKED, timestamp: new Date().toISOString(), analysis_id: analysisId, - tag: newTag, - was_asked: true, asked_at_transcript_id: latestTranscriptId, }); }, - [ws, insights, transcriptChunks], + [ws, transcriptChunks], ); // Handle dismissing an insight as not answered const handleDismissNotAnswered = useCallback( (analysisId: string) => { - // Send update tag message to backend - const insight = insights.find((i) => i.analysis_id === analysisId); - const newTag = - insight?.tag === "starred" ? 
"starred_dismissed" : "dismissed"; - - // Update local state immediately - setInsights((prevState) => - prevState.map((insight) => { - if (insight.analysis_id === analysisId) { - return { - ...insight, - tag: newTag, - was_asked: false, - asked_at_transcript_id: null, - }; - } - return insight; - }), - ); - ws.sendMessage({ - type: MessageType.UPDATE_AI_ANALYSIS_TAG, + type: MessageType.MARK_AI_ANALYSIS_DISMISSED_NOT_ASKED, timestamp: new Date().toISOString(), analysis_id: analysisId, - tag: newTag, - was_asked: false, - asked_at_transcript_id: null, }); }, - [ws, insights], + [ws], ); // Handle undoing a dismiss (restore to active or starred) const handleUndoDismiss = useCallback( (analysisId: string) => { - // Update local state immediately - setInsights((prevState) => - prevState.map((insight) => { - if (insight.analysis_id === analysisId) { - // If it was starred_dismissed, restore to starred, otherwise to active (null) - const newTag = - insight.tag === "starred_dismissed" - ? "starred" - : null; - return { ...insight, tag: newTag }; - } - return insight; - }), - ); - - // Send update tag message to backend - const insight = insights.find((i) => i.analysis_id === analysisId); - const newTag = - insight?.tag === "starred_dismissed" ? "starred" : null; ws.sendMessage({ - type: MessageType.UPDATE_AI_ANALYSIS_TAG, + type: MessageType.UNDO_AI_ANALYSIS_DISMISSAL, timestamp: new Date().toISOString(), analysis_id: analysisId, - tag: newTag, }); }, - [ws, insights], + [ws], ); const viewportRef = useRef(null); @@ -577,11 +492,16 @@ export function AudioSender({ projectId }: AudioSenderProps) { useEffect(() => { const handleUpdateAIAnalysisTag = (message: UpdateAIAnalysisTag) => { - // Update insight tag in local state setInsights((prevState) => prevState.map((insight) => insight.analysis_id === message.analysis_id - ? { ...insight, tag: message.tag } + ? { + ...insight, + tag: message.tag, + was_asked: message.was_asked ?? 
null, + asked_at_transcript_id: + message.asked_at_transcript_id ?? null, + } : insight, ), ); diff --git a/frontend/src/lib/message.ts b/frontend/src/lib/message.ts index d174c76..57916b4 100644 --- a/frontend/src/lib/message.ts +++ b/frontend/src/lib/message.ts @@ -9,6 +9,12 @@ export const MessageType = { CATCHUP: "catchup", PROJECT_METADATA: "project_metadata", UPDATE_AI_ANALYSIS_TAG: "update_ai_analysis_tag", + MARK_AI_ANALYSIS_ASKED: "mark_ai_analysis_asked", + UNDO_AI_ANALYSIS_DISMISSAL: "undo_ai_analysis_dismissal", + MARK_AI_ANALYSIS_DISMISSED_NOT_ASKED: + "mark_ai_analysis_dismissed_not_asked", + STAR_AI_ANALYSIS: "star_ai_analysis", + UNSTAR_AI_ANALYSIS: "unstar_ai_analysis", RECORDING_STATE: "recording_state", } as const; @@ -67,6 +73,7 @@ export interface AnalysisRow { ordinal: number; was_asked?: boolean | null; asked_at_transcript_id?: string | null; + asked_at?: string | null; } export interface CatchupMessage { @@ -92,6 +99,37 @@ export interface UpdateAIAnalysisTag { asked_at_transcript_id?: string | null; } +export interface MarkAIAnalysisAsked { + type: typeof MessageType.MARK_AI_ANALYSIS_ASKED; + timestamp: string; + analysis_id: string; + asked_at_transcript_id: string; +} + +export interface UndoAIAnalysisDismissal { + type: typeof MessageType.UNDO_AI_ANALYSIS_DISMISSAL; + timestamp: string; + analysis_id: string; +} + +export interface MarkAIAnalysisDismissedNotAsked { + type: typeof MessageType.MARK_AI_ANALYSIS_DISMISSED_NOT_ASKED; + timestamp: string; + analysis_id: string; +} + +export interface StarAIAnalysis { + type: typeof MessageType.STAR_AI_ANALYSIS; + timestamp: string; + analysis_id: string; +} + +export interface UnstarAIAnalysis { + type: typeof MessageType.UNSTAR_AI_ANALYSIS; + timestamp: string; + analysis_id: string; +} + export interface RecordingStateMessage { type: typeof MessageType.RECORDING_STATE; timestamp: string; @@ -112,6 +150,11 @@ export type Message = | CatchupMessage | ProjectMetadataMessage | 
UpdateAIAnalysisTag + | MarkAIAnalysisAsked + | UndoAIAnalysisDismissal + | MarkAIAnalysisDismissedNotAsked + | StarAIAnalysis + | UnstarAIAnalysis | RecordingStateMessage; export interface Envelope { From 90a2d8499bc611730ea732907d1d48045d318a64 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Tue, 10 Mar 2026 10:59:07 -0700 Subject: [PATCH 07/14] fix(project): sort tags by when it was changed --- .../context_manager/database.py | 14 +++- .../components/audio-sender/InsightsPanel.tsx | 70 +++++++++++-------- frontend/src/lib/message.ts | 1 + 3 files changed, 56 insertions(+), 29 deletions(-) diff --git a/backend/src/interview_helper/context_manager/database.py b/backend/src/interview_helper/context_manager/database.py index 060beeb..62f81c2 100644 --- a/backend/src/interview_helper/context_manager/database.py +++ b/backend/src/interview_helper/context_manager/database.py @@ -524,6 +524,7 @@ class AnalysisRow(BaseModel): was_asked: bool | None = None asked_at_transcript_id: str | None = None asked_at: datetime | None = None + time_tag_changed: datetime | None = None type AnalysisTag = Literal["starred", "dismissed", "starred_dismissed"] | None @@ -605,6 +606,7 @@ def get_all_ai_analyses( models.AIAnalysis.was_asked, models.AIAnalysis.asked_at_transcript_id, models.AIAnalysis.asked_at, + models.AIAnalysis.time_tag_changed, sa.func.row_number() .over(order_by=models.AIAnalysis.analysis_id.asc()) .label("ordinal"), @@ -625,6 +627,7 @@ def get_all_ai_analyses( subq.c.was_asked, subq.c.asked_at_transcript_id, subq.c.asked_at, + subq.c.time_tag_changed, subq.c.ordinal, ).order_by(subq.c.analysis_id.asc()) ).all() @@ -650,6 +653,9 @@ def get_all_ai_analyses( asked_at=row.asked_at.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] if row.asked_at # pyright: ignore[reportAny] else None, + time_tag_changed=row.time_tag_changed.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] + if row.time_tag_changed # pyright: ignore[reportAny] + else None, ) for row in 
rows ] @@ -703,6 +709,7 @@ def get_analyses_by_ids( subq.c.was_asked, subq.c.asked_at_transcript_id, subq.c.asked_at, + subq.c.time_tag_changed, subq.c.ordinal, ) .where(subq.c.analysis_id.in_(analysis_id_strs)) @@ -728,7 +735,12 @@ def get_analyses_by_ids( ordinal=row.ordinal, # pyright: ignore[reportAny] was_asked=row.was_asked, # pyright: ignore[reportAny] asked_at_transcript_id=row.asked_at_transcript_id, # pyright: ignore[reportAny] - asked_at=row.asked_at, # pyright: ignore[reportAny] + asked_at=row.asked_at.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] + if row.asked_at # pyright: ignore[reportAny] + else None, + time_tag_changed=row.time_tag_changed.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] + if row.time_tag_changed # pyright: ignore[reportAny] + else None, ) for row in rows } diff --git a/frontend/src/components/audio-sender/InsightsPanel.tsx b/frontend/src/components/audio-sender/InsightsPanel.tsx index 58dc08c..1721a19 100644 --- a/frontend/src/components/audio-sender/InsightsPanel.tsx +++ b/frontend/src/components/audio-sender/InsightsPanel.tsx @@ -48,10 +48,30 @@ export function InsightsPanel({ a.tag !== "dismissed" && a.tag !== "starred_dismissed", ); - const starredInsights = insights.filter((a) => a.tag === "starred"); - const dismissedInsights = insights.filter( - (a) => a.tag === "dismissed" || a.tag === "starred_dismissed", - ); + const starredInsights = insights + .filter((a) => a.tag === "starred") + .sort((a, b) => { + // Sort by time_tag_changed, oldest first (newest at bottom) + const timeA = a.time_tag_changed + ? new Date(a.time_tag_changed).getTime() + : 0; + const timeB = b.time_tag_changed + ? new Date(b.time_tag_changed).getTime() + : 0; + return timeA - timeB; + }); + const dismissedInsights = insights + .filter((a) => a.tag === "dismissed" || a.tag === "starred_dismissed") + .sort((a, b) => { + // Sort by time_tag_changed, oldest first + const timeA = a.time_tag_changed + ? 
new Date(a.time_tag_changed).getTime() + : 0; + const timeB = b.time_tag_changed + ? new Date(b.time_tag_changed).getTime() + : 0; + return timeA - timeB; + }); const renderActiveInsight = (analysis: AnalysisRow) => ( @@ -366,18 +386,15 @@ export function InsightsPanel({ here in real time. ) : ( - activeInsights - .reverse() - .map((analysis, index) => ( - - {renderActiveInsight(analysis)} - {index < - activeInsights.length - - 1 && } - - )) + activeInsights.map((analysis, index) => ( + + {renderActiveInsight(analysis)} + {index < + activeInsights.length - 1 && ( + + )} + + )) )}
@@ -397,18 +414,15 @@ export function InsightsPanel({ No starred questions. ) : ( - starredInsights - .reverse() - .map((analysis, index) => ( - - {renderStarredInsight(analysis)} - {index < - starredInsights.length - - 1 && } - - )) + starredInsights.map((analysis, index) => ( + + {renderStarredInsight(analysis)} + {index < + starredInsights.length - 1 && ( + + )} + + )) )} diff --git a/frontend/src/lib/message.ts b/frontend/src/lib/message.ts index 57916b4..b3e8242 100644 --- a/frontend/src/lib/message.ts +++ b/frontend/src/lib/message.ts @@ -74,6 +74,7 @@ export interface AnalysisRow { was_asked?: boolean | null; asked_at_transcript_id?: string | null; asked_at?: string | null; + time_tag_changed?: string | null; } export interface CatchupMessage { From 1c9afd93b8b873a45132fc2cf04389a6b55b723b Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Tue, 10 Mar 2026 11:04:40 -0700 Subject: [PATCH 08/14] chore(backend): extend question time to every 4m --- backend/src/interview_helper/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/interview_helper/config.py b/backend/src/interview_helper/config.py index fff1bd5..2dc3755 100644 --- a/backend/src/interview_helper/config.py +++ b/backend/src/interview_helper/config.py @@ -50,7 +50,7 @@ def model_post_init(self, __context): bytes_per_sample: int = 2 # AI Processing - process_transcript_every_secs: float = 60.0 * 2 # 2 minutes + process_transcript_every_secs: float = 60.0 * 4 # 4 minutes process_transcript_every_word_count: int = 100 azure_api_endpoint: str = Field(alias="OPENAI_API_ENDPOINT") From 6767d4041a370199b3282e1ebb8ad8c348367e3b Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Wed, 11 Mar 2026 14:08:52 -0700 Subject: [PATCH 09/14] Update backend/src/interview_helper/context_manager/database.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- backend/src/interview_helper/context_manager/database.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/backend/src/interview_helper/context_manager/database.py b/backend/src/interview_helper/context_manager/database.py index 62f81c2..003bb3e 100644 --- a/backend/src/interview_helper/context_manager/database.py +++ b/backend/src/interview_helper/context_manager/database.py @@ -689,6 +689,7 @@ def get_analyses_by_ids( models.AIAnalysis.was_asked, models.AIAnalysis.asked_at_transcript_id, models.AIAnalysis.asked_at, + models.AIAnalysis.time_tag_changed, sa.func.row_number() .over(order_by=models.AIAnalysis.analysis_id.asc()) .label("ordinal"), From 0b281681902038896982d5fbce5554032fb4cdfe Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Wed, 11 Mar 2026 14:09:21 -0700 Subject: [PATCH 10/14] Update backend/src/main.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- backend/src/main.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/src/main.py b/backend/src/main.py index 2446780..a1f30d9 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -454,6 +454,8 @@ async def websocket_endpoint( tag=update.tag, was_asked=update.was_asked, asked_at_transcript_id=update.asked_at_transcript_id, + time_tag_changed=update.time_tag_changed, + asked_at=update.asked_at, ) await session_manager.broadcast_to_project( context.project_id, update_message From b71fca3cfd646abd644cb0bd39ccecd07c4c6d0d Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Wed, 11 Mar 2026 14:10:45 -0700 Subject: [PATCH 11/14] Update frontend/src/components/audio-sender/DesktopLayout.tsx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- frontend/src/components/audio-sender/DesktopLayout.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/frontend/src/components/audio-sender/DesktopLayout.tsx b/frontend/src/components/audio-sender/DesktopLayout.tsx index 2ae8146..f86a3e1 100644 --- a/frontend/src/components/audio-sender/DesktopLayout.tsx +++ b/frontend/src/components/audio-sender/DesktopLayout.tsx @@ -86,8 +86,11 @@ export 
function DesktopLayout({ } | null>(null); const clampInsightsWidth = useCallback((nextWidth: number) => { + const storedContainerWidth = dragStateRef.current?.containerWidth; const containerWidth = - resizeHandleRef.current?.parentElement?.clientWidth ?? 0; + storedContainerWidth ?? + resizeHandleRef.current?.parentElement?.clientWidth ?? + 0; if (containerWidth <= 0) return nextWidth; From 27a3002c29dc46d0c45afda363ede3402fb3ad66 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Wed, 11 Mar 2026 14:29:07 -0700 Subject: [PATCH 12/14] fix(project): add asked_at to update msg + db ts Added a function to fix the datetime from the database --- .../context_manager/database.py | 45 +++++++---- .../context_manager/messages.py | 2 + .../interview_helper/downloads/get_report.py | 76 ++++++++++++++----- frontend/src/components/AudioSender.tsx | 3 + frontend/src/lib/message.ts | 2 + 5 files changed, 93 insertions(+), 35 deletions(-) diff --git a/backend/src/interview_helper/context_manager/database.py b/backend/src/interview_helper/context_manager/database.py index 003bb3e..9654727 100644 --- a/backend/src/interview_helper/context_manager/database.py +++ b/backend/src/interview_helper/context_manager/database.py @@ -2,7 +2,7 @@ from datetime import datetime, timezone from pydantic import BaseModel from sqlalchemy.sql.sqltypes import DateTime -from typing import Literal, TypedDict +from typing import Literal, TypedDict, cast from interview_helper.context_manager.types import ( AnalysisId, ProjectId, @@ -486,6 +486,14 @@ class ProjectCreatorInfo: name: str +def _normalize_db_timestamp(ts: datetime | None) -> datetime | None: + if ts is None: + return None + if ts.tzinfo is None: + return ts.replace(tzinfo=timezone.utc) + return ts.astimezone(timezone.utc) + + def get_project_creator_and_name( db: PersistentDatabase, project_id: ProjectId ) -> ProjectCreatorInfo | None: @@ -536,6 +544,8 @@ class AnalysisTagUpdateResult: tag: AnalysisTag was_asked: bool | None 
asked_at_transcript_id: str | None + asked_at: datetime | None + time_tag_changed: datetime | None def _get_ai_analysis_state_for_update( @@ -564,7 +574,7 @@ def _persist_ai_analysis_state( asked_at_transcript_id: str | None, asked_at: datetime | None, ) -> AnalysisTagUpdateResult: - _ = conn.execute( + row = conn.execute( sa.update(models.AIAnalysis) .where(models.AIAnalysis.analysis_id == analysis_id) .values( @@ -574,13 +584,22 @@ def _persist_ai_analysis_state( asked_at_transcript_id=asked_at_transcript_id, asked_at=asked_at, ) - ) + .returning( + models.AIAnalysis.asked_at, + models.AIAnalysis.time_tag_changed, + ) + ).one() + + asked_at_db = cast(datetime | None, row.asked_at) + time_tag_changed_db = cast(datetime | None, row.time_tag_changed) return AnalysisTagUpdateResult( analysis_id=analysis_id, tag=tag, was_asked=was_asked, asked_at_transcript_id=asked_at_transcript_id, + asked_at=_normalize_db_timestamp(asked_at_db), + time_tag_changed=_normalize_db_timestamp(time_tag_changed_db), ) @@ -650,12 +669,10 @@ def get_all_ai_analyses( ordinal=row.ordinal, # pyright: ignore[reportAny] was_asked=row.was_asked, # pyright: ignore[reportAny] asked_at_transcript_id=row.asked_at_transcript_id, # pyright: ignore[reportAny] - asked_at=row.asked_at.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] - if row.asked_at # pyright: ignore[reportAny] - else None, - time_tag_changed=row.time_tag_changed.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] - if row.time_tag_changed # pyright: ignore[reportAny] - else None, + asked_at=_normalize_db_timestamp(cast(datetime | None, row.asked_at)), + time_tag_changed=_normalize_db_timestamp( + cast(datetime | None, row.time_tag_changed) + ), ) for row in rows ] @@ -736,12 +753,10 @@ def get_analyses_by_ids( ordinal=row.ordinal, # pyright: ignore[reportAny] was_asked=row.was_asked, # pyright: ignore[reportAny] asked_at_transcript_id=row.asked_at_transcript_id, # pyright: ignore[reportAny] - 
asked_at=row.asked_at.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] - if row.asked_at # pyright: ignore[reportAny] - else None, - time_tag_changed=row.time_tag_changed.replace(tzinfo=timezone.utc) # pyright: ignore[reportAny] - if row.time_tag_changed # pyright: ignore[reportAny] - else None, + asked_at=_normalize_db_timestamp(cast(datetime | None, row.asked_at)), + time_tag_changed=_normalize_db_timestamp( + cast(datetime | None, row.time_tag_changed) + ), ) for row in rows } diff --git a/backend/src/interview_helper/context_manager/messages.py b/backend/src/interview_helper/context_manager/messages.py index d4aaa09..6d53a38 100644 --- a/backend/src/interview_helper/context_manager/messages.py +++ b/backend/src/interview_helper/context_manager/messages.py @@ -72,6 +72,8 @@ class UpdateAIAnalysisTag(BaseModel): tag: Literal["starred", "dismissed", "starred_dismissed"] | None was_asked: bool | None = None asked_at_transcript_id: str | None = None + asked_at: datetime | None = None + time_tag_changed: datetime | None = None class MarkAIAnalysisAsked(BaseModel): diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py index 7752fb3..b7249b4 100644 --- a/backend/src/interview_helper/downloads/get_report.py +++ b/backend/src/interview_helper/downloads/get_report.py @@ -1,6 +1,6 @@ from __future__ import annotations -from collections import defaultdict +from collections import defaultdict, deque from dataclasses import dataclass, field from datetime import datetime, timedelta, timezone from io import BytesIO @@ -216,23 +216,7 @@ def _analysis_context_anchor( return chunk_to_section_anchor.get(str(analysis.transcript_context_end)) -def _build_transcript_excerpt( - transcript_rows: Sequence[TranscriptionWithProjectDetails], - asked_at_timestamp: datetime, -) -> str | None: - excerpt_start_time = asked_at_timestamp - TRANSCRIPT_EXCERPT_WINDOW - excerpt_rows: list[tuple[str, str]] = [] - - for row in 
transcript_rows: - row_timestamp = extract_timestamp_from_ulid(str(row["transcription_id"])) - if not (excerpt_start_time <= row_timestamp < asked_at_timestamp): - continue - - speaker = str(row["speaker"] or "Unknown Speaker") - text = str(row["text_output"] or "").strip() - if text: - excerpt_rows.append((speaker, text)) - +def _format_excerpt_rows(excerpt_rows: Sequence[tuple[str, str]]) -> str | None: if len(excerpt_rows) == 0: return None @@ -266,6 +250,54 @@ def flush_current() -> None: return "\n".join(grouped_lines) if grouped_lines else None +def _precompute_transcript_excerpts_by_asked_at( + transcript_rows: Sequence[TranscriptionWithProjectDetails], + analyses: Sequence[AnalysisRow], +) -> dict[str, str | None]: + asked_events = sorted( + [ + (analysis.asked_at, analysis.analysis_id) + for analysis in analyses + if analysis.asked_at is not None + ], + key=lambda item: item[0], + ) + if len(asked_events) == 0: + return {} + + excerpts_by_analysis_id: dict[str, str | None] = {} + window_rows: deque[tuple[datetime, str, str]] = deque() + transcript_events: list[tuple[datetime, str, str]] = [] + + for row in transcript_rows: + row_timestamp = extract_timestamp_from_ulid(str(row["transcription_id"])) + speaker = str(row["speaker"] or "Unknown Speaker") + text = str(row["text_output"] or "").strip() + if text: + transcript_events.append((row_timestamp, speaker, text)) + + event_index = 0 + event_count = len(transcript_events) + + for asked_at_timestamp, analysis_id in asked_events: + while ( + event_index < event_count + and transcript_events[event_index][0] < asked_at_timestamp + ): + window_rows.append(transcript_events[event_index]) + event_index += 1 + + excerpt_start_time = asked_at_timestamp - TRANSCRIPT_EXCERPT_WINDOW + while window_rows and window_rows[0][0] < excerpt_start_time: + _ = window_rows.popleft() + + excerpts_by_analysis_id[analysis_id] = _format_excerpt_rows( + [(speaker, text) for _, speaker, text in window_rows] + ) + + return 
excerpts_by_analysis_id + + def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | None: typed_project_id = ProjectId.from_str(project_id) @@ -276,6 +308,10 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N anchor_index = _build_transcript_anchor_index(transcript_rows) analyses = get_all_ai_analyses(db, typed_project_id) + transcript_excerpts_by_analysis_id = _precompute_transcript_excerpts_by_asked_at( + transcript_rows, analyses + ) + answered_by_category: dict[str, list[ReportQuestionEntry]] = defaultdict(list) unanswered_by_category: dict[str, list[ReportQuestionEntry]] = defaultdict(list) @@ -293,8 +329,8 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N asked_at_id = analysis.asked_at_transcript_id.lower() answered_anchor = anchor_index.chunk_to_section_anchor.get(asked_at_id) answered_at_text = _format_utc(analysis.asked_at) - transcript_excerpt = _build_transcript_excerpt( - transcript_rows, analysis.asked_at + transcript_excerpt = transcript_excerpts_by_analysis_id.get( + analysis.analysis_id ) question_anchor = f"question-{analysis.ordinal}" diff --git a/frontend/src/components/AudioSender.tsx b/frontend/src/components/AudioSender.tsx index 081ab12..cfa129d 100644 --- a/frontend/src/components/AudioSender.tsx +++ b/frontend/src/components/AudioSender.tsx @@ -501,6 +501,9 @@ export function AudioSender({ projectId }: AudioSenderProps) { was_asked: message.was_asked ?? null, asked_at_transcript_id: message.asked_at_transcript_id ?? null, + asked_at: message.asked_at ?? null, + time_tag_changed: + message.time_tag_changed ?? 
null, } : insight, ), diff --git a/frontend/src/lib/message.ts b/frontend/src/lib/message.ts index b3e8242..c518268 100644 --- a/frontend/src/lib/message.ts +++ b/frontend/src/lib/message.ts @@ -98,6 +98,8 @@ export interface UpdateAIAnalysisTag { tag: "starred" | "dismissed" | "starred_dismissed" | null; was_asked?: boolean | null; asked_at_transcript_id?: string | null; + asked_at?: string | null; + time_tag_changed?: string | null; } export interface MarkAIAnalysisAsked { From eb35d5bdfca53cf040fa9ecaf3922a351d5bfdc0 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Wed, 11 Mar 2026 14:49:47 -0700 Subject: [PATCH 13/14] feat(backend): add stars to report --- .../interview_helper/downloads/get_report.py | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py index b7249b4..73465e0 100644 --- a/backend/src/interview_helper/downloads/get_report.py +++ b/backend/src/interview_helper/downloads/get_report.py @@ -45,6 +45,7 @@ class ReportQuestionEntry: context_anchor: str | None answered_at_anchor: str | None answered_at_text: str | None + is_starred: bool transcript_excerpt: str | None = None @@ -86,6 +87,16 @@ def _format_duration_hms(duration: timedelta) -> str: return f"{hours}h {minutes}m {seconds}s" +def _format_excerpt_window(duration: timedelta) -> str: + total_seconds = int(max(duration.total_seconds(), 0)) + minutes, seconds = divmod(total_seconds, 60) + if minutes > 0 and seconds == 0: + return f"{minutes} minute" if minutes == 1 else f"{minutes} minutes" + if minutes > 0: + return f"{minutes}m {seconds}s" + return f"{seconds} second" if seconds == 1 else f"{seconds} seconds" + + def _ordered_category_items( grouped: dict[str, list[ReportQuestionEntry]], ) -> list[tuple[str, list[ReportQuestionEntry]]]: @@ -347,6 +358,7 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N ), 
answered_at_anchor=answered_anchor, answered_at_text=answered_at_text, + is_starred=analysis.tag in ("starred", "starred_dismissed"), transcript_excerpt=transcript_excerpt, ) @@ -408,9 +420,19 @@ def _render_question_sections( story.append(Spacer(1, 0.12 * inch)) for entry in entries: + star_icon = ( + '' + if entry.is_starred + else "" + ) + question_label = ( + f"Q{entry.ordinal}. {star_icon}" + if entry.is_starred + else f"Q{entry.ordinal}." + ) story.append( Paragraph( - f'Q{entry.ordinal}. {escape(entry.text)}', + f'{question_label} {escape(entry.text)}', question_style, ) ) @@ -449,17 +471,18 @@ def _render_question_sections( ) if entry.transcript_excerpt: - formatted_excerpt = "... " + escape(entry.transcript_excerpt).replace( - "\n", "
" + formatted_excerpt = ( + '[ . . . ]
' + + escape(entry.transcript_excerpt).replace("\n", "
") ) excerpt_label = ( - f'
Transcript Excerpt' + f'Transcript Excerpt (Last {_format_excerpt_window(TRANSCRIPT_EXCERPT_WINDOW)})' if entry.answered_at_anchor - else "Transcript Excerpt" + else f"Transcript Excerpt (Last {_format_excerpt_window(TRANSCRIPT_EXCERPT_WINDOW)})" ) story.append( Paragraph( - f'{excerpt_label}: {formatted_excerpt}', + f'{excerpt_label}:
{formatted_excerpt}', excerpt_style, ) ) From aecbba510c468fe662f67ebf124cec0b942fa6b6 Mon Sep 17 00:00:00 2001 From: Dylan Starink Date: Wed, 11 Mar 2026 15:04:59 -0700 Subject: [PATCH 14/14] feat(backend): add gaps in transcript --- .../interview_helper/downloads/get_report.py | 126 +++++++++++++++--- .../tests/test_report_generation.py | 3 +- 2 files changed, 113 insertions(+), 16 deletions(-) diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py index 73465e0..d70e14d 100644 --- a/backend/src/interview_helper/downloads/get_report.py +++ b/backend/src/interview_helper/downloads/get_report.py @@ -32,6 +32,10 @@ # Time window for transcript excerpts before answered questions TRANSCRIPT_EXCERPT_WINDOW = timedelta(minutes=1) +# Time gap threshold for adding visual separators between transcript entries +TRANSCRIPT_ENTRY_GAP_SEPARATOR_THRESHOLD = timedelta(minutes=5) +# If an answered-question event happens after this delay, render a standalone timestamp. 
+ANSWERED_EVENT_TIMESTAMP_CUTOFF = timedelta(minutes=2) @dataclass @@ -55,8 +59,11 @@ class ReportTranscriptSection: speaker: str text: str started_at: datetime + ended_at: datetime chunk_ids: list[str] = field(default_factory=list) - answered_question_refs: list[tuple[int, str]] = field(default_factory=list) + answered_question_refs: list[tuple[int, str, datetime]] = field( + default_factory=list + ) @dataclass @@ -97,6 +104,18 @@ def _format_excerpt_window(duration: timedelta) -> str: return f"{seconds} second" if seconds == 1 else f"{seconds} seconds" +def _format_gap_duration(duration: timedelta) -> str: + total_seconds = int(max(duration.total_seconds(), 0)) + hours, rem = divmod(total_seconds, 3600) + minutes, seconds = divmod(rem, 60) + + if hours > 0: + return f"{hours}h {minutes}m {seconds}s" + if minutes > 0: + return f"{minutes}m {seconds}s" + return f"{seconds}s" + + def _ordered_category_items( grouped: dict[str, list[ReportQuestionEntry]], ) -> list[tuple[str, list[ReportQuestionEntry]]]: @@ -130,11 +149,13 @@ def flush_current() -> None: return anchor = f"transcript-{len(sections) + 1}" + ended_at = extract_timestamp_from_ulid(current_chunk_ids[-1]) section = ReportTranscriptSection( anchor=anchor, speaker=current_speaker, text=" ".join(current_texts).strip(), started_at=current_started_at, + ended_at=ended_at, chunk_ids=[*current_chunk_ids], ) sections.append(section) @@ -364,17 +385,17 @@ def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | N if analysis.was_asked is True: answered_by_category[normalized_category].append(entry) - if answered_anchor is not None: + if answered_anchor is not None and analysis.asked_at is not None: section = anchor_index.section_by_anchor.get(answered_anchor) if section is not None: section.answered_question_refs.append( - (entry.ordinal, entry.question_anchor) + (entry.ordinal, entry.question_anchor, analysis.asked_at) ) else: unanswered_by_category[normalized_category].append(entry) for 
section in anchor_index.sections: - section.answered_question_refs.sort(key=lambda item: item[0]) + section.answered_question_refs.sort(key=lambda item: item[2]) project_name = str(transcript_rows[0]["project_name"] or "Untitled Project") first_timestamp = extract_timestamp_from_ulid( @@ -558,6 +579,15 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None spaceBefore=6, spaceAfter=4, ) + gap_separator_style = ParagraphStyle( + "GapSeparatorStyle", + parent=normal_style, + fontSize=10, + textColor=colors.HexColor("#777777"), + alignment=1, + spaceBefore=6, + spaceAfter=6, + ) story: list[Flowable] = [] @@ -610,7 +640,19 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None story.append(Paragraph("Transcript", heading_style)) story.append(Spacer(1, 0.15 * inch)) + previous_ended_at: datetime | None = None for section in report_data.transcript_sections: + if previous_ended_at is not None: + time_gap = section.started_at - previous_ended_at + if time_gap > TRANSCRIPT_ENTRY_GAP_SEPARATOR_THRESHOLD: + story.append( + Paragraph( + f'[ . . . {_format_gap_duration(time_gap)} passed . . . 
]', + gap_separator_style, + ) + ) + story.append(Spacer(1, 0.06 * inch)) + speaker = section.speaker if section.speaker else "Unknown Speaker" transcript_heading = f"[{_format_utc(section.started_at)}] {speaker}" story.append( @@ -627,20 +669,74 @@ def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None ) if section.answered_question_refs: - links = ", ".join( - [ - f'Q{ordinal}' - for ordinal, question_anchor in section.answered_question_refs - ] - ) - story.append( - Paragraph( - f'Answered Here: {links}', - normal_style, + grouped_answered_refs: list[ + tuple[datetime, datetime, list[tuple[int, str]]] + ] = [] + group_first_at: datetime | None = None + group_last_at: datetime | None = None + group_links: list[tuple[int, str]] = [] + + for ordinal, question_anchor, answered_at in section.answered_question_refs: + if group_first_at is None or group_last_at is None: + group_first_at = answered_at + group_last_at = answered_at + group_links = [(ordinal, question_anchor)] + continue + + if answered_at - group_last_at <= ANSWERED_EVENT_TIMESTAMP_CUTOFF: + group_last_at = answered_at + group_links.append((ordinal, question_anchor)) + continue + + grouped_answered_refs.append( + (group_first_at, group_last_at, [*group_links]) ) - ) + group_first_at = answered_at + group_last_at = answered_at + group_links = [(ordinal, question_anchor)] + + if group_first_at is not None and group_last_at is not None: + grouped_answered_refs.append( + (group_first_at, group_last_at, [*group_links]) + ) + + last_rendered_timestamp = section.ended_at + for group_first_at, group_last_at, grouped_links in grouped_answered_refs: + group_gap = group_first_at - last_rendered_timestamp + if group_gap > TRANSCRIPT_ENTRY_GAP_SEPARATOR_THRESHOLD: + story.append( + Paragraph( + f'[ . . . {_format_gap_duration(group_gap)} passed . . . 
]', + gap_separator_style, + ) + ) + story.append(Spacer(1, 0.06 * inch)) + + if group_gap > ANSWERED_EVENT_TIMESTAMP_CUTOFF: + story.append( + Paragraph( + f'[{_format_utc(group_first_at)}]', + normal_style, + ) + ) + + grouped_links_text = ", ".join( + [ + f'Q{ordinal}' + for ordinal, question_anchor in grouped_links + ] + ) + + story.append( + Paragraph( + f'Answered Here: {grouped_links_text}', + normal_style, + ) + ) + last_rendered_timestamp = group_last_at story.append(Spacer(1, 0.1 * inch)) + previous_ended_at = section.ended_at story.append(PageBreak()) diff --git a/backend/src/interview_helper/tests/test_report_generation.py b/backend/src/interview_helper/tests/test_report_generation.py index 52607cd..ab49529 100644 --- a/backend/src/interview_helper/tests/test_report_generation.py +++ b/backend/src/interview_helper/tests/test_report_generation.py @@ -165,8 +165,9 @@ def test_build_report_data_groups_questions_and_creates_bidirectional_anchors(): transcript_section = report.transcript_sections[1] assert len(transcript_section.answered_question_refs) == 1 - _, question_anchor = transcript_section.answered_question_refs[0] + _, question_anchor, question_datetime = transcript_section.answered_question_refs[0] assert question_anchor == answered_entry.question_anchor + assert question_datetime == explicit_asked_at def test_generate_report_pdf_returns_pdf_bytes():