diff --git a/backend/alembic/versions/869cfd49ebd5_initial.py b/backend/alembic/versions/869cfd49ebd5_initial.py
index c67a692..35535d7 100644
--- a/backend/alembic/versions/869cfd49ebd5_initial.py
+++ b/backend/alembic/versions/869cfd49ebd5_initial.py
@@ -27,7 +27,7 @@ def upgrade() -> None:
sa.Column("oidc_id", sa.String(length=255), nullable=False),
sa.Column(
"updated_at",
- sa.DateTime(),
+ sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
@@ -42,13 +42,13 @@ def upgrade() -> None:
sa.Column("creator_user_id", sa.String(length=26), nullable=False),
sa.Column(
"created_at",
- sa.DateTime(),
+ sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
sa.Column(
"updated_at",
- sa.DateTime(),
+ sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
@@ -65,11 +65,11 @@ def upgrade() -> None:
sa.Column("user_id", sa.String(length=26), nullable=False),
sa.Column(
"started_at",
- sa.DateTime(),
+ sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
- sa.Column("ended_at", sa.DateTime(), nullable=True),
+ sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(
["project_id"],
["project.project_id"],
@@ -90,13 +90,13 @@ def upgrade() -> None:
sa.Column("speaker", sa.String(length=100), nullable=True),
sa.Column(
"created_at",
- sa.DateTime(),
+ sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
sa.Column(
"updated_at",
- sa.DateTime(),
+ sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
@@ -115,15 +115,17 @@ def upgrade() -> None:
sa.Column("analysis_id", sa.String(length=26), nullable=False),
sa.Column("project_id", sa.String(length=26), nullable=False),
sa.Column("text", sa.Text(), nullable=False),
+ sa.Column("category_code", sa.String(length=1), nullable=False),
sa.Column("span", sa.Text(), nullable=True),
sa.Column("transcript_span_id", sa.String(length=26), nullable=True),
sa.Column("transcript_context_start", sa.String(length=26), nullable=False),
sa.Column("transcript_context_end", sa.String(length=26), nullable=False),
sa.Column("summary", sa.Text(), nullable=False),
sa.Column("tag", sa.String(length=50), nullable=True),
- sa.Column("time_tag_changed", sa.DateTime(), nullable=True),
+ sa.Column("time_tag_changed", sa.DateTime(timezone=True), nullable=True),
sa.Column("was_asked", sa.Boolean(), nullable=True),
sa.Column("asked_at_transcript_id", sa.String(length=26), nullable=True),
+ sa.Column("asked_at", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(
["project_id"],
["project.project_id"],
diff --git a/backend/src/interview_helper/ai_analysis/ai_analysis.py b/backend/src/interview_helper/ai_analysis/ai_analysis.py
index 5b4aecd..bf13156 100644
--- a/backend/src/interview_helper/ai_analysis/ai_analysis.py
+++ b/backend/src/interview_helper/ai_analysis/ai_analysis.py
@@ -18,6 +18,10 @@
ProjectId,
TranscriptId,
)
+from interview_helper.context_manager.question_categories import (
+ QUESTION_CATEGORIES,
+ normalize_question_category_code,
+)
from langchain_openai import AzureChatOpenAI
from langchain.tools import ToolRuntime, tool # pyright: ignore[reportUnknownVariableType]
from langchain.agents import create_agent # pyright: ignore[reportUnknownVariableType]
@@ -40,6 +44,7 @@
class Question(BaseModel):
question: str
grounding_span: str
+ category_code: str
class Analysis(BaseModel):
@@ -55,7 +60,11 @@ class ProjectContext:
class SimpleAnalyzer:
"""Simple LLM-based interview analyzer."""
- SYSTEM_PROMPT: str = dedent("""\
+ CATEGORY_PROMPT_BLOCK: str = "\n".join(
+ [f" - {code}: {label}" for code, label in QUESTION_CATEGORIES]
+ )
+
+ SYSTEM_PROMPT: str = dedent(f"""\
ROLE: Interview Follow-Up Generator for SAR Profiles
You will receive a chunk of transcript from an in-depth profile interview for a Search and Rescue operation.
@@ -79,8 +88,12 @@ class SimpleAnalyzer:
7) Output: ONE to THREE questions, each with:
- question (string)
- grounding_span (short verbatim quote from the transcript)
+ - category_code (single letter in B-W from list below)
As well as a brief summary of the entire situation so far, based on your knowledge.
+ Category codes:
+{CATEGORY_PROMPT_BLOCK}
+
ALWAYS use the provided TOOLS to check for duplicates or
gather more context from the transcript history before finalizing your questions.
@@ -246,6 +259,7 @@ def clean_grounding_span(span: str) -> str:
AIQuestion(
question=q.question,
grounding_span=clean_grounding_span(q.grounding_span),
+ category_code=normalize_question_category_code(q.category_code),
)
for q in analysis.questions
]
diff --git a/backend/src/interview_helper/config.py b/backend/src/interview_helper/config.py
index fff1bd5..2dc3755 100644
--- a/backend/src/interview_helper/config.py
+++ b/backend/src/interview_helper/config.py
@@ -50,7 +50,7 @@ def model_post_init(self, __context):
bytes_per_sample: int = 2
# AI Processing
- process_transcript_every_secs: float = 60.0 * 2 # 2 minutes
+ process_transcript_every_secs: float = 60.0 * 4 # 4 minutes
process_transcript_every_word_count: int = 100
azure_api_endpoint: str = Field(alias="OPENAI_API_ENDPOINT")
diff --git a/backend/src/interview_helper/context_manager/database.py b/backend/src/interview_helper/context_manager/database.py
index 3a049b9..9654727 100644
--- a/backend/src/interview_helper/context_manager/database.py
+++ b/backend/src/interview_helper/context_manager/database.py
@@ -1,14 +1,17 @@
from collections.abc import Sequence
-from datetime import datetime
+from datetime import datetime, timezone
from pydantic import BaseModel
from sqlalchemy.sql.sqltypes import DateTime
-from typing import Literal, TypedDict
+from typing import Literal, TypedDict, cast
from interview_helper.context_manager.types import (
AnalysisId,
ProjectId,
SessionId,
TranscriptId,
)
+from interview_helper.context_manager.question_categories import (
+ normalize_question_category_code,
+)
from interview_helper.context_manager.types import UserId
from alembic.config import Config
from alembic import command
@@ -483,6 +486,14 @@ class ProjectCreatorInfo:
name: str
+def _normalize_db_timestamp(ts: datetime | None) -> datetime | None:
+    """Return ``ts`` as a timezone-aware UTC datetime.
+
+    ``None`` passes through unchanged. A naive value is labelled as UTC
+    without shifting its clock time; an aware value is converted to UTC.
+    """
+    if ts is None:
+        return None
+    is_naive = ts.tzinfo is None
+    return ts.replace(tzinfo=timezone.utc) if is_naive else ts.astimezone(timezone.utc)
+
+
def get_project_creator_and_name(
db: PersistentDatabase, project_id: ProjectId
) -> ProjectCreatorInfo | None:
@@ -510,6 +521,7 @@ def get_project_creator_and_name(
class AnalysisRow(BaseModel):
analysis_id: str
text: str
+ category_code: str
span: str | None
transcript_span_id: TranscriptId | None
tag: Literal["starred", "dismissed", "starred_dismissed"] | None
@@ -519,6 +531,76 @@ class AnalysisRow(BaseModel):
ordinal: int
was_asked: bool | None = None
asked_at_transcript_id: str | None = None
+ asked_at: datetime | None = None
+ time_tag_changed: datetime | None = None
+
+
+# Tag values an analysis row can carry. The tag-mutation helpers in this
+# module call the None (untagged) state "active" in their error messages.
+type AnalysisTag = Literal["starred", "dismissed", "starred_dismissed"] | None
+
+
+@dataclass
+class AnalysisTagUpdateResult:
+ """State of one AI analysis as returned by ``_persist_ai_analysis_state``."""
+
+ analysis_id: str
+ tag: AnalysisTag
+ was_asked: bool | None
+ asked_at_transcript_id: str | None
+ # ``_persist_ai_analysis_state`` passes both timestamps through
+ # ``_normalize_db_timestamp`` before storing them here.
+ asked_at: datetime | None
+ time_tag_changed: datetime | None
+
+
+def _get_ai_analysis_state_for_update(
+    conn: sa.Connection, analysis_id: str
+) -> tuple[AnalysisTag, bool | None, str | None]:
+    """Read the current tag/asked state of one analysis, locking its row.
+
+    The row is selected ``FOR UPDATE`` so that the validate-then-write
+    sequence the callers run inside one transaction cannot interleave with
+    another writer on backends that support row locks. SQLAlchemy's SQLite
+    dialect emits no FOR UPDATE clause, so behavior there is unchanged.
+
+    Args:
+        conn: Connection of the transaction the follow-up write will use.
+        analysis_id: ID of the analysis row to read.
+
+    Returns:
+        ``(tag, was_asked, asked_at_transcript_id)`` of the row.
+
+    Raises:
+        ValueError: If no row has the given ``analysis_id``.
+    """
+    analysis = models.AIAnalysis
+    stmt = (
+        sa.select(analysis.tag, analysis.was_asked, analysis.asked_at_transcript_id)
+        .where(analysis.analysis_id == analysis_id)
+        .with_for_update()
+    )
+    row = conn.execute(stmt).one_or_none()
+
+    if row is None:
+        raise ValueError(f"analysis_id {analysis_id} was not found")
+
+    return row.tag, row.was_asked, row.asked_at_transcript_id  # pyright: ignore[reportAny]
+
+
+def _persist_ai_analysis_state(
+    conn: sa.Connection,
+    *,
+    analysis_id: str,
+    tag: AnalysisTag,
+    was_asked: bool | None,
+    asked_at_transcript_id: str | None,
+    asked_at: datetime | None,
+) -> AnalysisTagUpdateResult:
+    """Overwrite the tag and asked-tracking columns of one analysis row.
+
+    ``time_tag_changed`` is set to the database's ``now()`` on every call.
+    The stored ``asked_at`` and ``time_tag_changed`` are read back through
+    ``RETURNING`` and normalized with ``_normalize_db_timestamp``.
+
+    Returns:
+        The values that were written, plus the two read-back timestamps.
+
+    Raises:
+        sqlalchemy.exc.NoResultFound / MultipleResultsFound: Raised by
+            ``Result.one()`` unless the UPDATE returns exactly one row.
+    """
+    analysis = models.AIAnalysis
+    new_values: dict[str, object] = {
+        "tag": tag,
+        "time_tag_changed": sa.func.now(),
+        "was_asked": was_asked,
+        "asked_at_transcript_id": asked_at_transcript_id,
+        "asked_at": asked_at,
+    }
+    stmt = (
+        sa.update(analysis)
+        .where(analysis.analysis_id == analysis_id)
+        .values(**new_values)
+        .returning(analysis.asked_at, analysis.time_tag_changed)
+    )
+    stored = conn.execute(stmt).one()
+
+    return AnalysisTagUpdateResult(
+        analysis_id=analysis_id,
+        tag=tag,
+        was_asked=was_asked,
+        asked_at_transcript_id=asked_at_transcript_id,
+        asked_at=_normalize_db_timestamp(cast(datetime | None, stored.asked_at)),
+        time_tag_changed=_normalize_db_timestamp(
+            cast(datetime | None, stored.time_tag_changed)
+        ),
+    )
def get_all_ai_analyses(
@@ -533,6 +615,7 @@ def get_all_ai_analyses(
sa.select(
models.AIAnalysis.analysis_id,
models.AIAnalysis.text,
+ models.AIAnalysis.category_code,
models.AIAnalysis.span,
models.AIAnalysis.transcript_span_id,
models.AIAnalysis.transcript_context_start,
@@ -541,6 +624,8 @@ def get_all_ai_analyses(
models.AIAnalysis.tag,
models.AIAnalysis.was_asked,
models.AIAnalysis.asked_at_transcript_id,
+ models.AIAnalysis.asked_at,
+ models.AIAnalysis.time_tag_changed,
sa.func.row_number()
.over(order_by=models.AIAnalysis.analysis_id.asc())
.label("ordinal"),
@@ -551,6 +636,7 @@ def get_all_ai_analyses(
sa.select(
subq.c.analysis_id,
subq.c.text,
+ subq.c.category_code,
subq.c.span,
subq.c.transcript_span_id,
subq.c.transcript_context_start,
@@ -559,6 +645,8 @@ def get_all_ai_analyses(
subq.c.tag,
subq.c.was_asked,
subq.c.asked_at_transcript_id,
+ subq.c.asked_at,
+ subq.c.time_tag_changed,
subq.c.ordinal,
).order_by(subq.c.analysis_id.asc())
).all()
@@ -567,6 +655,7 @@ def get_all_ai_analyses(
AnalysisRow(
analysis_id=row.analysis_id, # pyright: ignore[reportAny]
text=row.text, # pyright: ignore[reportAny]
+ category_code=row.category_code, # pyright: ignore[reportAny]
span=row.span, # pyright: ignore[reportAny]
transcript_span_id=TranscriptId.from_str(row.transcript_span_id) # pyright: ignore[reportAny]
if row.transcript_span_id # pyright: ignore[reportAny]
@@ -580,6 +669,10 @@ def get_all_ai_analyses(
ordinal=row.ordinal, # pyright: ignore[reportAny]
was_asked=row.was_asked, # pyright: ignore[reportAny]
asked_at_transcript_id=row.asked_at_transcript_id, # pyright: ignore[reportAny]
+ asked_at=_normalize_db_timestamp(cast(datetime | None, row.asked_at)),
+ time_tag_changed=_normalize_db_timestamp(
+ cast(datetime | None, row.time_tag_changed)
+ ),
)
for row in rows
]
@@ -603,6 +696,7 @@ def get_analyses_by_ids(
sa.select(
models.AIAnalysis.analysis_id,
models.AIAnalysis.text,
+ models.AIAnalysis.category_code,
models.AIAnalysis.span,
models.AIAnalysis.transcript_span_id,
models.AIAnalysis.transcript_context_start,
@@ -611,6 +705,8 @@ def get_analyses_by_ids(
models.AIAnalysis.tag,
models.AIAnalysis.was_asked,
models.AIAnalysis.asked_at_transcript_id,
+ models.AIAnalysis.asked_at,
+ models.AIAnalysis.time_tag_changed,
sa.func.row_number()
.over(order_by=models.AIAnalysis.analysis_id.asc())
.label("ordinal"),
@@ -621,6 +717,7 @@ def get_analyses_by_ids(
sa.select(
subq.c.analysis_id,
subq.c.text,
+ subq.c.category_code,
subq.c.span,
subq.c.transcript_span_id,
subq.c.transcript_context_start,
@@ -629,6 +726,8 @@ def get_analyses_by_ids(
subq.c.tag,
subq.c.was_asked,
subq.c.asked_at_transcript_id,
+ subq.c.asked_at,
+ subq.c.time_tag_changed,
subq.c.ordinal,
)
.where(subq.c.analysis_id.in_(analysis_id_strs))
@@ -640,6 +739,7 @@ def get_analyses_by_ids(
row.analysis_id: AnalysisRow( # pyright: ignore[reportAny]
analysis_id=row.analysis_id, # pyright: ignore[reportAny]
text=row.text, # pyright: ignore[reportAny]
+ category_code=row.category_code, # pyright: ignore[reportAny]
span=row.span, # pyright: ignore[reportAny]
tag=row.tag, # pyright: ignore[reportAny]
transcript_context_start=TranscriptId.from_str(
@@ -653,6 +753,10 @@ def get_analyses_by_ids(
ordinal=row.ordinal, # pyright: ignore[reportAny]
was_asked=row.was_asked, # pyright: ignore[reportAny]
asked_at_transcript_id=row.asked_at_transcript_id, # pyright: ignore[reportAny]
+ asked_at=_normalize_db_timestamp(cast(datetime | None, row.asked_at)),
+ time_tag_changed=_normalize_db_timestamp(
+ cast(datetime | None, row.time_tag_changed)
+ ),
)
for row in rows
}
@@ -663,43 +767,109 @@ def get_analyses_by_ids(
]
-def update_ai_analysis_tag(
+def mark_ai_analysis_asked(
db: PersistentDatabase,
analysis_id: str,
- tag: str | None,
- _user_id: UserId,
- was_asked: bool | None = None,
- asked_at_transcript_id: str | None = None,
-):
- """
- Update the tag for an AI analysis.
+ asked_at_transcript_id: str,
+) -> AnalysisTagUpdateResult:
+ with db.begin() as conn:
+ current_tag, _, _ = _get_ai_analysis_state_for_update(conn, analysis_id)
+ if current_tag not in (None, "starred"):
+ raise ValueError("mark_asked is only valid for active or starred analyses")
- Args:
- analysis_id: The ID of the analysis to update
- tag: The new tag value ("starred", "dismissed", "starred_dismissed", or None to clear)
- _user_id: User ID (kept for API compatibility, not used as tags are project-wide)
- was_asked: Whether the question was asked (only relevant when dismissing)
- asked_at_transcript_id: The transcript ID where the question was asked
- """
+ asked_at = datetime.now(timezone.utc)
+ new_tag: AnalysisTag = (
+ "starred_dismissed" if current_tag == "starred" else "dismissed"
+ )
+ return _persist_ai_analysis_state(
+ conn,
+ analysis_id=analysis_id,
+ tag=new_tag,
+ was_asked=True,
+ asked_at_transcript_id=asked_at_transcript_id,
+ asked_at=asked_at,
+ )
+
+
+def mark_ai_analysis_dismissed_not_asked(
+ db: PersistentDatabase,
+ analysis_id: str,
+) -> AnalysisTagUpdateResult:
with db.begin() as conn:
- update_values: dict[str, object] = {
- "tag": tag,
- "time_tag_changed": sa.func.now(),
- }
+ current_tag, _, _ = _get_ai_analysis_state_for_update(conn, analysis_id)
+ if current_tag not in (None, "starred"):
+ raise ValueError(
+ "mark_dismissed_not_asked is only valid for active or starred analyses"
+ )
- # Only update answered fields if they are provided
- if was_asked is not None:
- update_values["was_asked"] = was_asked
+ new_tag: AnalysisTag = (
+ "starred_dismissed" if current_tag == "starred" else "dismissed"
+ )
+ return _persist_ai_analysis_state(
+ conn,
+ analysis_id=analysis_id,
+ tag=new_tag,
+ was_asked=False,
+ asked_at_transcript_id=None,
+ asked_at=None,
+ )
- if asked_at_transcript_id is None:
- update_values["asked_at_transcript_id"] = None
- else:
- update_values["asked_at_transcript_id"] = asked_at_transcript_id
- _ = conn.execute(
- sa.update(models.AIAnalysis)
- .where(models.AIAnalysis.analysis_id == analysis_id)
- .values(**update_values)
+def undo_ai_analysis_dismissal(
+    db: PersistentDatabase,
+    analysis_id: str,
+) -> AnalysisTagUpdateResult:
+    """Revert a dismissal and clear the asked-tracking fields.
+
+    ``starred_dismissed`` returns to ``starred``; ``dismissed`` returns to
+    no tag. ``was_asked``, ``asked_at_transcript_id`` and ``asked_at`` are
+    all reset to ``None``.
+
+    Raises:
+        ValueError: If the analysis does not exist or is not dismissed.
+    """
+    with db.begin() as conn:
+        tag_before, *_ = _get_ai_analysis_state_for_update(conn, analysis_id)
+
+        tag_after: AnalysisTag
+        if tag_before == "starred_dismissed":
+            tag_after = "starred"
+        elif tag_before == "dismissed":
+            tag_after = None
+        else:
+            raise ValueError("undo is only valid for dismissed analyses")
+
+        return _persist_ai_analysis_state(
+            conn,
+            analysis_id=analysis_id,
+            tag=tag_after,
+            was_asked=None,
+            asked_at_transcript_id=None,
+            asked_at=None,
+        )
+
+
+def star_ai_analysis(
+    db: PersistentDatabase,
+    analysis_id: str,
+) -> AnalysisTagUpdateResult:
+    """Star an analysis that currently has no tag.
+
+    The asked-tracking fields are written as ``None`` alongside the new tag.
+
+    Raises:
+        ValueError: If the analysis does not exist or already has a tag.
+    """
+    with db.begin() as conn:
+        state = _get_ai_analysis_state_for_update(conn, analysis_id)
+        is_active = state[0] is None
+        if not is_active:
+            raise ValueError("star is only valid for active analyses")
+
+        return _persist_ai_analysis_state(
+            conn,
+            analysis_id=analysis_id,
+            tag="starred",
+            was_asked=None,
+            asked_at_transcript_id=None,
+            asked_at=None,
+        )
+
+
+def unstar_ai_analysis(
+ db: PersistentDatabase,
+ analysis_id: str,
+) -> AnalysisTagUpdateResult:
+ with db.begin() as conn:
+ current_tag, _, _ = _get_ai_analysis_state_for_update(conn, analysis_id)
+ if current_tag != "starred":
+ raise ValueError("unstar is only valid for starred analyses")
+
+ return _persist_ai_analysis_state(
+ conn,
+ analysis_id=analysis_id,
+ tag=None,
+ was_asked=None,
+ asked_at_transcript_id=None,
+ asked_at=None,
)
@@ -707,6 +877,7 @@ def add_ai_analysis(
db: PersistentDatabase,
project_id: ProjectId,
text: str,
+ category_code: str,
span: str | None,
transcript_span_id: TranscriptId | None,
transcript_context_start: TranscriptId,
@@ -717,6 +888,7 @@ def add_ai_analysis(
Adds a transcription result, returns the analysis ID
"""
analysis_id = str(ULID()).lower()
+ normalized_category_code = normalize_question_category_code(category_code)
with db.begin() as conn:
assert conn.execute(
sa.insert(models.AIAnalysis),
@@ -724,6 +896,7 @@ def add_ai_analysis(
"analysis_id": analysis_id,
"project_id": str(project_id),
"text": text,
+ "category_code": normalized_category_code,
"span": span,
"transcript_span_id": str(transcript_span_id)
if transcript_span_id
diff --git a/backend/src/interview_helper/context_manager/messages.py b/backend/src/interview_helper/context_manager/messages.py
index 82045c2..6d53a38 100644
--- a/backend/src/interview_helper/context_manager/messages.py
+++ b/backend/src/interview_helper/context_manager/messages.py
@@ -72,6 +72,41 @@ class UpdateAIAnalysisTag(BaseModel):
tag: Literal["starred", "dismissed", "starred_dismissed"] | None
was_asked: bool | None = None
asked_at_transcript_id: str | None = None
+ asked_at: datetime | None = None
+ time_tag_changed: datetime | None = None
+
+
+class MarkAIAnalysisAsked(BaseModel):
+ """Message of type ``mark_ai_analysis_asked`` for a single analysis."""
+
+ type: Literal["mark_ai_analysis_asked"] = "mark_ai_analysis_asked"
+ # Default is datetime.now() called without a tz, i.e. a naive local time.
+ timestamp: datetime = Field(default_factory=datetime.now)
+ analysis_id: str
+ # NOTE(review): presumably the transcript entry at which the question was
+ # asked, as taken by database.mark_ai_analysis_asked — confirm at the handler.
+ asked_at_transcript_id: str
+
+
+class UndoAIAnalysisDismissal(BaseModel):
+ """Message of type ``undo_ai_analysis_dismissal`` for a single analysis."""
+
+ type: Literal["undo_ai_analysis_dismissal"] = "undo_ai_analysis_dismissal"
+ # Default is datetime.now() called without a tz, i.e. a naive local time.
+ timestamp: datetime = Field(default_factory=datetime.now)
+ analysis_id: str
+
+
+class MarkAIAnalysisDismissedNotAsked(BaseModel):
+ """Message of type ``mark_ai_analysis_dismissed_not_asked`` for one analysis."""
+
+ type: Literal["mark_ai_analysis_dismissed_not_asked"] = (
+ "mark_ai_analysis_dismissed_not_asked"
+ )
+ # Default is datetime.now() called without a tz, i.e. a naive local time.
+ timestamp: datetime = Field(default_factory=datetime.now)
+ analysis_id: str
+
+
+class StarAIAnalysis(BaseModel):
+ """Message of type ``star_ai_analysis`` for a single analysis."""
+
+ type: Literal["star_ai_analysis"] = "star_ai_analysis"
+ # Default is datetime.now() called without a tz, i.e. a naive local time.
+ timestamp: datetime = Field(default_factory=datetime.now)
+ analysis_id: str
+
+
+class UnstarAIAnalysis(BaseModel):
+ """Message of type ``unstar_ai_analysis`` for a single analysis."""
+
+ type: Literal["unstar_ai_analysis"] = "unstar_ai_analysis"
+ # Default is datetime.now() called without a tz, i.e. a naive local time.
+ timestamp: datetime = Field(default_factory=datetime.now)
+ analysis_id: str
class RecordingStateMessage(BaseModel):
@@ -89,6 +124,11 @@ class RecordingStateMessage(BaseModel):
| AIResultMessage
| CatchupMessage
| ProjectMetadataMessage
+ | MarkAIAnalysisAsked
+ | UndoAIAnalysisDismissal
+ | MarkAIAnalysisDismissedNotAsked
+ | StarAIAnalysis
+ | UnstarAIAnalysis
| UpdateAIAnalysisTag
| RecordingStateMessage
)
diff --git a/backend/src/interview_helper/context_manager/models.py b/backend/src/interview_helper/context_manager/models.py
index 704ef2c..78157a7 100644
--- a/backend/src/interview_helper/context_manager/models.py
+++ b/backend/src/interview_helper/context_manager/models.py
@@ -19,7 +19,7 @@ class User(Base):
full_name: Mapped[str] = mapped_column(sa.String(100), nullable=False, unique=True)
oidc_id: Mapped[str] = mapped_column(sa.String(255), nullable=False, unique=True)
updated_at: Mapped[DateTime] = mapped_column(
- sa.DateTime,
+ sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
onupdate=sa.func.now(),
@@ -42,10 +42,10 @@ class Transcription(Base):
text_output: Mapped[str] = mapped_column(sa.Text, nullable=True)
speaker: Mapped[str] = mapped_column(sa.String(100), nullable=True)
created_at: Mapped[DateTime] = mapped_column(
- sa.DateTime, nullable=False, server_default=sa.func.now()
+ sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now()
)
updated_at: Mapped[DateTime] = mapped_column(
- sa.DateTime,
+ sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
onupdate=sa.func.now(),
@@ -65,9 +65,11 @@ class Session(Base):
sa.String(26), ForeignKey("users.user_id"), nullable=False
)
started_at: Mapped[DateTime] = mapped_column(
- sa.DateTime, nullable=False, server_default=sa.func.now()
+ sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now()
+ )
+ ended_at: Mapped[DateTime | None] = mapped_column(
+ sa.DateTime(timezone=True), nullable=True
)
- ended_at: Mapped[DateTime | None] = mapped_column(sa.DateTime, nullable=True)
class AIAnalysis(Base):
@@ -80,6 +82,7 @@ class AIAnalysis(Base):
)
text: Mapped[str] = mapped_column(sa.Text, nullable=False)
+ category_code: Mapped[str] = mapped_column(sa.String(1), nullable=False)
span: Mapped[str] = mapped_column(sa.Text, nullable=True)
@@ -99,7 +102,7 @@ class AIAnalysis(Base):
tag: Mapped[str | None] = mapped_column(sa.String(50), nullable=True)
time_tag_changed: Mapped[DateTime | None] = mapped_column(
- sa.DateTime, nullable=True
+ sa.DateTime(timezone=True), nullable=True
)
# Fields for tracking if the question was asked
@@ -107,6 +110,9 @@ class AIAnalysis(Base):
asked_at_transcript_id: Mapped[str | None] = mapped_column(
sa.String(26), ForeignKey("transcriptions.transcription_id"), nullable=True
)
+ asked_at: Mapped[DateTime | None] = mapped_column(
+ sa.DateTime(timezone=True), nullable=True
+ )
class Project(Base):
@@ -121,11 +127,11 @@ class Project(Base):
)
created_at: Mapped[DateTime] = mapped_column(
- sa.DateTime, nullable=False, server_default=sa.func.now()
+ sa.DateTime(timezone=True), nullable=False, server_default=sa.func.now()
)
updated_at: Mapped[DateTime] = mapped_column(
- sa.DateTime,
+ sa.DateTime(timezone=True),
nullable=False,
server_default=sa.func.now(),
onupdate=sa.func.now(),
diff --git a/backend/src/interview_helper/context_manager/question_categories.py b/backend/src/interview_helper/context_manager/question_categories.py
new file mode 100644
index 0000000..d3068b9
--- /dev/null
+++ b/backend/src/interview_helper/context_manager/question_categories.py
@@ -0,0 +1,51 @@
+from typing import Final
+
+# Missing "A" is intentional; category list starts at "B".
+# Ordered (code, label) pairs; every other constant in this module is derived
+# from this tuple.
+QUESTION_CATEGORIES: Final[tuple[tuple[str, str], ...]] = (
+ ("B", "Source Information"),
+ ("C", "General Missing Person Information"),
+ ("D", "Physical Description"),
+ ("E", "Clothing"),
+ ("F", "Health / General & Emotional Condition"),
+ ("G", "Last Known location / Point last seen"),
+ (
+ "H",
+ "Summary of Events leading up to and following MP's Disappearance",
+ ),
+ ("I", "Trip plans of Subject"),
+ ("J", "Outdoor Experience"),
+ ("K", "Habits / Personality / Behavior Preferences"),
+ ("L", "Outdoor Equipment"),
+ ("M", "Contacts Person Might Make Upon Reaching Civilization"),
+ ("N", "Electronic Devices"),
+ ("O", "Family, Friends, and Press Relations"),
+ ("P", "Other Information"),
+ ("Q", "Groups Overdue / Dynamics"),
+ ("R", "Child / Adolescent"),
+ ("S", "Autistic Spectrum"),
+ ("T", "Cognitively Impaired / Intellectual Disability"),
+ ("U", "Depressed / Despondent / Possible Suicidal"),
+ ("V", "Exhibiting Psychotic Behavior"),
+ ("W", "Exhibiting Signs of Dementia or Alzheimer's"),
+)
+
+# Every code from QUESTION_CATEGORIES as a frozenset, for membership checks.
+VALID_QUESTION_CATEGORY_CODES: Final[frozenset[str]] = frozenset(
+ code for code, _ in QUESTION_CATEGORIES
+)
+# Fallback code; "P" is labelled "Other Information" in the table above.
+DEFAULT_QUESTION_CATEGORY_CODE: Final[str] = "P"
+
+# code -> label lookup built from the pairs.
+QUESTION_CATEGORY_LABELS: Final[dict[str, str]] = dict(QUESTION_CATEGORIES)
+# Codes only, in the same order as QUESTION_CATEGORIES.
+QUESTION_CATEGORY_ORDER: Final[tuple[str, ...]] = tuple(
+ code for code, _ in QUESTION_CATEGORIES
+)
+
+
+def normalize_question_category_code(category_code: str | None) -> str:
+    """Map a raw category code onto one of the known codes.
+
+    The input is stripped and upper-cased before lookup. ``None`` and any
+    value that is not in ``VALID_QUESTION_CATEGORY_CODES`` after that yield
+    ``DEFAULT_QUESTION_CATEGORY_CODE``.
+    """
+    candidate = "" if category_code is None else category_code.strip().upper()
+    if candidate not in VALID_QUESTION_CATEGORY_CODES:
+        return DEFAULT_QUESTION_CATEGORY_CODE
+    return candidate
diff --git a/backend/src/interview_helper/context_manager/session_context_manager.py b/backend/src/interview_helper/context_manager/session_context_manager.py
index 158d170..23f953e 100644
--- a/backend/src/interview_helper/context_manager/session_context_manager.py
+++ b/backend/src/interview_helper/context_manager/session_context_manager.py
@@ -557,6 +557,7 @@ async def _worker(
self.db,
project_id=job.project_id,
text=result.question,
+ category_code=result.category_code,
span=result.grounding_span,
transcript_span_id=transcript_span_id,
transcript_context_start=results.transcript_context_start,
diff --git a/backend/src/interview_helper/context_manager/tests/test_database.py b/backend/src/interview_helper/context_manager/tests/test_database.py
index 9aa0a33..73dd71a 100644
--- a/backend/src/interview_helper/context_manager/tests/test_database.py
+++ b/backend/src/interview_helper/context_manager/tests/test_database.py
@@ -1,6 +1,21 @@
-from interview_helper.context_manager.database import get_user_by_id
-from interview_helper.context_manager.database import get_or_add_user_by_oidc_id
-from interview_helper.context_manager.database import PersistentDatabase
+from ulid import ULID
+from interview_helper.context_manager.database import (
+ PersistentDatabase,
+ add_ai_analysis,
+ add_transcription,
+ create_new_project,
+ create_session,
+ get_all_ai_analyses,
+ get_or_add_user_by_oidc_id,
+ get_user_by_id,
+ mark_ai_analysis_asked,
+ mark_ai_analysis_dismissed_not_asked,
+ star_ai_analysis,
+ unstar_ai_analysis,
+ undo_ai_analysis_dismissal,
+)
+from interview_helper.context_manager.types import ProjectId, SessionId, TranscriptId
+from datetime import datetime, timezone
import sqlalchemy as sa
import pytest
@@ -46,3 +61,162 @@ def test_user_addition():
assert added_user.user_id == added_user2.user_id == added_user3.user_id
assert added_user.oidc_id == added_user2.oidc_id == added_user3.oidc_id
+
+
+def test_add_ai_analysis_normalizes_invalid_category_code_to_default():
+ """An unrecognized category code is read back as the default code "P"."""
+ db = PersistentDatabase.new_in_memory()
+ user = get_or_add_user_by_oidc_id(db, "oidc-1", "User One")
+
+ project = create_new_project(db, user.user_id, "P1")
+ project_id = ProjectId.from_str(project["id"])
+ session_id = SessionId(ULID())
+ create_session(db, session_id, project_id, user.user_id)
+
+ # A transcription row is created first so the analysis has transcript IDs
+ # to reference.
+ transcript_id = TranscriptId.from_str(
+ add_transcription(
+ db=db,
+ user_id=user.user_id,
+ session_id=session_id,
+ project_id=project_id,
+ text="Sample transcript",
+ speaker="Speaker-1",
+ )
+ )
+
+ # "INVALID" is not one of the single-letter codes.
+ _ = add_ai_analysis(
+ db=db,
+ project_id=project_id,
+ text="What time did they leave?",
+ category_code="INVALID",
+ span="they left at sunrise",
+ transcript_span_id=transcript_id,
+ transcript_context_start=transcript_id,
+ transcript_context_end=transcript_id,
+ summary="Summary",
+ )
+
+ rows = get_all_ai_analyses(db, project_id)
+ assert len(rows) == 1
+ assert rows[0].category_code == "P"
+
+
+def test_mark_ai_analysis_actions_update_tag_and_asked_fields():
+ """Walk one analysis through asked -> undo -> star -> dismiss -> undo -> unstar."""
+ db = PersistentDatabase.new_in_memory()
+ user = get_or_add_user_by_oidc_id(db, "oidc-asked-at", "Asked At User")
+
+ project = create_new_project(db, user.user_id, "P2")
+ project_id = ProjectId.from_str(project["id"])
+ session_id = SessionId(ULID())
+ create_session(db, session_id, project_id, user.user_id)
+
+ transcript_id = add_transcription(
+ db=db,
+ user_id=user.user_id,
+ session_id=session_id,
+ project_id=project_id,
+ text="Transcript chunk",
+ speaker="Speaker-1",
+ )
+
+ analysis_id = add_ai_analysis(
+ db=db,
+ project_id=project_id,
+ text="Question?",
+ category_code="P",
+ span=None,
+ transcript_span_id=TranscriptId.from_str(transcript_id),
+ transcript_context_start=TranscriptId.from_str(transcript_id),
+ transcript_context_end=TranscriptId.from_str(transcript_id),
+ summary="Summary",
+ )
+
+ # Captured before the call so asked_at can be bounded from below.
+ current_datetime = datetime.now(timezone.utc)
+
+ _ = mark_ai_analysis_asked(
+ db=db, analysis_id=str(analysis_id), asked_at_transcript_id=transcript_id
+ )
+
+ rows = get_all_ai_analyses(db, project_id)
+ assert len(rows) == 1
+ asked_row = rows[0]
+ assert asked_row.was_asked is True
+ assert asked_row.asked_at_transcript_id == transcript_id
+ assert asked_row.asked_at is not None, "asked_at should be set when marked as asked"
+ assert asked_row.asked_at >= current_datetime, (
+ "asked_at should be at least the time before the 'mark as asked'"
+ )
+
+ # Undo the dismissal, star the analysis, then dismiss it as not asked:
+ # the asked-tracking fields must be cleared and the star preserved.
+ _ = undo_ai_analysis_dismissal(db=db, analysis_id=str(analysis_id))
+ _ = star_ai_analysis(db=db, analysis_id=str(analysis_id))
+ _ = mark_ai_analysis_dismissed_not_asked(db=db, analysis_id=str(analysis_id))
+ rows_after_clear = get_all_ai_analyses(db, project_id)
+ assert len(rows_after_clear) == 1
+ cleared_row = rows_after_clear[0]
+ assert cleared_row.tag == "starred_dismissed"
+ assert cleared_row.was_asked is False
+ assert cleared_row.asked_at_transcript_id is None
+ assert cleared_row.asked_at is None
+
+ # Undoing a starred dismissal goes back to "starred" with was_asked unset.
+ _ = undo_ai_analysis_dismissal(db=db, analysis_id=str(analysis_id))
+ rows_after_undo = get_all_ai_analyses(db, project_id)
+ assert len(rows_after_undo) == 1
+ undone_row = rows_after_undo[0]
+ assert undone_row.tag == "starred"
+ assert undone_row.was_asked is None
+ assert undone_row.asked_at_transcript_id is None
+ assert undone_row.asked_at is None
+
+ # Unstarring leaves the analysis without a tag.
+ _ = unstar_ai_analysis(db=db, analysis_id=str(analysis_id))
+ rows_after_unstar = get_all_ai_analyses(db, project_id)
+ assert len(rows_after_unstar) == 1
+ unstarred_row = rows_after_unstar[0]
+ assert unstarred_row.tag is None
+
+
+def test_mark_ai_analysis_actions_validate_invalid_transitions():
+ """Tag actions that do not fit the current state raise ValueError."""
+ db = PersistentDatabase.new_in_memory()
+ user = get_or_add_user_by_oidc_id(db, "oidc-validate-tags", "Validate User")
+
+ project = create_new_project(db, user.user_id, "P3")
+ project_id = ProjectId.from_str(project["id"])
+ session_id = SessionId(ULID())
+ create_session(db, session_id, project_id, user.user_id)
+
+ transcript_id = add_transcription(
+ db=db,
+ user_id=user.user_id,
+ session_id=session_id,
+ project_id=project_id,
+ text="Transcript chunk",
+ speaker="Speaker-1",
+ )
+
+ analysis_id = add_ai_analysis(
+ db=db,
+ project_id=project_id,
+ text="Question?",
+ category_code="P",
+ span=None,
+ transcript_span_id=TranscriptId.from_str(transcript_id),
+ transcript_context_start=TranscriptId.from_str(transcript_id),
+ transcript_context_end=TranscriptId.from_str(transcript_id),
+ summary="Summary",
+ )
+
+ # The new analysis is expected to be untagged, so it can be neither
+ # unstarred nor un-dismissed.
+ with pytest.raises(ValueError):
+ _ = unstar_ai_analysis(db=db, analysis_id=str(analysis_id))
+
+ with pytest.raises(ValueError):
+ _ = undo_ai_analysis_dismissal(db=db, analysis_id=str(analysis_id))
+
+ _ = mark_ai_analysis_dismissed_not_asked(db=db, analysis_id=str(analysis_id))
+
+ # Once dismissed, starring and marking as asked are both rejected.
+ with pytest.raises(ValueError):
+ _ = star_ai_analysis(db=db, analysis_id=str(analysis_id))
+
+ with pytest.raises(ValueError):
+ _ = mark_ai_analysis_asked(
+ db=db,
+ analysis_id=str(analysis_id),
+ asked_at_transcript_id=transcript_id,
+ )
diff --git a/backend/src/interview_helper/context_manager/types.py b/backend/src/interview_helper/context_manager/types.py
index e1e82f5..926705f 100644
--- a/backend/src/interview_helper/context_manager/types.py
+++ b/backend/src/interview_helper/context_manager/types.py
@@ -121,6 +121,7 @@ class AIJob:
class AIQuestion:
question: str
grounding_span: str
+ category_code: str
@dataclass(frozen=True)
diff --git a/backend/src/interview_helper/downloads/get_report.py b/backend/src/interview_helper/downloads/get_report.py
new file mode 100644
index 0000000..d70e14d
--- /dev/null
+++ b/backend/src/interview_helper/downloads/get_report.py
@@ -0,0 +1,756 @@
+from __future__ import annotations
+
+from collections import defaultdict, deque
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta, timezone
+from io import BytesIO
+from collections.abc import Sequence
+from typing import cast
+from xml.sax.saxutils import escape
+
+from reportlab.lib import colors
+from reportlab.lib.pagesizes import letter
+from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
+from reportlab.lib.units import inch
+from reportlab.platypus import PageBreak, Paragraph, SimpleDocTemplate, Spacer
+from reportlab.platypus.flowables import Flowable
+
+from interview_helper.context_manager.database import (
+ AnalysisRow,
+ PersistentDatabase,
+ TranscriptionWithProjectDetails,
+ get_all_ai_analyses,
+ get_all_transcriptions_for_project,
+)
+from interview_helper.context_manager.question_categories import (
+ QUESTION_CATEGORY_LABELS,
+ QUESTION_CATEGORY_ORDER,
+ normalize_question_category_code,
+)
+from interview_helper.context_manager.types import ProjectId
+from interview_helper.downloads.util import extract_timestamp_from_ulid
+
+# How far back from a question's asked-at time its transcript excerpt reaches.
+TRANSCRIPT_EXCERPT_WINDOW = timedelta(minutes=1)
+# Gaps longer than this (between consecutive transcript entries, or before a
+# group of answered-question events) get a "[ . . . <duration> passed . . . ]"
+# separator in the rendered transcript.
+TRANSCRIPT_ENTRY_GAP_SEPARATOR_THRESHOLD = timedelta(minutes=5)
+# Answered-question events at most this far apart are rendered as one group;
+# a group starting later than this after the previously rendered timestamp
+# gets its own standalone timestamp line.
+ANSWERED_EVENT_TIMESTAMP_CUTOFF = timedelta(minutes=2)
+
+
+@dataclass
+class ReportQuestionEntry:
+ """One AI-generated question as it is presented in the report."""
+
+ # Identifier of the underlying analysis row.
+ analysis_id: str
+ # Question number; rendered as "Q<ordinal>" and used to build question_anchor.
+ ordinal: int
+ # The question text.
+ text: str
+ # Category code after normalize_question_category_code().
+ category_code: str
+ # Span text stored on the analysis (rendered under "Context:"), if any.
+ span: str | None
+ # Anchor name for this question, "question-<ordinal>".
+ question_anchor: str
+ # Anchor of the transcript section matching the analysis' span/context ids, if found.
+ context_anchor: str | None
+ # Anchor of the transcript section holding asked_at_transcript_id, if found.
+ answered_at_anchor: str | None
+ # asked_at formatted as a UTC string; None when asked_at is unset.
+ answered_at_text: str | None
+ # True when the analysis tag is "starred" or "starred_dismissed".
+ is_starred: bool
+ # Transcript text from the window preceding asked_at, if any.
+ transcript_excerpt: str | None = None
+
+
+@dataclass
+class ReportTranscriptSection:
+ """A run of consecutive transcript chunks from one speaker, rendered as one entry."""
+
+ # Anchor name, "transcript-<n>", with n counting sections from 1.
+ anchor: str
+ speaker: str
+ # Stripped chunk texts joined with single spaces.
+ text: str
+ # extract_timestamp_from_ulid() of the first chunk id in the run.
+ started_at: datetime
+ # extract_timestamp_from_ulid() of the last chunk id in the run.
+ ended_at: datetime
+ # Transcription ids of the chunks merged into this section, in row order.
+ chunk_ids: list[str] = field(default_factory=list)
+ # (ordinal, question_anchor, asked_at) for each question marked as asked in
+ # this section; build_report_data() sorts these by asked_at.
+ answered_question_refs: list[tuple[int, str, datetime]] = field(
+ default_factory=list
+ )
+
+
+@dataclass
+class ReportData:
+ """Everything generate_report_pdf() renders for one project."""
+
+ # Project name taken from the first transcript row ("Untitled Project" if empty).
+ project_name: str
+ # Timestamp extracted from the first transcript row's id.
+ start_time: datetime
+ # Sum over sessions of (latest - earliest) transcript timestamp.
+ total_duration: timedelta
+ # Questions with was_asked set to True, keyed by normalized category code.
+ answered_by_category: dict[str, list[ReportQuestionEntry]]
+ # All other questions, keyed by normalized category code.
+ unanswered_by_category: dict[str, list[ReportQuestionEntry]]
+ # Transcript sections in row order.
+ transcript_sections: list[ReportTranscriptSection]
+
+
+@dataclass
+class _TranscriptAnchorIndex:
+ """Transcript sections plus the lookup tables used to link questions to them."""
+
+ # Sections in the order they were built.
+ sections: list[ReportTranscriptSection]
+ # Transcription id -> anchor of the section that contains that chunk.
+ chunk_to_section_anchor: dict[str, str]
+ # Section anchor -> the section object itself.
+ section_by_anchor: dict[str, ReportTranscriptSection]
+
+
+def _format_utc(ts: datetime) -> str:
+    """Render ``ts`` converted to UTC as ``YYYY-MM-DD HH:MM:SS UTC``.
+
+    Note that ``datetime.astimezone`` treats a naive ``ts`` as local time.
+    """
+    in_utc = ts.astimezone(timezone.utc)
+    return in_utc.strftime("%Y-%m-%d %H:%M:%S UTC")
+
+
+def _format_duration_hms(duration: timedelta) -> str:
+    """Format ``duration`` as ``<h>h <m>m <s>s``.
+
+    Negative durations are clamped to zero and fractional seconds are
+    truncated. Hours are not wrapped into days.
+    """
+    whole_seconds = int(max(duration.total_seconds(), 0))
+    whole_minutes, seconds = divmod(whole_seconds, 60)
+    hours, minutes = divmod(whole_minutes, 60)
+    return f"{hours}h {minutes}m {seconds}s"
+
+
+def _format_excerpt_window(duration: timedelta) -> str:
+    """Describe an excerpt window length in words.
+
+    Whole minutes read "1 minute" / "N minutes", sub-minute values read
+    "1 second" / "N seconds", and anything else reads "<m>m <s>s".
+    Negative durations are clamped to zero and fractional seconds are
+    truncated.
+    """
+    whole_seconds = int(max(duration.total_seconds(), 0))
+    minutes, seconds = divmod(whole_seconds, 60)
+
+    if minutes == 0:
+        if seconds == 1:
+            return f"{seconds} second"
+        return f"{seconds} seconds"
+
+    if seconds != 0:
+        return f"{minutes}m {seconds}s"
+
+    if minutes == 1:
+        return f"{minutes} minute"
+    return f"{minutes} minutes"
+
+
+def _format_gap_duration(duration: timedelta) -> str:
+    """Format a gap compactly, dropping leading zero units.
+
+    Yields ``<s>s``, ``<m>m <s>s`` or ``<h>h <m>m <s>s`` depending on the
+    largest non-zero unit. Negative durations are clamped to zero and
+    fractional seconds are truncated.
+    """
+    whole_seconds = int(max(duration.total_seconds(), 0))
+    whole_minutes, seconds = divmod(whole_seconds, 60)
+    hours, minutes = divmod(whole_minutes, 60)
+
+    if hours == 0 and minutes == 0:
+        return f"{seconds}s"
+    if hours == 0:
+        return f"{minutes}m {seconds}s"
+    return f"{hours}h {minutes}m {seconds}s"
+
+
+def _ordered_category_items(
+    grouped: dict[str, list[ReportQuestionEntry]],
+) -> list[tuple[str, list[ReportQuestionEntry]]]:
+    """Return the non-empty groups of ``grouped`` in QUESTION_CATEGORY_ORDER.
+
+    Codes that are missing from ``grouped`` or map to an empty list are left
+    out. Codes present in ``grouped`` but absent from QUESTION_CATEGORY_ORDER
+    are not returned.
+    """
+    return [
+        (code, grouped[code])
+        for code in QUESTION_CATEGORY_ORDER
+        if grouped.get(code)
+    ]
+
+
+def _build_transcript_anchor_index(
+    transcript_rows: Sequence[TranscriptionWithProjectDetails],
+) -> _TranscriptAnchorIndex:
+    """Merge consecutive same-speaker rows into sections and index them.
+
+    Rows are walked in the given order. Each run of adjacent rows with the
+    same speaker becomes one ``ReportTranscriptSection`` anchored as
+    ``transcript-<n>`` (n counts from 1). A row without a speaker is filed
+    under "Unknown Speaker".
+
+    Returns:
+        The sections together with chunk-id -> anchor and anchor -> section
+        lookup tables.
+    """
+    # Pass 1: split the rows into runs of (speaker, started_at, chunk_ids, texts).
+    runs: list[tuple[str, datetime, list[str], list[str]]] = []
+    for row in transcript_rows:
+        chunk_id = str(row["transcription_id"])
+        row_speaker = str(row["speaker"] or "Unknown Speaker")
+        row_text = str(row["text_output"] or "").strip()
+        row_timestamp = extract_timestamp_from_ulid(chunk_id)
+
+        if runs and runs[-1][0] == row_speaker:
+            # Same speaker keeps talking: extend the open run.
+            runs[-1][2].append(chunk_id)
+            runs[-1][3].append(row_text)
+        else:
+            runs.append((row_speaker, row_timestamp, [chunk_id], [row_text]))
+
+    # Pass 2: turn every run into a section and record which anchor owns
+    # each chunk id.
+    sections: list[ReportTranscriptSection] = []
+    anchor_by_chunk_id: dict[str, str] = {}
+    for number, (run_speaker, run_started_at, run_chunk_ids, run_texts) in enumerate(
+        runs, start=1
+    ):
+        anchor = f"transcript-{number}"
+        sections.append(
+            ReportTranscriptSection(
+                anchor=anchor,
+                speaker=run_speaker,
+                text=" ".join(run_texts).strip(),
+                started_at=run_started_at,
+                ended_at=extract_timestamp_from_ulid(run_chunk_ids[-1]),
+                chunk_ids=list(run_chunk_ids),
+            )
+        )
+        for chunk_id in run_chunk_ids:
+            anchor_by_chunk_id[chunk_id] = anchor
+
+    return _TranscriptAnchorIndex(
+        sections=sections,
+        chunk_to_section_anchor=anchor_by_chunk_id,
+        section_by_anchor={section.anchor: section for section in sections},
+    )
+
+
+def _compute_total_duration(
+    transcript_rows: Sequence[TranscriptionWithProjectDetails],
+) -> timedelta:
+    """Sum, over all sessions, the time between a session's first and last row.
+
+    Time between sessions is not counted, and a session with a single row
+    contributes zero. Returns ``timedelta(0)`` for no rows.
+    """
+    if not transcript_rows:
+        return timedelta(0)
+
+    earliest: dict[str, datetime] = {}
+    latest: dict[str, datetime] = {}
+
+    for row in transcript_rows:
+        session_key = str(row["session_id"])
+        moment = extract_timestamp_from_ulid(str(row["transcription_id"]))
+
+        if session_key not in earliest:
+            earliest[session_key] = moment
+            latest[session_key] = moment
+            continue
+
+        earliest[session_key] = min(earliest[session_key], moment)
+        latest[session_key] = max(latest[session_key], moment)
+
+    return sum(
+        (latest[session_key] - earliest[session_key] for session_key in earliest),
+        timedelta(0),
+    )
+
+
+def _analysis_context_anchor(
+    analysis: AnalysisRow, chunk_to_section_anchor: dict[str, str]
+) -> str | None:
+    """Find the transcript section anchor that gives an analysis its context.
+
+    The ids are tried in order: ``transcript_span_id`` (only when it is not
+    None), then ``transcript_context_start``, then ``transcript_context_end``.
+    The first one present in ``chunk_to_section_anchor`` wins.
+
+    Returns:
+        The matching anchor, or ``None`` when none of the ids is indexed.
+    """
+    lookup = chunk_to_section_anchor.get
+
+    span_id = analysis.transcript_span_id
+    anchor = lookup(str(span_id)) if span_id is not None else None
+    if anchor is None:
+        anchor = lookup(str(analysis.transcript_context_start))
+    if anchor is None:
+        anchor = lookup(str(analysis.transcript_context_end))
+    return anchor
+
+
+def _format_excerpt_rows(excerpt_rows: Sequence[tuple[str, str]]) -> str | None:
+    """Render ``(speaker, text)`` rows as one ``Speaker: text`` line per turn.
+
+    Adjacent rows from the same speaker are merged into one line, their
+    texts joined with single spaces. Lines are separated by newlines.
+
+    Returns:
+        The rendered excerpt, or ``None`` when there are no rows.
+    """
+    if len(excerpt_rows) < 1:
+        return None
+
+    # Each turn is (speaker, texts spoken without interruption).
+    turns: list[tuple[str, list[str]]] = []
+    for speaker, text in excerpt_rows:
+        if turns and turns[-1][0] == speaker:
+            turns[-1][1].append(text)
+        else:
+            turns.append((speaker, [text]))
+
+    if not turns:
+        return None
+    return "\n".join(f"{speaker}: {' '.join(texts)}" for speaker, texts in turns)
+
+
+def _precompute_transcript_excerpts_by_asked_at(
+    transcript_rows: Sequence[TranscriptionWithProjectDetails],
+    analyses: Sequence[AnalysisRow],
+) -> dict[str, str | None]:
+    """Build, per asked analysis, the transcript excerpt preceding ``asked_at``.
+
+    For every analysis with ``asked_at`` set, the excerpt covers the non-empty
+    transcript rows whose timestamp lies in
+    ``[asked_at - TRANSCRIPT_EXCERPT_WINDOW, asked_at)``. Analyses are visited
+    in ascending ``asked_at`` order while a single cursor walks the rows, so
+    each row is admitted to the window at most once.
+
+    NOTE(review): the forward-only cursor assumes ``transcript_rows`` is in
+    chronological order - confirm the query guarantees that.
+
+    Returns:
+        Mapping of analysis id to excerpt text (``None`` when the window is
+        empty). Analyses without ``asked_at`` have no entry.
+    """
+    asked_events = [
+        (analysis.asked_at, analysis.analysis_id)
+        for analysis in analyses
+        if analysis.asked_at is not None
+    ]
+    if not asked_events:
+        return {}
+    asked_events.sort(key=lambda event: event[0])
+
+    # Keep only rows that actually carry text: (timestamp, speaker, text).
+    spoken: list[tuple[datetime, str, str]] = []
+    for row in transcript_rows:
+        spoken_at = extract_timestamp_from_ulid(str(row["transcription_id"]))
+        row_speaker = str(row["speaker"] or "Unknown Speaker")
+        row_text = str(row["text_output"] or "").strip()
+        if not row_text:
+            continue
+        spoken.append((spoken_at, row_speaker, row_text))
+
+    excerpts: dict[str, str | None] = {}
+    window: deque[tuple[datetime, str, str]] = deque()
+    cursor = 0
+    total = len(spoken)
+
+    for asked_at, analysis_id in asked_events:
+        # Admit every row spoken strictly before the question was asked.
+        while cursor < total and spoken[cursor][0] < asked_at:
+            window.append(spoken[cursor])
+            cursor += 1
+
+        # Evict rows that have fallen out of the look-back window.
+        oldest_allowed = asked_at - TRANSCRIPT_EXCERPT_WINDOW
+        while window and window[0][0] < oldest_allowed:
+            _ = window.popleft()
+
+        excerpts[analysis_id] = _format_excerpt_rows(
+            [(speaker, text) for _, speaker, text in window]
+        )
+
+    return excerpts
+
+
+def build_report_data(project_id: str, db: PersistentDatabase) -> ReportData | None:
+    """Collect everything needed to render the report for one project.
+
+    Transcript rows are merged into speaker sections, every AI analysis is
+    turned into a ``ReportQuestionEntry`` and filed under its normalized
+    category as answered (``was_asked is True``) or unanswered, and each
+    answered question is cross-referenced from the transcript section in
+    which it was marked as asked.
+
+    Args:
+        project_id: Project identifier in string form; parsed with
+            ``ProjectId.from_str``.
+        db: Database handle the transcript rows and analyses are read from.
+
+    Returns:
+        The assembled ``ReportData``, or ``None`` when the project has no
+        transcript rows.
+    """
+    typed_project_id = ProjectId.from_str(project_id)
+
+    transcript_rows = get_all_transcriptions_for_project(db, typed_project_id)
+    if not transcript_rows:
+        return None
+
+    anchor_index = _build_transcript_anchor_index(transcript_rows)
+    analyses = get_all_ai_analyses(db, typed_project_id)
+
+    transcript_excerpts_by_analysis_id = _precompute_transcript_excerpts_by_asked_at(
+        transcript_rows, analyses
+    )
+
+    answered_by_category: dict[str, list[ReportQuestionEntry]] = defaultdict(list)
+    unanswered_by_category: dict[str, list[ReportQuestionEntry]] = defaultdict(list)
+
+    for analysis in analyses:
+        normalized_category = normalize_question_category_code(analysis.category_code)
+
+        answered_anchor: str | None = None
+        answered_at_text: str | None = None
+        transcript_excerpt: str | None = None
+
+        if analysis.asked_at is not None:
+            # asked_at_transcript_id is expected to accompany asked_at. This
+            # used to be an assert, which is stripped under ``python -O`` and
+            # otherwise aborts the whole report over one inconsistent row.
+            # Leaving the anchor unset is safe: the renderer already copes
+            # with an answered question that has no transcript anchor.
+            if analysis.asked_at_transcript_id:
+                asked_at_id = analysis.asked_at_transcript_id.lower()
+                answered_anchor = anchor_index.chunk_to_section_anchor.get(asked_at_id)
+            answered_at_text = _format_utc(analysis.asked_at)
+            transcript_excerpt = transcript_excerpts_by_analysis_id.get(
+                analysis.analysis_id
+            )
+
+        question_anchor = f"question-{analysis.ordinal}"
+
+        entry = ReportQuestionEntry(
+            analysis_id=analysis.analysis_id,
+            ordinal=analysis.ordinal,
+            text=analysis.text,
+            category_code=normalized_category,
+            span=analysis.span,
+            question_anchor=question_anchor,
+            context_anchor=_analysis_context_anchor(
+                analysis, anchor_index.chunk_to_section_anchor
+            ),
+            answered_at_anchor=answered_anchor,
+            answered_at_text=answered_at_text,
+            is_starred=analysis.tag in ("starred", "starred_dismissed"),
+            transcript_excerpt=transcript_excerpt,
+        )
+
+        if analysis.was_asked is True:
+            answered_by_category[normalized_category].append(entry)
+            # Back-reference from the transcript section to the question.
+            if answered_anchor is not None and analysis.asked_at is not None:
+                section = anchor_index.section_by_anchor.get(answered_anchor)
+                if section is not None:
+                    section.answered_question_refs.append(
+                        (entry.ordinal, entry.question_anchor, analysis.asked_at)
+                    )
+        else:
+            unanswered_by_category[normalized_category].append(entry)
+
+    # Within each section, list answered questions in the order they were asked.
+    for section in anchor_index.sections:
+        section.answered_question_refs.sort(key=lambda item: item[2])
+
+    # The first row supplies both the project name and the interview start.
+    project_name = str(transcript_rows[0]["project_name"] or "Untitled Project")
+    first_timestamp = extract_timestamp_from_ulid(
+        str(transcript_rows[0]["transcription_id"])
+    )
+
+    return ReportData(
+        project_name=project_name,
+        start_time=first_timestamp,
+        total_duration=_compute_total_duration(transcript_rows),
+        # Plain dicts, so later lookups cannot create empty categories.
+        answered_by_category=dict(answered_by_category),
+        unanswered_by_category=dict(unanswered_by_category),
+        transcript_sections=anchor_index.sections,
+    )
+
+
+def _render_question_sections(
+ story: list[Flowable],
+ title: str,
+ grouped_questions: dict[str, list[ReportQuestionEntry]],
+ normal_style: ParagraphStyle,
+ heading_style: ParagraphStyle,
+ category_style: ParagraphStyle,
+ question_style: ParagraphStyle,
+ excerpt_style: ParagraphStyle,
+) -> None:
+ """Append one titled, category-grouped question listing to ``story``.
+
+ A heading for ``title`` is appended first. Then, for each non-empty
+ category in QUESTION_CATEGORY_ORDER, a category line is appended followed
+ by its questions; a question may be followed by its context span, its
+ answered-at time and a transcript excerpt. When no category has any
+ question, a single "No questions available." paragraph is appended instead.
+
+ ``story`` is mutated in place; nothing is returned. All dynamic text is
+ passed through ``escape`` before being placed into paragraph text.
+ """
+ story.append(Paragraph(escape(title), heading_style))
+ story.append(Spacer(1, 0.15 * inch))
+
+ ordered_groups = _ordered_category_items(grouped_questions)
+ if len(ordered_groups) == 0:
+ story.append(Paragraph("No questions available.", normal_style))
+ return
+
+ for category_code, entries in ordered_groups:
+ # Codes without a label fall back to "Unknown".
+ category_label = QUESTION_CATEGORY_LABELS.get(category_code, "Unknown")
+ story.append(
+ Paragraph(
+ f'{escape(category_code)}. {escape(category_label)}',
+ category_style,
+ )
+ )
+ story.append(Spacer(1, 0.12 * inch))
+
+ for entry in entries:
+ # Starred questions get a star after their "Q<n>." label.
+ star_icon = (
+ '★'
+ if entry.is_starred
+ else ""
+ )
+ question_label = (
+ f"Q{entry.ordinal}. {star_icon}"
+ if entry.is_starred
+ else f"Q{entry.ordinal}."
+ )
+ story.append(
+ Paragraph(
+ f'{question_label} {escape(entry.text)}',
+ question_style,
+ )
+ )
+
+ if entry.span:
+ escaped_span = escape(entry.span)
+ # NOTE(review): as visible here both branches build the same text;
+ # presumably the anchored branch is meant to link to
+ # entry.context_anchor - confirm against the real file.
+ if entry.context_anchor:
+ story.append(
+ Paragraph(
+ f'Context: "{escaped_span}"',
+ question_style,
+ )
+ )
+ else:
+ story.append(
+ Paragraph(
+ f'Context: "{escaped_span}"',
+ question_style,
+ )
+ )
+
+ if entry.answered_at_text is not None:
+ # NOTE(review): same as above - the two branches are identical in
+ # this view; presumably one links to entry.answered_at_anchor.
+ if entry.answered_at_anchor:
+ story.append(
+ Paragraph(
+ f'Answered At: {escape(entry.answered_at_text)}',
+ question_style,
+ )
+ )
+ else:
+ story.append(
+ Paragraph(
+ f'Answered At: {escape(entry.answered_at_text)}',
+ question_style,
+ )
+ )
+
+ if entry.transcript_excerpt:
+ # NOTE(review): the string literals below appear split across
+ # physical lines in this view; presumably they held line-break
+ # markup - confirm before applying.
+ formatted_excerpt = (
+ '[ . . . ]
'
+ + escape(entry.transcript_excerpt).replace("\n", "
")
+ )
+ excerpt_label = (
+ f'Transcript Excerpt (Last {_format_excerpt_window(TRANSCRIPT_EXCERPT_WINDOW)})'
+ if entry.answered_at_anchor
+ else f"Transcript Excerpt (Last {_format_excerpt_window(TRANSCRIPT_EXCERPT_WINDOW)})"
+ )
+ story.append(
+ Paragraph(
+ f'{excerpt_label}:
{formatted_excerpt}',
+ excerpt_style,
+ )
+ )
+
+ story.append(Spacer(1, 0.12 * inch))
+
+ story.append(Spacer(1, 0.08 * inch))
+
+
+def generate_report_pdf(project_id: str, db: PersistentDatabase) -> bytes | None:
+ """Render the interview report for a project as PDF bytes.
+
+ The document consists of a cover page, the answered questions grouped by
+ category, the transcript (with gap separators and references to the
+ questions answered in each section) and finally the unanswered questions.
+
+ Args:
+ project_id: Project identifier in string form.
+ db: Database handle passed on to ``build_report_data``.
+
+ Returns:
+ The PDF file contents, or ``None`` when ``build_report_data`` returns
+ ``None`` (the project has no transcript rows).
+ """
+ report_data = build_report_data(project_id, db)
+ if report_data is None:
+ return None
+
+ # The PDF is built entirely in memory.
+ buffer = BytesIO()
+ document = SimpleDocTemplate(
+ buffer,
+ pagesize=letter,
+ title=f"Interview Report - {report_data.project_name}",
+ leftMargin=0.75 * inch,
+ rightMargin=0.75 * inch,
+ topMargin=0.75 * inch,
+ bottomMargin=0.75 * inch,
+ )
+
+ # Base styles: entries of the sample stylesheet are adjusted in place.
+ styles = getSampleStyleSheet()
+ title_style = cast(ParagraphStyle, styles["Title"])
+ title_style.textColor = colors.HexColor("#1a472a")
+ title_style.fontSize = 36
+ title_style.leading = 42
+
+ heading_style = cast(ParagraphStyle, styles["Heading2"])
+ heading_style.textColor = colors.HexColor("#2E5090")
+ heading_style.fontSize = 18
+ heading_style.spaceAfter = 6
+
+ normal_style = cast(ParagraphStyle, styles["BodyText"])
+ normal_style.fontSize = 11
+
+ # Custom styles for categories and questions
+ category_style = ParagraphStyle(
+ "CategoryStyle",
+ parent=normal_style,
+ fontSize=13,
+ textColor=colors.HexColor("#2E5090"),
+ spaceAfter=8,
+ spaceBefore=4,
+ )
+
+ question_style = ParagraphStyle(
+ "QuestionStyle",
+ parent=normal_style,
+ fontSize=11,
+ leftIndent=20,
+ spaceAfter=4,
+ )
+
+ subtitle_style = ParagraphStyle(
+ "SubtitleStyle",
+ parent=normal_style,
+ fontSize=13,
+ textColor=colors.HexColor("#555555"),
+ spaceAfter=6,
+ )
+ excerpt_style = ParagraphStyle(
+ "ExcerptStyle",
+ parent=normal_style,
+ fontSize=10,
+ leading=12,
+ leftIndent=36,
+ rightIndent=12,
+ textColor=colors.HexColor("#666666"),
+ spaceBefore=6,
+ spaceAfter=4,
+ )
+ # alignment=1 is ReportLab's TA_CENTER.
+ gap_separator_style = ParagraphStyle(
+ "GapSeparatorStyle",
+ parent=normal_style,
+ fontSize=10,
+ textColor=colors.HexColor("#777777"),
+ alignment=1,
+ spaceBefore=6,
+ spaceAfter=6,
+ )
+
+ story: list[Flowable] = []
+
+ # Cover page
+ story.append(Spacer(1, 1.5 * inch))
+ story.append(
+ Paragraph('Interview Report', title_style)
+ )
+ story.append(Spacer(1, 0.1 * inch))
+ story.append(
+ Paragraph(
+ f'Project: {escape(report_data.project_name)}',
+ ParagraphStyle(
+ "ProjectTitle",
+ parent=title_style,
+ fontSize=24,
+ textColor=colors.HexColor("#2E5090"),
+ ),
+ )
+ )
+ story.append(Spacer(1, 0.5 * inch))
+ story.append(
+ Paragraph(
+ f'Interview Start: {_format_utc(report_data.start_time)}',
+ subtitle_style,
+ )
+ )
+ story.append(
+ Paragraph(
+ f'Total Interview Length: {_format_duration_hms(report_data.total_duration)}',
+ subtitle_style,
+ )
+ )
+ story.append(PageBreak())
+
+ # Answered questions
+ _render_question_sections(
+ story,
+ "Answered Questions (Categorized)",
+ report_data.answered_by_category,
+ normal_style,
+ heading_style,
+ category_style,
+ question_style,
+ excerpt_style,
+ )
+ story.append(PageBreak())
+
+ # Transcript
+ story.append(Paragraph("Transcript", heading_style))
+ story.append(Spacer(1, 0.15 * inch))
+
+ # One entry per section; a separator is inserted when the time since the
+ # previous section ended exceeds the gap threshold.
+ previous_ended_at: datetime | None = None
+ for section in report_data.transcript_sections:
+ if previous_ended_at is not None:
+ time_gap = section.started_at - previous_ended_at
+ if time_gap > TRANSCRIPT_ENTRY_GAP_SEPARATOR_THRESHOLD:
+ story.append(
+ Paragraph(
+ f'[ . . . {_format_gap_duration(time_gap)} passed . . . ]',
+ gap_separator_style,
+ )
+ )
+ story.append(Spacer(1, 0.06 * inch))
+
+ speaker = section.speaker if section.speaker else "Unknown Speaker"
+ transcript_heading = f"[{_format_utc(section.started_at)}] {speaker}"
+ story.append(
+ Paragraph(
+ f'{escape(transcript_heading)}',
+ normal_style,
+ )
+ )
+ story.append(
+ Paragraph(
+ escape(section.text if section.text else "(No transcript text)"),
+ normal_style,
+ )
+ )
+
+ if section.answered_question_refs:
+ # Bucket this section's answered-question events: an event joins the
+ # open group when it is at most ANSWERED_EVENT_TIMESTAMP_CUTOFF after
+ # the group's latest event, otherwise it starts a new group.
+ # Each group is (first_at, last_at, [(ordinal, question_anchor), ...]).
+ grouped_answered_refs: list[
+ tuple[datetime, datetime, list[tuple[int, str]]]
+ ] = []
+ group_first_at: datetime | None = None
+ group_last_at: datetime | None = None
+ group_links: list[tuple[int, str]] = []
+
+ for ordinal, question_anchor, answered_at in section.answered_question_refs:
+ if group_first_at is None or group_last_at is None:
+ group_first_at = answered_at
+ group_last_at = answered_at
+ group_links = [(ordinal, question_anchor)]
+ continue
+
+ if answered_at - group_last_at <= ANSWERED_EVENT_TIMESTAMP_CUTOFF:
+ group_last_at = answered_at
+ group_links.append((ordinal, question_anchor))
+ continue
+
+ grouped_answered_refs.append(
+ (group_first_at, group_last_at, [*group_links])
+ )
+ group_first_at = answered_at
+ group_last_at = answered_at
+ group_links = [(ordinal, question_anchor)]
+
+ # Close the group that is still open after the loop.
+ if group_first_at is not None and group_last_at is not None:
+ grouped_answered_refs.append(
+ (group_first_at, group_last_at, [*group_links])
+ )
+
+ # Gaps are measured from the section end for the first group and from
+ # the previous group's last event afterwards.
+ last_rendered_timestamp = section.ended_at
+ for group_first_at, group_last_at, grouped_links in grouped_answered_refs:
+ group_gap = group_first_at - last_rendered_timestamp
+ if group_gap > TRANSCRIPT_ENTRY_GAP_SEPARATOR_THRESHOLD:
+ story.append(
+ Paragraph(
+ f'[ . . . {_format_gap_duration(group_gap)} passed . . . ]',
+ gap_separator_style,
+ )
+ )
+ story.append(Spacer(1, 0.06 * inch))
+
+ if group_gap > ANSWERED_EVENT_TIMESTAMP_CUTOFF:
+ story.append(
+ Paragraph(
+ f'[{_format_utc(group_first_at)}]',
+ normal_style,
+ )
+ )
+
+ # NOTE(review): question_anchor is unpacked but unused in the text
+ # as visible here; presumably it belongs in link markup - confirm.
+ grouped_links_text = ", ".join(
+ [
+ f'Q{ordinal}'
+ for ordinal, question_anchor in grouped_links
+ ]
+ )
+
+ story.append(
+ Paragraph(
+ f'Answered Here: {grouped_links_text}',
+ normal_style,
+ )
+ )
+ last_rendered_timestamp = group_last_at
+
+ story.append(Spacer(1, 0.1 * inch))
+ previous_ended_at = section.ended_at
+
+ story.append(PageBreak())
+
+ # Unanswered questions
+ _render_question_sections(
+ story,
+ "Unanswered Questions",
+ report_data.unanswered_by_category,
+ normal_style,
+ heading_style,
+ category_style,
+ question_style,
+ excerpt_style,
+ )
+
+ document.build(story)
+ return buffer.getvalue()
diff --git a/backend/src/interview_helper/tests/test_report_generation.py b/backend/src/interview_helper/tests/test_report_generation.py
new file mode 100644
index 0000000..ab49529
--- /dev/null
+++ b/backend/src/interview_helper/tests/test_report_generation.py
@@ -0,0 +1,196 @@
+from datetime import datetime, timedelta, timezone
+from typing import cast
+
+import pytest
+import sqlalchemy as sa
+from ulid import ULID
+
+from interview_helper.context_manager import models
+from interview_helper.context_manager.database import (
+ PersistentDatabase,
+ add_ai_analysis,
+ create_new_project,
+ create_session,
+ get_or_add_user_by_oidc_id,
+ mark_ai_analysis_asked,
+)
+from interview_helper.context_manager.types import (
+ ProjectId,
+ SessionId,
+ TranscriptId,
+ UserId,
+)
+from interview_helper.downloads.get_report import build_report_data, generate_report_pdf
+
+
+# Module-level mark: applies the anyio marker to every test in this file.
+# NOTE(review): the tests visible in this module are synchronous - confirm the
+# marker is still wanted.
+pytestmark = pytest.mark.anyio
+
+
+def _ulid_at(ts: datetime) -> str:
+    """Return the lowercased string form of a ULID created from ``ts``."""
+    return str(cast(ULID, ULID.from_datetime(ts))).lower()
+
+
+def _insert_transcription(
+    db: PersistentDatabase,
+    *,
+    transcription_id: str,
+    project_id: ProjectId,
+    user_id: UserId,
+    session_id: SessionId,
+    speaker: str,
+    text: str,
+) -> None:
+    """Insert one transcription row with a caller-chosen ``transcription_id``.
+
+    The row is written directly through the ``models.Transcription`` table
+    inside a single ``db.begin()`` transaction.
+    """
+    row_values = {
+        "transcription_id": transcription_id,
+        "project_id": str(project_id),
+        "user_id": str(user_id),
+        "session_id": str(session_id),
+        "speaker": speaker,
+        "text_output": text,
+    }
+    insert_statement = sa.insert(models.Transcription)
+
+    with db.begin() as conn:
+        _ = conn.execute(insert_statement, row_values)
+
+
+def test_build_report_data_groups_questions_and_creates_bidirectional_anchors():
+ """build_report_data groups questions by category and links them to sections.
+
+ Checks the project metadata, the answered/unanswered grouping, the
+ question -> transcript anchors and the transcript -> question back-reference.
+ """
+ db = PersistentDatabase.new_in_memory()
+ user = get_or_add_user_by_oidc_id(db, "oidc-report-user", "Report User")
+
+ project = create_new_project(db, user.user_id, "Mission Report")
+ project_id = ProjectId.from_str(project["id"])
+
+ session_1 = SessionId(ULID())
+ session_2 = SessionId(ULID())
+ create_session(db, session_1, project_id, user.user_id)
+ create_session(db, session_2, project_id, user.user_id)
+
+ # Three chunks at +0 s, +120 s and +600 s; their ids encode these times.
+ t0 = datetime(2026, 1, 1, 10, 0, 0, tzinfo=timezone.utc)
+ t1 = t0 + timedelta(seconds=120)
+ t2 = t0 + timedelta(seconds=600)
+
+ transcript_1 = _ulid_at(t0)
+ transcript_2 = _ulid_at(t1)
+ transcript_3 = _ulid_at(t2)
+
+ # Chunks 1 and 2: Speaker-A in session_1. Chunk 3: Speaker-B in session_2.
+ _insert_transcription(
+ db,
+ transcription_id=transcript_1,
+ project_id=project_id,
+ user_id=user.user_id,
+ session_id=session_1,
+ speaker="Speaker-A",
+ text="We last saw him near the trailhead.",
+ )
+ _insert_transcription(
+ db,
+ transcription_id=transcript_2,
+ project_id=project_id,
+ user_id=user.user_id,
+ session_id=session_1,
+ speaker="Speaker-A",
+ text="He was carrying a blue jacket.",
+ )
+ _insert_transcription(
+ db,
+ transcription_id=transcript_3,
+ project_id=project_id,
+ user_id=user.user_id,
+ session_id=session_2,
+ speaker="Speaker-B",
+ text="He usually checks in every night.",
+ )
+
+ answered_analysis_id = add_ai_analysis(
+ db=db,
+ project_id=project_id,
+ text="What route did he usually take from the trailhead?",
+ category_code="B",
+ span="last saw him near the trailhead",
+ transcript_span_id=TranscriptId.from_str(transcript_1),
+ transcript_context_start=TranscriptId.from_str(transcript_1),
+ transcript_context_end=TranscriptId.from_str(transcript_2),
+ summary="Summary",
+ )
+
+ unanswered_analysis_id = add_ai_analysis(
+ db=db,
+ project_id=project_id,
+ text="What medication does he take?",
+ category_code="C",
+ span="",
+ transcript_span_id=TranscriptId.from_str(transcript_2),
+ transcript_context_start=TranscriptId.from_str(transcript_2),
+ transcript_context_end=TranscriptId.from_str(transcript_2),
+ summary="Summary",
+ )
+ _ = unanswered_analysis_id
+
+ # Mark the first question as asked at chunk 3, then overwrite asked_at so
+ # it falls 30 s after chunk 2.
+ _ = mark_ai_analysis_asked(
+ db,
+ analysis_id=str(answered_analysis_id),
+ asked_at_transcript_id=transcript_3,
+ )
+ explicit_asked_at = t1 + timedelta(seconds=30)
+ with db.begin() as conn:
+ _ = conn.execute(
+ sa.update(models.AIAnalysis)
+ .where(models.AIAnalysis.analysis_id == str(answered_analysis_id))
+ .values(asked_at=explicit_asked_at)
+ )
+
+ report = build_report_data(project["id"], db)
+ assert report is not None
+
+ assert report.project_name == "Mission Report"
+ assert report.start_time == t0
+ # session_1 spans 120 s; session_2 holds a single chunk and adds nothing.
+ assert report.total_duration == timedelta(seconds=120)
+
+ answered = report.answered_by_category.get("B", [])
+ unanswered = report.unanswered_by_category.get("C", [])
+
+ assert len(answered) == 1
+ assert len(unanswered) == 1
+
+ # Chunks 1 and 2 share a speaker and form transcript-1; chunk 3 is transcript-2.
+ answered_entry = answered[0]
+ assert answered_entry.context_anchor == "transcript-1"
+ assert answered_entry.answered_at_anchor == "transcript-2"
+ assert answered_entry.answered_at_text == "2026-01-01 10:02:30 UTC"
+ assert answered_entry.transcript_excerpt is not None
+ assert (
+ "Speaker-A: He was carrying a blue jacket." in answered_entry.transcript_excerpt
+ )
+
+ # The section where the question was asked points back at the question.
+ transcript_section = report.transcript_sections[1]
+ assert len(transcript_section.answered_question_refs) == 1
+ _, question_anchor, question_datetime = transcript_section.answered_question_refs[0]
+ assert question_anchor == answered_entry.question_anchor
+ assert question_datetime == explicit_asked_at
+
+
+def test_generate_report_pdf_returns_pdf_bytes():
+ """generate_report_pdf returns bytes starting with the PDF magic header."""
+ db = PersistentDatabase.new_in_memory()
+ user = get_or_add_user_by_oidc_id(db, "oidc-pdf-user", "PDF User")
+
+ project = create_new_project(db, user.user_id, "PDF Project")
+ project_id = ProjectId.from_str(project["id"])
+
+ session = SessionId(ULID())
+ create_session(db, session, project_id, user.user_id)
+
+ # A single transcript row; no AI analyses are added in this test.
+ transcript_id = _ulid_at(datetime(2026, 1, 1, 12, 0, 0, tzinfo=timezone.utc))
+ _insert_transcription(
+ db,
+ transcription_id=transcript_id,
+ project_id=project_id,
+ user_id=user.user_id,
+ session_id=session,
+ speaker="Speaker-A",
+ text="Sample text",
+ )
+
+ pdf_bytes = generate_report_pdf(project["id"], db)
+ assert pdf_bytes is not None
+ assert pdf_bytes.startswith(b"%PDF")
diff --git a/backend/src/long_run_eval.py b/backend/src/long_run_eval.py
index 8fa76fd..f4c3ed5 100644
--- a/backend/src/long_run_eval.py
+++ b/backend/src/long_run_eval.py
@@ -163,6 +163,7 @@ async def run_analysis(
db,
project_id=project,
text=result.question,
+ category_code=result.category_code,
span=result.grounding_span,
transcript_span_id=None,
transcript_context_start=analysis_results.transcript_context_start,
diff --git a/backend/src/main.py b/backend/src/main.py
index 2ca9a59..a1f30d9 100644
--- a/backend/src/main.py
+++ b/backend/src/main.py
@@ -10,12 +10,18 @@
vosk_transcriber_consumer_pair,
)
from interview_helper.context_manager.messages import (
+ MarkAIAnalysisAsked,
+ MarkAIAnalysisDismissedNotAsked,
UpdateAIAnalysisTag,
+ StarAIAnalysis,
+ UndoAIAnalysisDismissal,
+ UnstarAIAnalysis,
PingMessage,
CatchupMessage,
ProjectMetadataMessage,
TranscriptChunkToSend,
RecordingStateMessage,
+ ErrorMessage,
)
from interview_helper.security.http import (
verify_jwt_token,
@@ -48,7 +54,11 @@
from interview_helper.context_manager.database import (
ProjectListing,
create_new_project,
- update_ai_analysis_tag,
+ mark_ai_analysis_asked,
+ mark_ai_analysis_dismissed_not_asked,
+ undo_ai_analysis_dismissal,
+ star_ai_analysis,
+ unstar_ai_analysis,
get_all_projects,
get_or_add_user_by_oidc_id,
get_project_by_id,
@@ -58,7 +68,7 @@
delete_project,
get_project_creator_and_name,
)
-from interview_helper.context_manager.types import ProjectId, TranscriptId
+from interview_helper.context_manager.types import ProjectId
from fastapi.security import OpenIdConnect
from fastapi import FastAPI, WebSocket, Depends, HTTPException, status
@@ -70,6 +80,7 @@
import tempfile
import sqlalchemy as sa
from interview_helper.downloads.get_transcript import generate_transcript
+from interview_helper.downloads.get_report import generate_report_pdf
from interview_helper.context_manager import models
# Configure logging
@@ -398,19 +409,70 @@ async def websocket_endpoint(
await handle_webrtc_message(context, message)
elif isinstance(message, PingMessage):
await cws.send_message(PingMessage())
- elif isinstance(message, UpdateAIAnalysisTag):
- update_ai_analysis_tag(
- session_manager.db,
- message.analysis_id,
- message.tag,
- ticket.user_id,
- was_asked=message.was_asked,
- asked_at_transcript_id=message.asked_at_transcript_id,
- )
- # Broadcast the tag update to all sessions in this project
- await session_manager.broadcast_to_project(
- context.project_id, message
- )
+ elif isinstance(
+ message,
+ (
+ MarkAIAnalysisAsked,
+ UndoAIAnalysisDismissal,
+ MarkAIAnalysisDismissedNotAsked,
+ StarAIAnalysis,
+ UnstarAIAnalysis,
+ ),
+ ):
+ try:
+ if isinstance(message, MarkAIAnalysisAsked):
+ update = mark_ai_analysis_asked(
+ session_manager.db,
+ message.analysis_id,
+ message.asked_at_transcript_id,
+ )
+ elif isinstance(message, UndoAIAnalysisDismissal):
+ update = undo_ai_analysis_dismissal(
+ session_manager.db,
+ message.analysis_id,
+ )
+ elif isinstance(
+ message, MarkAIAnalysisDismissedNotAsked
+ ):
+ update = mark_ai_analysis_dismissed_not_asked(
+ session_manager.db,
+ message.analysis_id,
+ )
+ elif isinstance(message, StarAIAnalysis):
+ update = star_ai_analysis(
+ session_manager.db,
+ message.analysis_id,
+ )
+ else:
+ update = unstar_ai_analysis(
+ session_manager.db,
+ message.analysis_id,
+ )
+
+ update_message = UpdateAIAnalysisTag(
+ analysis_id=update.analysis_id,
+ tag=update.tag,
+ was_asked=update.was_asked,
+ asked_at_transcript_id=update.asked_at_transcript_id,
+ time_tag_changed=update.time_tag_changed,
+ asked_at=update.asked_at,
+ )
+ await session_manager.broadcast_to_project(
+ context.project_id, update_message
+ )
+ except ValueError as e:
+ logger.warning(
+ "Rejected invalid AI analysis action for %s: %s",
+ message.analysis_id,
+ e,
+ )
+ await cws.send_message(
+ ErrorMessage(
+ error_code="invalid_ai_analysis_action",
+ message=str(e),
+ session_id=str(context.session_id),
+ )
+ )
# handle other message types...
except WebSocketDisconnect:
logger.info(
@@ -631,14 +693,11 @@ async def download_questions(
transcript_lines.append("\tStarred")
if analysis.was_asked is True:
- if analysis.asked_at_transcript_id:
- transcript_id = TranscriptId.from_str(analysis.asked_at_transcript_id)
- timestamp = transcript_id.get_datetime().strftime(
- "%Y-%m-%d %H:%M:%S %Z"
- )
- transcript_lines.append(f"\tAsked at {timestamp}")
- else:
- transcript_lines.append("\tAsked at unknown")
+ assert analysis.asked_at is not None, (
+ "asked_at should be set if was_asked is True"
+ )
+ timestamp = analysis.asked_at.strftime("%Y-%m-%d %H:%M:%S %Z")
+ transcript_lines.append(f"\tAsked at {timestamp}")
elif analysis.was_asked is False:
transcript_lines.append("\tNot Asked")
@@ -653,6 +712,35 @@ async def download_questions(
)
+@app.get("/project/{project_id}/download/report")
+async def download_report(project_id: str, token: Annotated[str, Depends(oidc_scheme)]):
+    """
+    Download a unified interview report for a project as a PDF
+    """
+    # Verify the bearer token first; the verification result itself is unused.
+    bearer_token = token.removeprefix("Bearer ")
+    _ = verify_jwt_token(bearer_token, jwks_client, CLIENT_ID, signing_algos)
+
+    # 404 when the project does not exist.
+    project = get_project_by_id(session_manager.db, ProjectId.from_str(project_id))
+    if not project:
+        raise HTTPException(status_code=404, detail="Project not found")
+
+    # 404 when the report cannot be built (no transcript rows).
+    pdf_bytes = generate_report_pdf(project_id=project_id, db=session_manager.db)
+    if pdf_bytes is None:
+        raise HTTPException(
+            status_code=404, detail="No transcriptions found for this project"
+        )
+
+    download_name = (
+        sanitize_filename(project["name"] or "report", "report") + "_report.pdf"
+    )
+    disposition = f'attachment; filename="{download_name}"'
+
+    return Response(
+        content=pdf_bytes,
+        media_type="application/pdf",
+        headers={"Content-Disposition": disposition},
+    )
+
+
@app.get("/project/{project_id}/download/audio")
async def download_audio(project_id: str, token: Annotated[str, Depends(oidc_scheme)]):
"""
diff --git a/frontend/src/components/AudioSender.tsx b/frontend/src/components/AudioSender.tsx
index ce9df89..cfa129d 100644
--- a/frontend/src/components/AudioSender.tsx
+++ b/frontend/src/components/AudioSender.tsx
@@ -93,21 +93,10 @@ export function AudioSender({ projectId }: AudioSenderProps) {
// Handle starring an insight
const handleStarInsight = useCallback(
(analysisId: string) => {
- // Update local state immediately
- setInsights((prevState) =>
- prevState.map((insight) =>
- insight.analysis_id === analysisId
- ? { ...insight, tag: "starred" }
- : insight,
- ),
- );
-
- // Send update tag message to backend
ws.sendMessage({
- type: MessageType.UPDATE_AI_ANALYSIS_TAG,
+ type: MessageType.STAR_AI_ANALYSIS,
timestamp: new Date().toISOString(),
analysis_id: analysisId,
- tag: "starred",
});
},
[ws],
@@ -116,21 +105,10 @@ export function AudioSender({ projectId }: AudioSenderProps) {
// Handle unstarring an insight
const handleUnstarInsight = useCallback(
(analysisId: string) => {
- // Update local state immediately
- setInsights((prevState) =>
- prevState.map((insight) =>
- insight.analysis_id === analysisId
- ? { ...insight, tag: null }
- : insight,
- ),
- );
-
- // Send update tag message to backend
ws.sendMessage({
- type: MessageType.UPDATE_AI_ANALYSIS_TAG,
+ type: MessageType.UNSTAR_AI_ANALYSIS,
timestamp: new Date().toISOString(),
analysis_id: analysisId,
- tag: null,
});
},
[ws],
@@ -139,110 +117,47 @@ export function AudioSender({ projectId }: AudioSenderProps) {
// Handle dismissing an insight as answered
const handleDismissAsAnswered = useCallback(
(analysisId: string) => {
- // Get the latest transcript ID
const latestTranscriptId =
transcriptChunks.length > 0
? transcriptChunks[transcriptChunks.length - 1]
.transcription_id
: null;
-
- // Send update tag message to backend
- const insight = insights.find((i) => i.analysis_id === analysisId);
- const newTag =
- insight?.tag === "starred" ? "starred_dismissed" : "dismissed";
-
- // Update local state immediately
- setInsights((prevState) =>
- prevState.map((insight) => {
- if (insight.analysis_id === analysisId) {
- return {
- ...insight,
- tag: newTag,
- was_asked: true,
- asked_at_transcript_id: latestTranscriptId,
- };
- }
- return insight;
- }),
- );
+ if (!latestTranscriptId) {
+ return;
+ }
ws.sendMessage({
- type: MessageType.UPDATE_AI_ANALYSIS_TAG,
+ type: MessageType.MARK_AI_ANALYSIS_ASKED,
timestamp: new Date().toISOString(),
analysis_id: analysisId,
- tag: newTag,
- was_asked: true,
asked_at_transcript_id: latestTranscriptId,
});
},
- [ws, insights, transcriptChunks],
+ [ws, transcriptChunks],
);
// Handle dismissing an insight as not answered
const handleDismissNotAnswered = useCallback(
(analysisId: string) => {
- // Send update tag message to backend
- const insight = insights.find((i) => i.analysis_id === analysisId);
- const newTag =
- insight?.tag === "starred" ? "starred_dismissed" : "dismissed";
-
- // Update local state immediately
- setInsights((prevState) =>
- prevState.map((insight) => {
- if (insight.analysis_id === analysisId) {
- return {
- ...insight,
- tag: newTag,
- was_asked: false,
- asked_at_transcript_id: null,
- };
- }
- return insight;
- }),
- );
-
ws.sendMessage({
- type: MessageType.UPDATE_AI_ANALYSIS_TAG,
+ type: MessageType.MARK_AI_ANALYSIS_DISMISSED_NOT_ASKED,
timestamp: new Date().toISOString(),
analysis_id: analysisId,
- tag: newTag,
- was_asked: false,
- asked_at_transcript_id: null,
});
},
- [ws, insights],
+ [ws],
);
// Handle undoing a dismiss (restore to active or starred)
const handleUndoDismiss = useCallback(
(analysisId: string) => {
- // Update local state immediately
- setInsights((prevState) =>
- prevState.map((insight) => {
- if (insight.analysis_id === analysisId) {
- // If it was starred_dismissed, restore to starred, otherwise to active (null)
- const newTag =
- insight.tag === "starred_dismissed"
- ? "starred"
- : null;
- return { ...insight, tag: newTag };
- }
- return insight;
- }),
- );
-
- // Send update tag message to backend
- const insight = insights.find((i) => i.analysis_id === analysisId);
- const newTag =
- insight?.tag === "starred_dismissed" ? "starred" : null;
ws.sendMessage({
- type: MessageType.UPDATE_AI_ANALYSIS_TAG,
+ type: MessageType.UNDO_AI_ANALYSIS_DISMISSAL,
timestamp: new Date().toISOString(),
analysis_id: analysisId,
- tag: newTag,
});
},
- [ws, insights],
+ [ws],
);
const viewportRef = useRef(null);
@@ -577,11 +492,19 @@ export function AudioSender({ projectId }: AudioSenderProps) {
useEffect(() => {
const handleUpdateAIAnalysisTag = (message: UpdateAIAnalysisTag) => {
- // Update insight tag in local state
setInsights((prevState) =>
prevState.map((insight) =>
insight.analysis_id === message.analysis_id
- ? { ...insight, tag: message.tag }
+ ? {
+ ...insight,
+ tag: message.tag,
+ was_asked: message.was_asked ?? null,
+ asked_at_transcript_id:
+ message.asked_at_transcript_id ?? null,
+ asked_at: message.asked_at ?? null,
+ time_tag_changed:
+ message.time_tag_changed ?? null,
+ }
: insight,
),
);
diff --git a/frontend/src/components/audio-sender/DesktopLayout.tsx b/frontend/src/components/audio-sender/DesktopLayout.tsx
index 43ebd51..f86a3e1 100644
--- a/frontend/src/components/audio-sender/DesktopLayout.tsx
+++ b/frontend/src/components/audio-sender/DesktopLayout.tsx
@@ -1,4 +1,5 @@
import { Box } from "@mantine/core";
+import { useCallback, useEffect, useRef, useState } from "react";
import type { AnalysisRow } from "../../lib/message";
import { InsightsPanel } from "./InsightsPanel";
import { RecordingControls } from "./RecordingControls";
@@ -71,15 +72,113 @@ export function DesktopLayout({
onStopRecording,
}: DesktopLayoutProps) {
const isConnected = connectionState === "connected";
+ // Sizing limits for the resizable split; they are combined with clientWidth
+ // below, so they are CSS pixels.
+ const DEFAULT_INSIGHTS_WIDTH = 340;
+ const MIN_TRANSCRIPT_WIDTH = 420;
+ const MIN_INSIGHTS_WIDTH = 260;
+ const RESIZE_HANDLE_WIDTH = 10;
+ const [insightsWidth, setInsightsWidth] = useState(DEFAULT_INSIGHTS_WIDTH);
+ // True only while a drag is in progress; gates the mousemove/mouseup listeners.
+ const [isResizing, setIsResizing] = useState(false);
+ // Typed as an element ref so `.current?.parentElement?.clientWidth` type-checks.
+ // NOTE(review): element type assumed to be a div - confirm against the JSX.
+ const resizeHandleRef = useRef<HTMLDivElement>(null);
+ // Snapshot taken at drag start; null whenever no drag is active.
+ const dragStateRef = useRef<{
+     startX: number;
+     startWidth: number;
+     containerWidth: number;
+ } | null>(null);
+
+ // Clamp a candidate insights-panel width to the space the container allows.
+ const clampInsightsWidth = useCallback((nextWidth: number) => {
+     // Prefer the width captured at drag start; otherwise measure the
+     // resize handle's parent now.
+     const availableWidth =
+         dragStateRef.current?.containerWidth ??
+         resizeHandleRef.current?.parentElement?.clientWidth ??
+         0;
+
+     // Without a measurable container there is nothing to clamp against.
+     if (availableWidth <= 0) return nextWidth;
+
+     // Leave room for the transcript and the handle, but never let the
+     // upper bound fall below the panel's own minimum.
+     const upperBound = Math.max(
+         MIN_INSIGHTS_WIDTH,
+         availableWidth - MIN_TRANSCRIPT_WIDTH - RESIZE_HANDLE_WIDTH,
+     );
+     const cappedWidth = Math.min(nextWidth, upperBound);
+
+     return Math.max(MIN_INSIGHTS_WIDTH, cappedWidth);
+ }, []);
+
+ // Re-clamp the stored panel width when this effect runs and on every
+ // window "resize" event; the listener is removed in the cleanup.
+ useEffect(() => {
+ const handleWindowResize = () => {
+ setInsightsWidth((prevWidth) => clampInsightsWidth(prevWidth));
+ };
+
+ // Run once immediately so the current width is clamped without waiting
+ // for a resize event.
+ handleWindowResize();
+ window.addEventListener("resize", handleWindowResize);
+ return () => window.removeEventListener("resize", handleWindowResize);
+ }, [clampInsightsWidth]);
+
+ // While a drag is active, track the mouse on the window and resize the
+ // insights panel; does nothing when isResizing is false.
+ useEffect(() => {
+ if (!isResizing) return;
+
+ const handleMouseMove = (event: MouseEvent) => {
+ const dragState = dragStateRef.current;
+ if (!dragState) return;
+
+ // Moving the pointer right (positive deltaX) shrinks the panel,
+ // moving it left grows it.
+ const deltaX = event.clientX - dragState.startX;
+ const nextWidth = dragState.startWidth - deltaX;
+ setInsightsWidth(clampInsightsWidth(nextWidth));
+ };
+
+ // Releasing the button ends the drag and drops the snapshot.
+ const handleMouseUp = () => {
+ dragStateRef.current = null;
+ setIsResizing(false);
+ };
+
+ // Remember the body's inline styles so the cleanup can restore them.
+ const previousUserSelect = document.body.style.userSelect;
+ const previousCursor = document.body.style.cursor;
+
+ document.body.style.userSelect = "none";
+ document.body.style.cursor = "col-resize";
+
+ window.addEventListener("mousemove", handleMouseMove);
+ window.addEventListener("mouseup", handleMouseUp);
+
+ // Cleanup: restore the body styles and detach both listeners.
+ return () => {
+ document.body.style.userSelect = previousUserSelect;
+ document.body.style.cursor = previousCursor;
+ window.removeEventListener("mousemove", handleMouseMove);
+ window.removeEventListener("mouseup", handleMouseUp);
+ };
+ }, [clampInsightsWidth, isResizing]);
+
+ // Begin a resize drag (presumably wired to the handle's mouse-down; the
+ // JSX is not visible here - confirm).
+ const handleResizeStart = (event: React.MouseEvent) => {
+     event.preventDefault();
+
+     const parentWidth =
+         resizeHandleRef.current?.parentElement?.clientWidth ?? 0;
+     if (parentWidth > 0) {
+         // Snapshot pointer position and sizes so later mousemove events
+         // are computed from fixed starting values.
+         dragStateRef.current = {
+             containerWidth: parentWidth,
+             startWidth: insightsWidth,
+             startX: event.clientX,
+         };
+         setIsResizing(true);
+     }
+ };
return (
- <>
+
{/* Transcript area fills the rest */}
@@ -108,8 +207,43 @@ export function DesktopLayout({
/>
+
+
+
+
{/* Insights Panel */}
-
+
- >
+
);
}
diff --git a/frontend/src/components/audio-sender/InsightsPanel.tsx b/frontend/src/components/audio-sender/InsightsPanel.tsx
index 58dc08c..1721a19 100644
--- a/frontend/src/components/audio-sender/InsightsPanel.tsx
+++ b/frontend/src/components/audio-sender/InsightsPanel.tsx
@@ -48,10 +48,30 @@ export function InsightsPanel({
a.tag !== "dismissed" &&
a.tag !== "starred_dismissed",
);
- const starredInsights = insights.filter((a) => a.tag === "starred");
- const dismissedInsights = insights.filter(
- (a) => a.tag === "dismissed" || a.tag === "starred_dismissed",
- );
+ const starredInsights = insights
+ .filter((a) => a.tag === "starred")
+ .sort((a, b) => {
+ // Sort by time_tag_changed, oldest first (newest at bottom)
+ const timeA = a.time_tag_changed
+ ? new Date(a.time_tag_changed).getTime()
+ : 0;
+ const timeB = b.time_tag_changed
+ ? new Date(b.time_tag_changed).getTime()
+ : 0;
+ return timeA - timeB;
+ });
+ const dismissedInsights = insights
+ .filter((a) => a.tag === "dismissed" || a.tag === "starred_dismissed")
+ .sort((a, b) => {
+ // Sort by time_tag_changed, oldest first
+ const timeA = a.time_tag_changed
+ ? new Date(a.time_tag_changed).getTime()
+ : 0;
+ const timeB = b.time_tag_changed
+ ? new Date(b.time_tag_changed).getTime()
+ : 0;
+ return timeA - timeB;
+ });
const renderActiveInsight = (analysis: AnalysisRow) => (
@@ -366,18 +386,15 @@ export function InsightsPanel({
here in real time.
) : (
- activeInsights
- .reverse()
- .map((analysis, index) => (
-
- {renderActiveInsight(analysis)}
- {index <
- activeInsights.length -
- 1 && }
-
- ))
+ activeInsights.map((analysis, index) => (
+
+ {renderActiveInsight(analysis)}
+ {index <
+ activeInsights.length - 1 && (
+
+ )}
+
+ ))
)}
@@ -397,18 +414,15 @@ export function InsightsPanel({
No starred questions.
) : (
- starredInsights
- .reverse()
- .map((analysis, index) => (
-
- {renderStarredInsight(analysis)}
- {index <
- starredInsights.length -
- 1 && }
-
- ))
+ starredInsights.map((analysis, index) => (
+
+ {renderStarredInsight(analysis)}
+ {index <
+ starredInsights.length - 1 && (
+
+ )}
+
+ ))
)}
diff --git a/frontend/src/components/audio-sender/TranscriptView.tsx b/frontend/src/components/audio-sender/TranscriptView.tsx
index d772b4b..fb4636b 100644
--- a/frontend/src/components/audio-sender/TranscriptView.tsx
+++ b/frontend/src/components/audio-sender/TranscriptView.tsx
@@ -18,6 +18,7 @@ import {
downloadTranscript,
downloadQuestions,
downloadAudio,
+ downloadReport,
} from "../../lib/api";
import { useState } from "react";
import { TranscriptSection } from "./TranscriptSection";
@@ -109,6 +110,18 @@ export function TranscriptView({
}
};
+ // Download the project's report. `downloading` is set to "report" for the
+ // duration, and failures are only logged to the console.
+ const handleDownloadReport = async () => {
+     const accessToken = auth.user?.access_token;
+     if (!projectId || !accessToken) {
+         return;
+     }
+     try {
+         setDownloading("report");
+         await downloadReport(projectId, accessToken);
+     } catch (err) {
+         console.error("Failed to download report:", err);
+     } finally {
+         // Always clear the marker so the menu items are enabled again.
+         setDownloading(null);
+     }
+ };
+
return (
@@ -149,6 +162,13 @@ export function TranscriptView({
Downloads
+ }
+ onClick={handleDownloadReport}
+ disabled={downloading !== null}
+ >
+ Download Report
+
}
onClick={handleDownloadAudio}
diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts
index 34d976d..1ff0691 100644
--- a/frontend/src/lib/api.ts
+++ b/frontend/src/lib/api.ts
@@ -182,6 +182,43 @@ export async function downloadAudio(
document.body.removeChild(a);
}
+/**
+ * Download the unified report for a project and save it through the browser.
+ *
+ * @param projectId - Project whose report is requested.
+ * @param token - Access token, sent as a Bearer credential.
+ * @throws Error when the backend answers with a non-OK status.
+ */
+export async function downloadReport(
+    projectId: string,
+    token: string,
+): Promise<void> {
+    const response = await fetch(
+        `${BACKEND_URL}/project/${projectId}/download/report`,
+        {
+            headers: {
+                Authorization: `Bearer ${token}`,
+            },
+        },
+    );
+
+    if (!response.ok) {
+        throw new Error(
+            `Failed to download report: ${response.status} ${response.statusText}`,
+        );
+    }
+
+    const blob = await response.blob();
+    const url = window.URL.createObjectURL(blob);
+    const a = document.createElement("a");
+    a.href = url;
+    // Use the server-provided filename; fall back to "report.pdf" when the
+    // header is missing or carries no `filename=` part.
+    a.download =
+        response.headers
+            .get("content-disposition")
+            ?.split("filename=")[1]
+            ?.replace(/"/g, "") || "report.pdf";
+    // Click a temporary anchor to trigger the save, then clean up.
+    document.body.appendChild(a);
+    a.click();
+    window.URL.revokeObjectURL(url);
+    document.body.removeChild(a);
+}
+
/**
* Get project info including session count
*/
diff --git a/frontend/src/lib/message.ts b/frontend/src/lib/message.ts
index 2600022..c518268 100644
--- a/frontend/src/lib/message.ts
+++ b/frontend/src/lib/message.ts
@@ -9,6 +9,12 @@ export const MessageType = {
CATCHUP: "catchup",
PROJECT_METADATA: "project_metadata",
UPDATE_AI_ANALYSIS_TAG: "update_ai_analysis_tag",
+ MARK_AI_ANALYSIS_ASKED: "mark_ai_analysis_asked",
+ UNDO_AI_ANALYSIS_DISMISSAL: "undo_ai_analysis_dismissal",
+ MARK_AI_ANALYSIS_DISMISSED_NOT_ASKED:
+ "mark_ai_analysis_dismissed_not_asked",
+ STAR_AI_ANALYSIS: "star_ai_analysis",
+ UNSTAR_AI_ANALYSIS: "unstar_ai_analysis",
RECORDING_STATE: "recording_state",
} as const;
@@ -59,6 +65,7 @@ export interface AIResultMessage {
export interface AnalysisRow {
analysis_id: string;
text: string;
+ category_code: string;
span: string | null;
transcript_span_id: string | null;
is_dismissed: boolean;
@@ -66,6 +73,8 @@ export interface AnalysisRow {
ordinal: number;
was_asked?: boolean | null;
asked_at_transcript_id?: string | null;
+ asked_at?: string | null;
+ time_tag_changed?: string | null;
}
export interface CatchupMessage {
@@ -89,6 +98,39 @@ export interface UpdateAIAnalysisTag {
tag: "starred" | "dismissed" | "starred_dismissed" | null;
was_asked?: boolean | null;
asked_at_transcript_id?: string | null;
+ asked_at?: string | null;
+ time_tag_changed?: string | null;
+}
+
+/**
+ * Message marking an analysis as asked.
+ * AudioSender sends it with the transcription_id of the latest transcript
+ * chunk and an ISO-8601 `timestamp`; it sends nothing when there is no chunk.
+ */
+export interface MarkAIAnalysisAsked {
+ type: typeof MessageType.MARK_AI_ANALYSIS_ASKED;
+ timestamp: string;
+ analysis_id: string;
+ asked_at_transcript_id: string;
+}
+
+/**
+ * Message undoing the dismissal of an analysis.
+ * Sent by AudioSender's handleUndoDismiss; it carries no tag, only the
+ * analysis id.
+ */
+export interface UndoAIAnalysisDismissal {
+ type: typeof MessageType.UNDO_AI_ANALYSIS_DISMISSAL;
+ timestamp: string;
+ analysis_id: string;
+}
+
+/**
+ * Message dismissing an analysis without it having been asked.
+ * Sent by AudioSender's handleDismissNotAnswered.
+ */
+export interface MarkAIAnalysisDismissedNotAsked {
+ type: typeof MessageType.MARK_AI_ANALYSIS_DISMISSED_NOT_ASKED;
+ timestamp: string;
+ analysis_id: string;
+}
+
+/**
+ * Message starring an analysis.
+ * Sent by AudioSender's handleStarInsight.
+ */
+export interface StarAIAnalysis {
+ type: typeof MessageType.STAR_AI_ANALYSIS;
+ timestamp: string;
+ analysis_id: string;
+}
+
+/**
+ * Message removing the star from an analysis.
+ * Sent by AudioSender's handleUnstarInsight.
+ */
+export interface UnstarAIAnalysis {
+ type: typeof MessageType.UNSTAR_AI_ANALYSIS;
+ timestamp: string;
+ analysis_id: string;
}
export interface RecordingStateMessage {
@@ -111,6 +153,11 @@ export type Message =
| CatchupMessage
| ProjectMetadataMessage
| UpdateAIAnalysisTag
+ | MarkAIAnalysisAsked
+ | UndoAIAnalysisDismissal
+ | MarkAIAnalysisDismissedNotAsked
+ | StarAIAnalysis
+ | UnstarAIAnalysis
| RecordingStateMessage;
export interface Envelope {