Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions backend/alembic/versions/869cfd49ebd5_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def upgrade() -> None:
sa.Column("oidc_id", sa.String(length=255), nullable=False),
sa.Column(
"updated_at",
sa.DateTime(),
sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
Expand All @@ -42,13 +42,13 @@ def upgrade() -> None:
sa.Column("creator_user_id", sa.String(length=26), nullable=False),
sa.Column(
"created_at",
sa.DateTime(),
sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(),
sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
Expand All @@ -65,11 +65,11 @@ def upgrade() -> None:
sa.Column("user_id", sa.String(length=26), nullable=False),
sa.Column(
"started_at",
sa.DateTime(),
sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
sa.Column("ended_at", sa.DateTime(), nullable=True),
sa.Column("ended_at", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(
["project_id"],
["project.project_id"],
Expand All @@ -90,13 +90,13 @@ def upgrade() -> None:
sa.Column("speaker", sa.String(length=100), nullable=True),
sa.Column(
"created_at",
sa.DateTime(),
sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
sa.Column(
"updated_at",
sa.DateTime(),
sa.DateTime(timezone=True),
server_default=sa.text("(CURRENT_TIMESTAMP)"),
nullable=False,
),
Expand All @@ -115,15 +115,17 @@ def upgrade() -> None:
sa.Column("analysis_id", sa.String(length=26), nullable=False),
sa.Column("project_id", sa.String(length=26), nullable=False),
sa.Column("text", sa.Text(), nullable=False),
sa.Column("category_code", sa.String(length=1), nullable=False),
sa.Column("span", sa.Text(), nullable=True),
sa.Column("transcript_span_id", sa.String(length=26), nullable=True),
sa.Column("transcript_context_start", sa.String(length=26), nullable=False),
sa.Column("transcript_context_end", sa.String(length=26), nullable=False),
sa.Column("summary", sa.Text(), nullable=False),
sa.Column("tag", sa.String(length=50), nullable=True),
sa.Column("time_tag_changed", sa.DateTime(), nullable=True),
sa.Column("time_tag_changed", sa.DateTime(timezone=True), nullable=True),
sa.Column("was_asked", sa.Boolean(), nullable=True),
sa.Column("asked_at_transcript_id", sa.String(length=26), nullable=True),
sa.Column("asked_at", sa.DateTime(timezone=True), nullable=True),
sa.ForeignKeyConstraint(
["project_id"],
["project.project_id"],
Expand Down
16 changes: 15 additions & 1 deletion backend/src/interview_helper/ai_analysis/ai_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
ProjectId,
TranscriptId,
)
from interview_helper.context_manager.question_categories import (
QUESTION_CATEGORIES,
normalize_question_category_code,
)
from langchain_openai import AzureChatOpenAI
from langchain.tools import ToolRuntime, tool # pyright: ignore[reportUnknownVariableType]
from langchain.agents import create_agent # pyright: ignore[reportUnknownVariableType]
Expand All @@ -40,6 +44,7 @@
# One follow-up question proposed by the analyzer; the field names match the
# per-question output spec in SimpleAnalyzer.SYSTEM_PROMPT (question,
# grounding_span, category_code).
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose -- assuming BaseModel is pydantic's, a docstring would be emitted as
# the schema "description" and could change what the model is shown. Confirm
# before converting these notes to a docstring.
class Question(BaseModel):
# Text of the follow-up question.
question: str
# Short verbatim quote from the transcript that the question is grounded in
# (per the prompt's output spec). Passed through clean_grounding_span() when
# the AIQuestion list is built later in this file.
grounding_span: str
# Category letter for the question; the prompt asks for a single letter in
# B-W. Declared as a plain str, so nothing is enforced here -- the value is
# passed through normalize_question_category_code() when AIQuestion is built.
category_code: str


class Analysis(BaseModel):
Expand All @@ -55,7 +60,11 @@ class ProjectContext:
class SimpleAnalyzer:
"""Simple LLM-based interview analyzer."""

SYSTEM_PROMPT: str = dedent("""\
CATEGORY_PROMPT_BLOCK: str = "\n".join(
[f" - {code}: {label}" for code, label in QUESTION_CATEGORIES]
)

SYSTEM_PROMPT: str = dedent(f"""\
ROLE: Interview Follow-Up Generator for SAR Profiles

You will receive a chunk of transcript from an in-depth profile interview for a Search and Rescue operation.
Expand All @@ -79,8 +88,12 @@ class SimpleAnalyzer:
7) Output: ONE to THREE questions, each with:
- question (string)
- grounding_span (short verbatim quote from the transcript)
- category_code (single letter in B-W from list below)
As well as a brief summary of the entire situation so far, based on your knowledge.

Category codes:
{CATEGORY_PROMPT_BLOCK}

ALWAYS use the provided TOOLS to check for duplicates or
gather more context from the transcript history before finalizing your questions.

Expand Down Expand Up @@ -246,6 +259,7 @@ def clean_grounding_span(span: str) -> str:
AIQuestion(
question=q.question,
grounding_span=clean_grounding_span(q.grounding_span),
category_code=normalize_question_category_code(q.category_code),
)
for q in analysis.questions
]
Expand Down
2 changes: 1 addition & 1 deletion backend/src/interview_helper/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def model_post_init(self, __context):
bytes_per_sample: int = 2

# AI Processing
process_transcript_every_secs: float = 60.0 * 2 # 2 minutes
process_transcript_every_secs: float = 60.0 * 4 # 4 minutes
process_transcript_every_word_count: int = 100

azure_api_endpoint: str = Field(alias="OPENAI_API_ENDPOINT")
Expand Down
Loading
Loading