Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ dependencies = [
"nltk>=3.8",
"langchain-text-splitters>=0.3.0",
"pathspec>=1.1.1",
"python-docx>=1.1.0",
]

[project.optional-dependencies]
Expand Down
19 changes: 19 additions & 0 deletions src/perspicacite/rag/agentic/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from perspicacite.models.kb import chroma_collection_name_for_kb
from perspicacite.provenance.context import get_collector
from perspicacite.rag.dynamic_kb import DynamicKnowledgeBase
from perspicacite.rag.export.apa_docx_exporter import export_apa_docx
from perspicacite.rag.utils import format_references_academic
from perspicacite.retrieval.hybrid import hybrid_retrieval

Expand Down Expand Up @@ -2427,6 +2428,15 @@ async def _generate_answer(
answer = answer.rstrip() + "\n\n" + references_section
logger.info("agentic_references_section_added", answer_chars=len(answer))

doc_id = session.session_id.split("-")[0] if session else "manuscript"
output_path = f"output/{doc_id}_manuscript.docx"
print("OUTPUT PATH:", output_path)
try:
export_apa_docx(answer, papers, output_path)
logger.info("agentic_manuscript_exported", path=output_path)
except Exception:
logger.warning("agentic_manuscript_export_failed", exc_info=True)

return answer, citation_map

_CITE_RE = re.compile(r"\[(\d+(?:\s*,\s*\d+)*)\]")
Expand Down Expand Up @@ -2626,6 +2636,15 @@ async def _generate_single_paper_answer(
if references_section:
answer = answer.rstrip() + "\n\n" + references_section

doc_id = session.session_id.split("-")[0] if session else "manuscript"
output_path = f"output/{doc_id}_manuscript.docx"
print("OUTPUT PATH:", output_path)
try:
export_apa_docx(answer, papers, output_path)
logger.info("agentic_manuscript_exported", path=output_path)
except Exception:
logger.warning("agentic_manuscript_export_failed", exc_info=True)

return answer, citation_map

def _build_numbered_paper_list(
Expand Down
56 changes: 56 additions & 0 deletions src/perspicacite/rag/export/apa_docx_exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from docx import Document
from typing import List, Dict


def format_authors(authors):
    """Join author names into a single APA-style author string.

    Args:
        authors: A sequence of author names, a single name given as a plain
            string, or ``None``. ``None`` and blank entries in a sequence are
            ignored; remaining entries are converted with ``str`` and stripped.

    Returns:
        ``""`` when there is no usable name, the name itself for one author,
        ``"A & B"`` for two authors, and ``"A, B, & C"`` for three or more.
    """
    if not authors:
        return ""

    # A bare string is a single author; iterating it would split the name
    # into individual characters.
    if isinstance(authors, str):
        return authors.strip()

    names = []
    for author in authors:
        if author is None:
            continue
        name = str(author).strip()
        if name:
            names.append(name)

    if not names:
        return ""

    if len(names) == 1:
        return names[0]

    if len(names) == 2:
        return f"{names[0]} & {names[1]}"

    return ", ".join(names[:-1]) + f", & {names[-1]}"


def to_apa(paper: Dict) -> str:
    """Render one paper's metadata as an APA-style reference string.

    Reads the ``"authors"``, ``"year"``, ``"title"``, ``"journal"`` and
    ``"doi"`` keys of *paper*. Missing, ``None`` or blank fields are omitted
    instead of leaving stray spaces or periods in the output; a missing year
    is rendered as ``n.d.``.

    Args:
        paper: Mapping holding the paper's metadata.

    Returns:
        A string shaped like ``"Authors (Year). Title. Journal. https://doi.org/..."``
        containing only the parts that are available.
    """
    # ``or`` (rather than a .get default) also covers keys present with None.
    authors = format_authors(paper.get("authors") or [])
    year = paper.get("year") or "n.d."
    title = str(paper.get("title") or "").strip()
    journal = str(paper.get("journal") or "").strip()
    doi = str(paper.get("doi") or "").strip()

    segments = [f"{authors} ({year})." if authors else f"({year})."]
    for field in (title, journal):
        if not field:
            continue
        # Do not stack a period on top of existing terminal punctuation.
        segments.append(field if field.endswith((".", "?", "!")) else f"{field}.")
    text = " ".join(segments)

    if doi:
        # Accept the common ways a DOI is written and reduce it to the bare
        # identifier before building the canonical resolver URL.
        lowered = doi.lower()
        for prefix in (
            "https://doi.org/",
            "http://doi.org/",
            "https://dx.doi.org/",
            "http://dx.doi.org/",
            "doi:",
        ):
            if lowered.startswith(prefix):
                doi = doi[len(prefix):].strip()
                break
        if doi:
            text += f" https://doi.org/{doi}"

    return text


def export_apa_docx(manuscript_text: str, papers: List[Dict], output_path: str):
    """Write the manuscript and a numbered APA reference list to a .docx file.

    The document gets a "Manuscript" heading followed by *manuscript_text* as
    one paragraph, then a "References (APA Style)" heading followed by one
    numbered paragraph per unique paper, in input order.

    Args:
        manuscript_text: Body text of the manuscript.
        papers: Paper metadata dicts. Papers sharing a DOI (or, when there is
            no DOI, a title) are listed once; papers with neither are always
            kept. ``None`` is treated as an empty list.
        output_path: Destination path. Missing parent directories are created.

    Returns:
        *output_path*, unchanged.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    doc = Document()

    doc.add_heading("Manuscript", level=1)
    doc.add_paragraph(manuscript_text)

    doc.add_heading("References (APA Style)", level=1)

    seen = set()
    unique_papers = []
    for paper in papers or []:
        key = paper.get("doi") or paper.get("title")
        # Only deduplicate papers that have an identifying key; otherwise
        # every paper lacking both DOI and title would collapse into one.
        if key:
            if key in seen:
                continue
            seen.add(key)
        unique_papers.append(paper)

    for index, paper in enumerate(unique_papers, 1):
        doc.add_paragraph(f"{index}. {to_apa(paper)}")

    # Saving fails if the target directory (e.g. "output/") does not exist.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    doc.save(output_path)
    return output_path