def download_kb_pdf(doc_id):
    """Download the original PDF bytes for a KB document.

    Calls GET /docs/{doc_id}/file on the MCP KB server, authenticating with
    the bearer token from ``settings.KB_MCP_JWT_TOKEN``.

    Args:
        doc_id: Identifier of the document on the KB server; interpolated
            into the request path as-is.

    Returns:
        ``(filename, content)`` on success. ``content`` is the response body
        as bytes. ``filename`` is the name advertised in the response's
        Content-Disposition header with any directory components removed,
        or ``kb_doc_{doc_id}.pdf`` when the header is missing or unusable.
        ``(None, None)`` on 404 — the KB has no local file for that document
        and the caller should fall back to a tracking-only record.

    Raises:
        Other HTTP error statuses raise via ``response.raise_for_status()``;
        transport errors raise via httpx.
    """
    request_headers = {
        "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
    }
    endpoint = f"{settings.KB_MCP_HOST}/docs/{doc_id}/file"

    with httpx.Client() as client:
        response = client.get(
            endpoint,
            headers=request_headers,
            timeout=settings.KB_MCP_PDF_TIMEOUT,
        )

    if response.status_code == 404:
        return None, None
    response.raise_for_status()

    # Prefer the KB's own filename; fall back to a synthetic one.
    filename = _filename_from_content_disposition(
        response.headers.get("content-disposition", ""),
        default=f"kb_doc_{doc_id}.pdf",
    )
    return filename, response.content


def _filename_from_content_disposition(header_value, default):
    """Return a bare file name parsed from a Content-Disposition value, or *default*."""
    if not header_value:
        return default

    # Function-scope import keeps this block self-contained. The stdlib
    # parser handles quoted values (including embedded ';'), case-insensitive
    # parameter names and the RFC 2231 ``filename*=`` form, none of which a
    # plain split on "filename=" gets right.
    from email.message import Message

    try:
        parsed = Message()
        parsed["content-disposition"] = header_value
        candidate = parsed.get_filename()
    except (ValueError, LookupError, TypeError):
        # Filename extraction is best-effort: a malformed header must not
        # fail a download whose body was fetched successfully.
        return default

    if not candidate:
        return default

    # The header is server-controlled input: keep only the final path
    # component so names like "../../x.pdf" or "C:\\dir\\x.pdf" cannot carry
    # directory parts into whatever the caller does with the name.
    candidate = candidate.replace("\\", "/").rsplit("/", 1)[-1].strip()
    if candidate in ("", ".", ".."):
        return default
    return candidate
# Generated by Django 6.0.2 on 2026-05-27 22:29

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``PDFResource.file`` so the field accepts null and blank values."""

    dependencies = [("ask", "0013_pdfresource_original_filename")]

    operations = [
        migrations.AlterField(
            model_name="pdfresource",
            name="file",
            # Same field definition as before, now with blank=True and null=True.
            field=models.FileField(upload_to="kb_pdfs/", null=True, blank=True),
        ),
    ]
PDFs in KB but not tracked internally
Title - {% if can_delete_pdf %}Actions{% endif %} + {% if can_add_pdf or can_delete_pdf %}Actions{% endif %}