Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions hospexplorer/ask/kb_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,47 @@ def add_pdf_to_kb(file_bytes, filename, title, url=None):
raise last_exc


def _filename_from_content_disposition(header_value, default):
    """Return the bare filename advertised by a Content-Disposition header.

    Falls back to ``default`` when the header is empty, carries no filename
    parameter, or the advertised name is empty once directory parts are
    removed.
    """
    # Function-scope import: the module's import block is not touched.
    from email.message import Message

    if not header_value:
        return default

    # The stdlib parser copes with quoted values (including ';' inside the
    # quotes), case-insensitive parameter names and the RFC 2231 ``filename*=``
    # form, none of which a plain split on "filename=" handles.
    parsed = Message()
    parsed["content-disposition"] = header_value
    advertised = parsed.get_filename()
    if not advertised:
        return default

    # Keep only the final path component so the header cannot inject
    # directory parts ("../x.pdf", "C:\\tmp\\x.pdf") into the stored name.
    bare = advertised.replace("\\", "/").rsplit("/", 1)[-1].strip()
    if bare in ("", ".", ".."):
        return default
    return bare


def download_kb_pdf(doc_id):
    """Download the original PDF bytes for a KB document.

    Calls GET /docs/{doc_id}/file on the MCP KB server.

    Returns:
        (filename, bytes) on success. The filename is taken from the
        response's Content-Disposition header when it provides one, otherwise
        the synthetic name ``kb_doc_{doc_id}.pdf`` is used.
        (None, None) on 404 — the KB has no local file for that document and
        the caller should fall back to a tracking-only record.

    Raises:
        Other HTTP error statuses raise via response.raise_for_status();
        transport errors raise via httpx.
    """
    headers = {
        "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
    }
    endpoint = f"{settings.KB_MCP_HOST}/docs/{doc_id}/file"

    with httpx.Client() as client:
        response = client.get(
            endpoint,
            headers=headers,
            timeout=settings.KB_MCP_PDF_TIMEOUT,
        )

    # 404 is an expected outcome, not an error: signal "no file" to the caller.
    if response.status_code == 404:
        return None, None
    response.raise_for_status()

    # Prefer the KB-provided filename; fall back to a synthetic one.
    filename = _filename_from_content_disposition(
        response.headers.get("content-disposition", ""),
        f"kb_doc_{doc_id}.pdf",
    )
    return filename, response.content


def delete_kb_document(doc_id):
"""Delete a document from the MCP KB server by its ID.

Expand Down
18 changes: 18 additions & 0 deletions hospexplorer/ask/migrations/0014_alter_pdfresource_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 6.0.2 on 2026-05-27 22:29

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``PDFResource.file`` so the field accepts blank and NULL values."""

    # Must run after the migration that introduced ``original_filename``.
    dependencies = [("ask", "0013_pdfresource_original_filename")]

    operations = [
        migrations.AlterField(
            model_name="pdfresource",
            name="file",
            field=models.FileField(
                upload_to="kb_pdfs/",
                null=True,
                blank=True,
            ),
        ),
    ]
2 changes: 1 addition & 1 deletion hospexplorer/ask/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class Meta:


class PDFResource(Resource):
file = models.FileField(upload_to="kb_pdfs/")
file = models.FileField(upload_to="kb_pdfs/", null=True, blank=True)
# original upload name, kept so re-uploads can be skipped — Django renames file.name on collision
original_filename = models.CharField(max_length=255, blank=True, default="")
mcp_kb_document_id = models.IntegerField(null=True, blank=True, help_text="Document ID returned by the MCP Knowledge Base.")
Expand Down
60 changes: 58 additions & 2 deletions hospexplorer/ask/templates/kb/resources.html
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,27 @@ <h5 class="kb-section-heading">PDFs in KB but not tracked internally</h5>
<thead>
<tr>
<th>Title</th>
{% if can_delete_pdf %}<th>Actions</th>{% endif %}
{% if can_add_pdf or can_delete_pdf %}<th>Actions</th>{% endif %}
</tr>
</thead>
<tbody>
<template x-for="doc in untrackedPdfs" :key="doc.doc_id">
<tr class="kb-row-untracked">
<td x-text="doc.title"></td>
{% if can_delete_pdf %}
{% if can_add_pdf or can_delete_pdf %}
<td class="text-nowrap align-middle">
{% if can_add_pdf %}
<button
@click="trackPdfInHopper(doc)"
:disabled="loadingKey === 'track-pdf-' + doc.doc_id"
class="btn btn-sm btn-primary-maroon kb-action-btn me-1">
<span x-show="loadingKey !== 'track-pdf-' + doc.doc_id">Track in Hopper</span>
<span x-show="loadingKey === 'track-pdf-' + doc.doc_id">
<span class="spinner-border spinner-border-sm me-1"></span> Tracking...
</span>
</button>
{% endif %}
{% if can_delete_pdf %}
<button
@click="removeFromKb(doc)"
:disabled="loadingKey === 'remove-' + doc.doc_id"
Expand All @@ -281,6 +293,7 @@ <h5 class="kb-section-heading">PDFs in KB but not tracked internally</h5>
<span class="spinner-border spinner-border-sm me-1"></span> Removing...
</span>
</button>
{% endif %}
</td>
{% endif %}
</tr>
Expand Down Expand Up @@ -541,6 +554,49 @@ <h5 class="kb-section-heading">PDFs in KB but not tracked internally</h5>
this.loadingKey = null;
},

async trackPdfInHopper(doc) {
this.loadingKey = `track-pdf-${doc.doc_id}`;
try {
const response = await fetch('{% url "ask:kb-add-pdf-resource" %}', {
method: 'POST',
headers: {
'X-CSRFToken': '{{ csrf_token }}',
'Content-Type': 'application/json',
},
body: JSON.stringify({ doc_id: doc.doc_id, title: doc.title }),
});
const data = await response.json();
if (data.success) {
this.untrackedPdfs = this.untrackedPdfs.filter(d => d.doc_id !== doc.doc_id);
this.pdfStatusMap[data.id] = 'in_kb';

const tbody = document.querySelector('#pdfs-pane .kb-table tbody');
if (tbody) {
const tr = document.createElement('tr');
tr.className = 'kb-row-maroon';
const filename = data.filename || '';
const truncFilename = filename.length > 40 ? filename.slice(0, 37) + '...' : filename;
const now = new Date().toLocaleString('en-US', { month: 'short', day: 'numeric', year: 'numeric', hour: 'numeric', minute: '2-digit' });
tr.innerHTML = `
<td>${data.title}</td>
<td class="text-muted">${truncFilename}</td>
<td class="text-muted">${now}</td>
<td>In Sync</td>
{% if can_change_pdf %}<td></td>{% endif %}
`;
tbody.prepend(tr);
}

this.showToast(`Now tracking "${doc.title}" in Hopper.`);
} else {
this.showToast(data.error || 'Failed to track PDF.', 'error');
}
} catch (e) {
this.showToast('Failed to connect to server.', 'error');
}
this.loadingKey = null;
},

async removeFromKb(doc) {
this.loadingKey = `remove-${doc.doc_id}`;
try {
Expand Down
155 changes: 153 additions & 2 deletions hospexplorer/ask/tests.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import json
import shutil
import tempfile
from unittest.mock import patch

from django.contrib.auth.models import User
from django.conf import settings
from django.contrib.auth.models import Permission, User
from django.core.files.base import ContentFile
from django.test import TestCase, override_settings

from ask.models import PDFResource
from ask.models import PDFResource, TermsAcceptance


class PDFResourceDeletionTests(TestCase):
Expand Down Expand Up @@ -48,3 +50,152 @@ def test_successful_file_removal_is_not_flagged(self):
pdf.file.save("report.pdf", ContentFile(b"%PDF-1.4 test"), save=True)
pdf.delete()
self.assertFalse(pdf.file_deletion_failed)


class KBAddPdfResourceViewTests(TestCase):
    """Exercise the Track-in-Hopper endpoint for untracked KB PDFs.

    Verifies both branches: (a) KB serves the file back, in which case the
    new PDFResource has the bytes attached; (b) KB returns 404, in which
    case the row is created as tracking-only (file=None) — legacy KB docs
    ingested before local_path was recorded land in this branch.
    """

    URL = "/hopper/ask/kb/add-pdf-resource/"

    def setUp(self):
        # Redirect MEDIA_ROOT to a throwaway directory so files saved by the
        # view never land in real media storage. Cleanups run LIFO: the
        # settings override is disabled first, then the directory is removed.
        media_root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, media_root, ignore_errors=True)
        override = override_settings(MEDIA_ROOT=media_root)
        override.enable()
        self.addCleanup(override.disable)

        self.user = User.objects.create_user("curator", password="pw")
        self.user.user_permissions.add(
            Permission.objects.get(codename="add_pdfresource")
        )
        # NOTE(review): presumably the view is gated on terms acceptance —
        # confirm against the view/middleware.
        TermsAcceptance.objects.create(
            user=self.user, terms_version=settings.TERMS_VERSION
        )
        self.client.force_login(self.user)

    def _post(self, body=None, raw=None):
        """POST to the endpoint as JSON; ``raw`` sends the payload verbatim."""
        payload = raw if raw is not None else json.dumps(body or {})
        return self.client.post(self.URL, data=payload, content_type="application/json")

    @patch("ask.views.download_kb_pdf")
    def test_attaches_file_when_kb_returns_bytes(self, mock_download):
        mock_download.return_value = ("1780-report.pdf", b"%PDF-1.4 fake")
        resp = self._post({"doc_id": 99, "title": "Fresh doc"})
        self.assertEqual(resp.status_code, 200)
        pdf = PDFResource.objects.get(pk=resp.json()["id"])
        self.assertEqual(pdf.mcp_kb_document_id, 99)
        self.assertTrue(pdf.file)
        # Read through a context manager so the handle is closed again
        # before the temporary MEDIA_ROOT is removed.
        with pdf.file.open("rb") as stored:
            self.assertEqual(stored.read(), b"%PDF-1.4 fake")
        self.assertEqual(pdf.original_filename, "1780-report.pdf")
        self.assertEqual(pdf.status_message, "")

    @patch("ask.views.download_kb_pdf")
    def test_creates_tracking_only_when_kb_has_no_file(self, mock_download):
        mock_download.return_value = (None, None)
        resp = self._post({"doc_id": 42, "title": "Legacy doc"})
        self.assertEqual(resp.status_code, 200)
        pdf = PDFResource.objects.get(pk=resp.json()["id"])
        self.assertEqual(pdf.mcp_kb_document_id, 42)
        self.assertFalse(pdf.file)
        self.assertEqual(
            pdf.status_message, "Tracked from KB; file not stored locally."
        )

    @patch("ask.views.download_kb_pdf")
    def test_blank_title_falls_back_to_placeholder(self, mock_download):
        mock_download.return_value = (None, None)
        resp = self._post({"doc_id": 7})
        self.assertEqual(resp.status_code, 200)
        pdf = PDFResource.objects.get(pk=resp.json()["id"])
        self.assertEqual(pdf.title, "Untitled KB doc 7")

    @patch("ask.views.download_kb_pdf")
    def test_duplicate_doc_id_refused(self, mock_download):
        mock_download.return_value = (None, None)
        first = self._post({"doc_id": 42, "title": "first"})
        self.assertEqual(first.status_code, 200)
        second = self._post({"doc_id": 42, "title": "second"})
        self.assertEqual(second.status_code, 400)
        self.assertIn("Already tracked", second.json()["error"])
        self.assertEqual(PDFResource.objects.filter(mcp_kb_document_id=42).count(), 1)

    def test_missing_doc_id_rejected(self):
        resp = self._post({"title": "no id"})
        self.assertEqual(resp.status_code, 400)
        self.assertIn("doc_id", resp.json()["error"])

    def test_non_integer_doc_id_rejected(self):
        resp = self._post({"doc_id": "not-an-int", "title": "x"})
        self.assertEqual(resp.status_code, 400)

    def test_malformed_json_rejected(self):
        resp = self._post(raw="not json")
        self.assertEqual(resp.status_code, 400)

    def test_permission_required(self):
        noperm = User.objects.create_user("viewer", password="pw")
        TermsAcceptance.objects.create(
            user=noperm, terms_version=settings.TERMS_VERSION
        )
        self.client.force_login(noperm)
        resp = self._post({"doc_id": 42, "title": "x"})
        self.assertEqual(resp.status_code, 403)

    @patch("ask.views.download_kb_pdf")
    def test_response_payload_powers_the_dom_injection(self, mock_download):
        # The frontend needs id, title, and filename to render the new row
        # without a full page reload.
        mock_download.return_value = ("1780-foo.pdf", b"bytes")
        resp = self._post({"doc_id": 11, "title": "Fresh"})
        # Check the status first so a failure is reported as such rather
        # than as a missing key in the decoded body.
        self.assertEqual(resp.status_code, 200)
        body = resp.json()
        self.assertTrue(body["success"])
        self.assertEqual(body["title"], "Fresh")
        self.assertTrue(body["filename"])
        self.assertIn("id", body)


class DownloadKBPdfHelperTests(TestCase):
    """Cover kb_connector.download_kb_pdf with the HTTP client patched out."""

    def _stub_response(self, *, status_code, content=b"", headers=None):
        """Build a bare object exposing only the response attributes set below."""
        stub = type("Resp", (), {})()
        vars(stub).update(
            status_code=status_code,
            content=content,
            headers=headers or {},
            # Set on the instance, so it is called with no arguments.
            raise_for_status=lambda: None,
        )
        return stub

    @patch("ask.kb_connector.httpx.Client")
    def test_returns_filename_and_bytes_on_200(self, mock_client_cls):
        from ask.kb_connector import download_kb_pdf

        fake_response = self._stub_response(
            status_code=200,
            content=b"%PDF fake",
            headers={"content-disposition": 'attachment; filename="1780-foo.pdf"'},
        )
        # ``with httpx.Client() as client`` yields __enter__'s return value.
        session = mock_client_cls.return_value.__enter__.return_value
        session.get.return_value = fake_response

        result = download_kb_pdf(5)

        self.assertEqual(result, ("1780-foo.pdf", b"%PDF fake"))

    @patch("ask.kb_connector.httpx.Client")
    def test_returns_none_pair_on_404(self, mock_client_cls):
        from ask.kb_connector import download_kb_pdf

        session = mock_client_cls.return_value.__enter__.return_value
        session.get.return_value = self._stub_response(status_code=404)

        result = download_kb_pdf(99)

        self.assertEqual(result, (None, None))

    @patch("ask.kb_connector.httpx.Client")
    def test_falls_back_to_synthetic_filename_when_header_missing(self, mock_client_cls):
        from ask.kb_connector import download_kb_pdf

        headerless = self._stub_response(status_code=200, content=b"bytes")
        session = mock_client_cls.return_value.__enter__.return_value
        session.get.return_value = headerless

        name, payload = download_kb_pdf(7)

        self.assertEqual(name, "kb_doc_7.pdf")
        self.assertEqual(payload, b"bytes")
1 change: 1 addition & 0 deletions hospexplorer/ask/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
re_path(r"^kb/$", views.kb_resources, name="kb-resources"),
re_path(r"^kb/compare/$", views.kb_compare, name="kb-compare"),
re_path(r"^kb/add-resource/$", views.kb_add_resource, name="kb-add-resource"),
re_path(r"^kb/add-pdf-resource/$", views.kb_add_pdf_resource, name="kb-add-pdf-resource"),
re_path(r"^kb/remove-from-kb/$", views.kb_remove_from_kb, name="kb-remove-from-kb"),
re_path(r"^kb/add-to-kb/$", views.kb_add_website_to_mcp, name="kb-add-to-kb"),
re_path(r"^kb/upload-pdf/$", views.kb_upload_pdf, name="kb-upload-pdf"),
Expand Down
Loading