diff --git a/hospexplorer/ask/kb_connector.py b/hospexplorer/ask/kb_connector.py
index 77f87b8..b374a55 100644
--- a/hospexplorer/ask/kb_connector.py
+++ b/hospexplorer/ask/kb_connector.py
@@ -113,6 +113,47 @@ def add_pdf_to_kb(file_bytes, filename, title, url=None):
raise last_exc
+def _filename_from_content_disposition(header_value, fallback):
+    """Return a safe bare filename taken from a Content-Disposition value.
+
+    Looks for a plain ``filename=`` parameter (the parameter name is matched
+    case-insensitively), removes one pair of surrounding double quotes, and
+    drops any directory components so a server-supplied value such as
+    ``../../x.pdf`` cannot influence where the file is later stored.
+    Returns ``fallback`` when the header carries no usable name.
+    """
+    for param in header_value.split(";"):
+        name, sep, value = param.partition("=")
+        if not sep or name.strip().lower() != "filename":
+            continue
+        value = value.strip()
+        if value.startswith('"') and value.endswith('"'):
+            value = value[1:-1]
+        # Keep only the final path segment; "\" is treated like "/" so
+        # Windows-style paths are reduced the same way.
+        value = value.replace("\\", "/").rsplit("/", 1)[-1].strip()
+        if value and value not in (".", ".."):
+            return value
+    return fallback
+
+
+def download_kb_pdf(doc_id):
+    """Download the original PDF bytes for a KB document.
+
+    Calls GET /docs/{doc_id}/file on the MCP KB server, authenticating with
+    the configured bearer token and using settings.KB_MCP_PDF_TIMEOUT.
+
+    Returns:
+        (filename, bytes) on success. The filename is the one announced in
+        the response's Content-Disposition header, reduced to a bare file
+        name; when the header is missing or unusable a synthetic
+        ``kb_doc_<doc_id>.pdf`` is returned instead.
+        (None, None) on 404 — the KB has no local file for that document and
+        the caller should fall back to a tracking-only record.
+
+    Raises:
+        httpx.HTTPStatusError: for any other HTTP error status (raised by
+            response.raise_for_status()).
+        httpx.RequestError: for transport-level failures raised by httpx.
+    """
+    headers = {
+        "Authorization": f"Bearer {settings.KB_MCP_JWT_TOKEN}",
+    }
+    endpoint = f"{settings.KB_MCP_HOST}/docs/{doc_id}/file"
+
+    with httpx.Client() as client:
+        response = client.get(
+            endpoint,
+            headers=headers,
+            timeout=settings.KB_MCP_PDF_TIMEOUT,
+        )
+
+    if response.status_code == 404:
+        return None, None
+    response.raise_for_status()
+
+    # Prefer the KB's own filename; fall back to a name derived from the ID.
+    filename = _filename_from_content_disposition(
+        response.headers.get("content-disposition", ""),
+        fallback=f"kb_doc_{doc_id}.pdf",
+    )
+    return filename, response.content
+
+
def delete_kb_document(doc_id):
"""Delete a document from the MCP KB server by its ID.
diff --git a/hospexplorer/ask/migrations/0014_alter_pdfresource_file.py b/hospexplorer/ask/migrations/0014_alter_pdfresource_file.py
new file mode 100644
index 0000000..7597d8b
--- /dev/null
+++ b/hospexplorer/ask/migrations/0014_alter_pdfresource_file.py
@@ -0,0 +1,18 @@
+# Generated by Django 6.0.2 on 2026-05-27 22:29
+
+from django.db import migrations, models
+
+
+# Schema migration for the ask app: makes PDFResource.file optional so a
+# PDFResource row can exist without an attached file.
+class Migration(migrations.Migration):
+
+ # Must be applied after the migration that added original_filename.
+ dependencies = [
+ ('ask', '0013_pdfresource_original_filename'),
+ ]
+
+ operations = [
+ # Redefine the field with blank=True and null=True; upload_to stays
+ # 'kb_pdfs/'.
+ migrations.AlterField(
+ model_name='pdfresource',
+ name='file',
+ field=models.FileField(blank=True, null=True, upload_to='kb_pdfs/'),
+ ),
+ ]
diff --git a/hospexplorer/ask/models.py b/hospexplorer/ask/models.py
index 2552cda..7ed7390 100644
--- a/hospexplorer/ask/models.py
+++ b/hospexplorer/ask/models.py
@@ -54,7 +54,7 @@ class Meta:
class PDFResource(Resource):
- file = models.FileField(upload_to="kb_pdfs/")
+ file = models.FileField(upload_to="kb_pdfs/", null=True, blank=True)
# original upload name, kept so re-uploads can be skipped — Django renames file.name on collision
original_filename = models.CharField(max_length=255, blank=True, default="")
mcp_kb_document_id = models.IntegerField(null=True, blank=True, help_text="Document ID returned by the MCP Knowledge Base.")
diff --git a/hospexplorer/ask/templates/kb/resources.html b/hospexplorer/ask/templates/kb/resources.html
index c856509..ea9374c 100644
--- a/hospexplorer/ask/templates/kb/resources.html
+++ b/hospexplorer/ask/templates/kb/resources.html
@@ -263,15 +263,27 @@
PDFs in KB but not tracked internally
| Title |
- {% if can_delete_pdf %}Actions | {% endif %}
+ {% if can_add_pdf or can_delete_pdf %}Actions | {% endif %}
|
- {% if can_delete_pdf %}
+ {% if can_add_pdf or can_delete_pdf %}
+ {% if can_add_pdf %}
+
+ {% endif %}
+ {% if can_delete_pdf %}
+ {% endif %}
|
{% endif %}
@@ -541,6 +554,49 @@ PDFs in KB but not tracked internally
this.loadingKey = null;
},
+ // Ask the server to start tracking an untracked KB PDF. POSTs the doc's
+ // doc_id and title as JSON to the kb-add-pdf-resource URL; on success the
+ // doc is removed from untrackedPdfs, its new id is marked 'in_kb' in
+ // pdfStatusMap, a row is prepended to the PDFs table and a toast is shown.
+ // On failure an error toast is shown. loadingKey is set for the duration
+ // of the call and cleared at the end.
+ async trackPdfInHopper(doc) {
+ this.loadingKey = `track-pdf-${doc.doc_id}`;
+ try {
+ const response = await fetch('{% url "ask:kb-add-pdf-resource" %}', {
+ method: 'POST',
+ headers: {
+ 'X-CSRFToken': '{{ csrf_token }}',
+ 'Content-Type': 'application/json',
+ },
+ body: JSON.stringify({ doc_id: doc.doc_id, title: doc.title }),
+ });
+ // The body is parsed regardless of HTTP status; a non-JSON body makes
+ // response.json() throw and lands in the catch below.
+ const data = await response.json();
+ if (data.success) {
+ this.untrackedPdfs = this.untrackedPdfs.filter(d => d.doc_id !== doc.doc_id);
+ this.pdfStatusMap[data.id] = 'in_kb';
+
+ // Inject the new row client-side so no page reload is needed.
+ const tbody = document.querySelector('#pdfs-pane .kb-table tbody');
+ if (tbody) {
+ const tr = document.createElement('tr');
+ tr.className = 'kb-row-maroon';
+ const filename = data.filename || '';
+ // Cap the displayed filename at 40 characters (37 + '...').
+ const truncFilename = filename.length > 40 ? filename.slice(0, 37) + '...' : filename;
+ const now = new Date().toLocaleString('en-US', { month: 'short', day: 'numeric', year: 'numeric', hour: 'numeric', minute: '2-digit' });
+ // NOTE(review): data.title and truncFilename are interpolated into
+ // innerHTML without escaping. If either can contain markup this is an
+ // HTML-injection vector; consider building the cells with textContent.
+ tr.innerHTML = `
+ ${data.title} |
+ ${truncFilename} |
+ ${now} |
+ In Sync |
+ {% if can_change_pdf %} | {% endif %}
+ `;
+ tbody.prepend(tr);
+ }
+
+ this.showToast(`Now tracking "${doc.title}" in Hopper.`);
+ } else {
+ this.showToast(data.error || 'Failed to track PDF.', 'error');
+ }
+ } catch (e) {
+ // Reached for network failures and for responses that are not JSON.
+ this.showToast('Failed to connect to server.', 'error');
+ }
+ this.loadingKey = null;
+ },
+
async removeFromKb(doc) {
this.loadingKey = `remove-${doc.doc_id}`;
try {
diff --git a/hospexplorer/ask/tests.py b/hospexplorer/ask/tests.py
index 749b619..c51517a 100644
--- a/hospexplorer/ask/tests.py
+++ b/hospexplorer/ask/tests.py
@@ -1,12 +1,14 @@
+import json
import shutil
import tempfile
from unittest.mock import patch
-from django.contrib.auth.models import User
+from django.conf import settings
+from django.contrib.auth.models import Permission, User
from django.core.files.base import ContentFile
from django.test import TestCase, override_settings
-from ask.models import PDFResource
+from ask.models import PDFResource, TermsAcceptance
class PDFResourceDeletionTests(TestCase):
@@ -48,3 +50,152 @@ def test_successful_file_removal_is_not_flagged(self):
pdf.file.save("report.pdf", ContentFile(b"%PDF-1.4 test"), save=True)
pdf.delete()
self.assertFalse(pdf.file_deletion_failed)
+
+
+class KBAddPdfResourceViewTests(TestCase):
+    """Exercise the Track-in-Hopper endpoint used for untracked KB PDFs.
+
+    Two outcomes of the KB download are covered: the KB hands the file back
+    and the new PDFResource stores those bytes, or the KB reports 404 and the
+    record is created tracking-only (file=None), as happens for legacy KB
+    docs ingested before local_path was recorded.
+    """
+
+    URL = "/hopper/ask/kb/add-pdf-resource/"
+
+    def setUp(self):
+        # Point MEDIA_ROOT at a throwaway directory for the length of a test.
+        scratch_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, scratch_dir, ignore_errors=True)
+        media_override = override_settings(MEDIA_ROOT=scratch_dir)
+        media_override.enable()
+        self.addCleanup(media_override.disable)
+
+        # A logged-in user who may add PDF resources and has accepted terms.
+        self.user = User.objects.create_user("curator", password="pw")
+        add_pdf_perm = Permission.objects.get(codename="add_pdfresource")
+        self.user.user_permissions.add(add_pdf_perm)
+        TermsAcceptance.objects.create(
+            user=self.user, terms_version=settings.TERMS_VERSION
+        )
+        self.client.force_login(self.user)
+
+    def _post(self, body=None, raw=None):
+        """POST ``raw`` as-is, or ``body`` serialized to JSON, to the endpoint."""
+        if raw is None:
+            raw = json.dumps(body or {})
+        return self.client.post(
+            self.URL, data=raw, content_type="application/json"
+        )
+
+    def _created_resource(self, response):
+        """Load the PDFResource whose id the endpoint reported."""
+        return PDFResource.objects.get(pk=response.json()["id"])
+
+    @patch("ask.views.download_kb_pdf")
+    def test_attaches_file_when_kb_returns_bytes(self, mock_download):
+        mock_download.return_value = ("1780-report.pdf", b"%PDF-1.4 fake")
+        response = self._post({"doc_id": 99, "title": "Fresh doc"})
+        self.assertEqual(response.status_code, 200)
+        record = self._created_resource(response)
+        self.assertEqual(record.mcp_kb_document_id, 99)
+        self.assertTrue(record.file)
+        self.assertEqual(record.file.read(), b"%PDF-1.4 fake")
+        self.assertEqual(record.original_filename, "1780-report.pdf")
+        self.assertEqual(record.status_message, "")
+
+    @patch("ask.views.download_kb_pdf")
+    def test_creates_tracking_only_when_kb_has_no_file(self, mock_download):
+        mock_download.return_value = (None, None)
+        response = self._post({"doc_id": 42, "title": "Legacy doc"})
+        self.assertEqual(response.status_code, 200)
+        record = self._created_resource(response)
+        self.assertEqual(record.mcp_kb_document_id, 42)
+        self.assertFalse(record.file)
+        self.assertEqual(
+            record.status_message, "Tracked from KB; file not stored locally."
+        )
+
+    @patch("ask.views.download_kb_pdf")
+    def test_blank_title_falls_back_to_placeholder(self, mock_download):
+        mock_download.return_value = (None, None)
+        response = self._post({"doc_id": 7})
+        self.assertEqual(response.status_code, 200)
+        record = self._created_resource(response)
+        self.assertEqual(record.title, "Untitled KB doc 7")
+
+    @patch("ask.views.download_kb_pdf")
+    def test_duplicate_doc_id_refused(self, mock_download):
+        mock_download.return_value = (None, None)
+        first = self._post({"doc_id": 42, "title": "first"})
+        self.assertEqual(first.status_code, 200)
+        second = self._post({"doc_id": 42, "title": "second"})
+        self.assertEqual(second.status_code, 400)
+        self.assertIn("Already tracked", second.json()["error"])
+        tracked = PDFResource.objects.filter(mcp_kb_document_id=42)
+        self.assertEqual(tracked.count(), 1)
+
+    def test_missing_doc_id_rejected(self):
+        response = self._post({"title": "no id"})
+        self.assertEqual(response.status_code, 400)
+        self.assertIn("doc_id", response.json()["error"])
+
+    def test_non_integer_doc_id_rejected(self):
+        response = self._post({"doc_id": "not-an-int", "title": "x"})
+        self.assertEqual(response.status_code, 400)
+
+    def test_malformed_json_rejected(self):
+        response = self._post(raw="not json")
+        self.assertEqual(response.status_code, 400)
+
+    def test_permission_required(self):
+        # Same setup as the curator, minus the add_pdfresource permission.
+        viewer = User.objects.create_user("viewer", password="pw")
+        TermsAcceptance.objects.create(
+            user=viewer, terms_version=settings.TERMS_VERSION
+        )
+        self.client.force_login(viewer)
+        response = self._post({"doc_id": 42, "title": "x"})
+        self.assertEqual(response.status_code, 403)
+
+    @patch("ask.views.download_kb_pdf")
+    def test_response_payload_powers_the_dom_injection(self, mock_download):
+        # The frontend renders the new row from id, title and filename
+        # without reloading the page, so all three must be in the payload.
+        mock_download.return_value = ("1780-foo.pdf", b"bytes")
+        payload = self._post({"doc_id": 11, "title": "Fresh"}).json()
+        self.assertTrue(payload["success"])
+        self.assertEqual(payload["title"], "Fresh")
+        self.assertTrue(payload["filename"])
+        self.assertIn("id", payload)
+
+
+class DownloadKBPdfHelperTests(TestCase):
+    """Unit tests for kb_connector.download_kb_pdf with httpx.Client mocked."""
+
+    def _stub_response(self, *, status_code, content=b"", headers=None):
+        """Build a minimal stand-in for an httpx response."""
+        stub = type("Resp", (), {})()
+        stub.__dict__.update(
+            status_code=status_code,
+            content=content,
+            headers=headers or {},
+            raise_for_status=lambda: None,
+        )
+        return stub
+
+    def _download(self, mock_client_cls, doc_id, **response_kwargs):
+        """Make the mocked client's GET return a stub, then call the helper."""
+        client = mock_client_cls.return_value.__enter__.return_value
+        client.get.return_value = self._stub_response(**response_kwargs)
+        from ask.kb_connector import download_kb_pdf
+        return download_kb_pdf(doc_id)
+
+    @patch("ask.kb_connector.httpx.Client")
+    def test_returns_filename_and_bytes_on_200(self, mock_client_cls):
+        result = self._download(
+            mock_client_cls,
+            5,
+            status_code=200,
+            content=b"%PDF fake",
+            headers={"content-disposition": 'attachment; filename="1780-foo.pdf"'},
+        )
+        self.assertEqual(result, ("1780-foo.pdf", b"%PDF fake"))
+
+    @patch("ask.kb_connector.httpx.Client")
+    def test_returns_none_pair_on_404(self, mock_client_cls):
+        result = self._download(mock_client_cls, 99, status_code=404)
+        self.assertEqual(result, (None, None))
+
+    @patch("ask.kb_connector.httpx.Client")
+    def test_falls_back_to_synthetic_filename_when_header_missing(self, mock_client_cls):
+        fname, content = self._download(
+            mock_client_cls, 7, status_code=200, content=b"bytes"
+        )
+        self.assertEqual(fname, "kb_doc_7.pdf")
+        self.assertEqual(content, b"bytes")
diff --git a/hospexplorer/ask/urls.py b/hospexplorer/ask/urls.py
index e098a24..2248954 100644
--- a/hospexplorer/ask/urls.py
+++ b/hospexplorer/ask/urls.py
@@ -16,6 +16,7 @@
re_path(r"^kb/$", views.kb_resources, name="kb-resources"),
re_path(r"^kb/compare/$", views.kb_compare, name="kb-compare"),
re_path(r"^kb/add-resource/$", views.kb_add_resource, name="kb-add-resource"),
+ re_path(r"^kb/add-pdf-resource/$", views.kb_add_pdf_resource, name="kb-add-pdf-resource"),
re_path(r"^kb/remove-from-kb/$", views.kb_remove_from_kb, name="kb-remove-from-kb"),
re_path(r"^kb/add-to-kb/$", views.kb_add_website_to_mcp, name="kb-add-to-kb"),
re_path(r"^kb/upload-pdf/$", views.kb_upload_pdf, name="kb-upload-pdf"),
diff --git a/hospexplorer/ask/views.py b/hospexplorer/ask/views.py
index ecdf0e1..6135717 100644
--- a/hospexplorer/ask/views.py
+++ b/hospexplorer/ask/views.py
@@ -13,7 +13,9 @@
from ask.models import Conversation, QARecord, QueryTask, TermsAcceptance, WebsiteResource, PDFResource
from ask.tasks import run_llm_task
-from ask.kb_connector import list_kb_documents, add_website_to_kb, add_pdf_to_kb, delete_kb_document
+from django.core.files.base import ContentFile
+
+from ask.kb_connector import list_kb_documents, add_website_to_kb, add_pdf_to_kb, delete_kb_document, download_kb_pdf
logger = logging.getLogger(__name__)
@@ -345,6 +347,69 @@ def kb_add_resource(request):
return JsonResponse({"success": True, "id": resource.id})
+@login_required
+@require_POST
+def kb_add_pdf_resource(request):
+    """Start tracking an untracked KB PDF document as a PDFResource.
+
+    Expects a JSON object body with ``doc_id`` (integer KB document ID) and
+    an optional ``title``. The original PDF is requested from the KB through
+    download_kb_pdf(): when bytes come back they are attached to the new
+    record; when it returns (None, None) the record is created tracking-only,
+    with no file and an explanatory status_message. The document itself is
+    not re-ingested into the KB.
+
+    JSON responses:
+        200: {"success": True, "id", "title", "filename"}
+        400: invalid body, missing/non-integer doc_id, or already tracked
+        403: the user lacks the ask.add_pdfresource permission
+        502: the KB answered with an HTTP error status
+        503: the KB could not be reached (connection failure, timeout, ...)
+    """
+
+    if not request.user.has_perm("ask.add_pdfresource"):
+        return JsonResponse({"success": False, "error": "Permission denied."}, status=403)
+
+    try:
+        body = json.loads(request.body)
+    except ValueError:
+        # ValueError covers json.JSONDecodeError as well as the
+        # UnicodeDecodeError raised for bodies that are not valid UTF-8.
+        body = None
+    if not isinstance(body, dict):
+        # Valid JSON that is not an object (list, string, number, null) has
+        # no .get(); reject it here rather than crashing below.
+        return JsonResponse({"success": False, "error": "Invalid request body."}, status=400)
+
+    raw_title = body.get("title")
+    # Anything other than a string is treated as "no title supplied".
+    title = raw_title.strip() if isinstance(raw_title, str) else ""
+
+    try:
+        doc_id = int(body.get("doc_id"))
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: JSON "Infinity" parses to float("inf").
+        return JsonResponse({"success": False, "error": "doc_id is required."}, status=400)
+
+    if PDFResource.objects.filter(mcp_kb_document_id=doc_id).exists():
+        return JsonResponse({"success": False, "error": "Already tracked in Hopper."}, status=400)
+
+    try:
+        filename, content = download_kb_pdf(doc_id)
+    except httpx.HTTPStatusError as e:
+        logger.warning("KB returned HTTP %s for doc_id=%s", e.response.status_code, doc_id)
+        return JsonResponse({"success": False, "error": f"Knowledge Base server returned an error (HTTP {e.response.status_code})."}, status=502)
+    except httpx.RequestError as e:
+        # RequestError is the base of ConnectError, timeouts and the other
+        # transport failures, so none of them surface as an unhandled 500.
+        logger.warning("KB download failed for doc_id=%s: %s", doc_id, e)
+        return JsonResponse({"success": False, "error": "Could not connect to the Knowledge Base server."}, status=503)
+
+    if content is not None:
+        file_field = ContentFile(content, name=filename)
+        original_filename = filename
+        status_message = ""
+    else:
+        # The KB has no stored file for this document: track it without one.
+        file_field = None
+        original_filename = ""
+        status_message = "Tracked from KB; file not stored locally."
+
+    resource = PDFResource.objects.create(
+        title=title or f"Untitled KB doc {doc_id}",
+        file=file_field,
+        original_filename=original_filename,
+        mcp_kb_document_id=doc_id,
+        creator=request.user,
+        modifier=request.user,
+        status=PDFResource.Status.SUCCESS,
+        status_message=status_message,
+    )
+    return JsonResponse({
+        "success": True,
+        "id": resource.id,
+        "title": resource.title,
+        "filename": resource.file.name if resource.file else "",
+    })
+
+
@login_required
@require_POST
def kb_remove_from_kb(request):