vishalp-dev24 · google-labs-jules · Jan 17, 2026 · Jan 17, 2026
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -0,0 +1,38 @@
+# Build the distribution and upload it to PyPI whenever a GitHub release is published.
+name: Publish to PyPI
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  build-and-publish:
+    name: Build and publish to PyPI
+    runs-on: ubuntu-latest
+    permissions:
+      # Listing any permission sets every unlisted one to "none", so the read
+      # access that actions/checkout relies on has to be granted explicitly.
+      contents: read
+      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: '3.12'
+
+    - name: Install build tools
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+
+    - name: Build package
+      run: python -m build
+
+    # No credentials are configured here: the upload authenticates through the
+    # OIDC token enabled by `id-token: write` above.
+    - name: Publish to PyPI
+      uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/src/docblocks/io/pdf_reader.py b/src/docblocks/io/pdf_reader.py
@@ -1,12 +1,46 @@
-import fitz
-import os
-
-def load_pdf(path: str):
-    if not path.lower().endswith(".pdf"):
-        raise ValueError("Input file must be a .pdf file")
-
-    if not os.path.exists(path):
-        raise FileNotFoundError(f"File not found: {path}")
-
-    doc = fitz.open(path)
-    return [page for page in doc]
+import fitz
+import os
+
+def load_pdf(path: str):
+    """Open the PDF at ``path`` and return its pages as a list.
+
+    Args:
+        path: Filesystem path of the document; it must end in ``.pdf``
+            (compared case-insensitively).
+
+    Returns:
+        A list holding every page produced by iterating the opened document.
+
+    Raises:
+        TypeError: If ``path`` is not a ``str``.
+        ValueError: If the extension is not ``.pdf``, the path is not a
+            file, the document cannot be opened, or it is encrypted.
+        FileNotFoundError: If nothing exists at ``path``.
+    """
+    if not isinstance(path, str):
+        raise TypeError(f"Input path must be a string, got {type(path).__name__}")
+
+    if not path.lower().endswith(".pdf"):
+        raise ValueError("Input file must be a .pdf file")
+
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"File not found: {path}")
+
+    if not os.path.isfile(path):
+        raise ValueError(f"Path is not a file: {path}")
+
+    try:
+        doc = fitz.open(path)
+    except Exception as e:
+        # Wrap fitz errors for corrupt files
+        raise ValueError(f"Failed to open PDF file: {e}") from e
+
+    if doc.is_encrypted:
+        # The document opened successfully, so release it explicitly before
+        # rejecting it rather than leaving cleanup to garbage collection.
+        doc.close()
+        raise ValueError("PDF is encrypted and cannot be processed without a password")
+
+    # NOTE(review): the document stays open because the returned pages
+    # presumably still depend on it -- confirm who is expected to close it.
+    return [page for page in doc]
diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py
@@ -0,0 +1,96 @@
+import pytest
+import os
+import fitz
+import docblocks
+from docblocks.io.pdf_reader import load_pdf
+
+# Helper to create PDF files
+@pytest.fixture
+def pdf_factory(tmp_path):
+    """Return a factory that writes a throwaway PDF under ``tmp_path``.
+
+    The factory returns the new file's path as a string. ``corrupt`` writes
+    plain text instead of a PDF, ``zero_pages`` skips adding a page,
+    ``image_only`` places a minimal PNG on the page instead of ``content``,
+    and ``encrypted`` saves with AES-256 plus user and owner passwords.
+    """
+    def _create_pdf(name, content="text", encrypted=False, corrupt=False, image_only=False, zero_pages=False):
+        p = tmp_path / name
+        if corrupt:
+            p.write_text("Not a PDF")
+            return str(p)
+
+        doc = fitz.open()
+        try:
+            if not zero_pages:
+                page = doc.new_page()
+                if image_only:
+                    # Create a minimal image
+                    img_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00\x01\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82'
+                    page.insert_image(page.rect, stream=img_data)
+                else:
+                    page.insert_text((50, 50), content)
+
+            if encrypted:
+                doc.save(str(p), encryption=fitz.PDF_ENCRYPT_AES_256, user_pw="user", owner_pw="owner")
+            else:
+                doc.save(str(p))
+        finally:
+            # Close the document even if building or saving it raised.
+            doc.close()
+        return str(p)
+    return _create_pdf
+
+def test_extract_normal(pdf_factory):
+    """Extracted text contains the inserted content and the first page marker."""
+    path = pdf_factory("normal.pdf", content="Hello World")
+    text = docblocks.extract(path)
+    assert "Hello World" in text
+    assert "PAGE 1" in text
+
+def test_extract_encrypted(pdf_factory):
+    """A password-protected PDF is rejected with a ValueError."""
+    path = pdf_factory("encrypted.pdf", encrypted=True)
+    with pytest.raises(ValueError, match="encrypted"):
+        docblocks.extract(path)
+
+def test_extract_corrupt(pdf_factory):
+    """A .pdf file holding plain text is reported as unopenable."""
+    path = pdf_factory("corrupt.pdf", corrupt=True)
+    with pytest.raises(ValueError, match="Failed to open PDF file"):
+        docblocks.extract(path)
+
+def test_extract_wrong_type():
+    """Non-string paths raise TypeError."""
+    with pytest.raises(TypeError, match="must be a string"):
+        docblocks.extract(123)
+    with pytest.raises(TypeError, match="must be a string"):
+        docblocks.extract(None)
+
+def test_extract_directory(tmp_path):
+    """A directory whose name ends in .pdf is not accepted as input."""
+    d = tmp_path / "folder.pdf"
+    d.mkdir()
+    # Should raise IsADirectoryError or our custom error
+    with pytest.raises((IsADirectoryError, ValueError), match="file"):
+        docblocks.extract(str(d))
+
+def test_extract_image_only(pdf_factory):
+    """A page containing only an image still yields its page marker."""
+    path = pdf_factory("image.pdf", image_only=True)
+    text = docblocks.extract(path)
+    assert "PAGE 1" in text
+    # No text was ever inserted, so none should come back.
+    assert "Hello" not in text
+
+def test_extract_missing_file():
+    """A nonexistent .pdf path raises FileNotFoundError."""
+    with pytest.raises(FileNotFoundError):
+        docblocks.extract("non_existent.pdf")
+
+def test_extract_wrong_extension(tmp_path):
+    """An existing file without a .pdf extension is rejected."""
+    p = tmp_path / "test.txt"
+    p.write_text("content")
+    with pytest.raises(ValueError, match="must be a .pdf file"):
+        docblocks.extract(str(p))
diff --git a/tests/test_run.py b/tests/test_run.py