From f59622f9e2133ed050009b055a86c584fa870c75 Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Tue, 3 Mar 2026 10:00:23 -0600
Subject: [PATCH 1/9] fix: auto-quote classification_q values containing spaces
 or slashes; closes #101

---
 src/pyUSPTO/clients/patent_data.py        | 14 ++++++++++----
 tests/clients/test_patent_data_clients.py |  8 ++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/pyUSPTO/clients/patent_data.py b/src/pyUSPTO/clients/patent_data.py
index ab2ce96..3ca9efd 100644
--- a/src/pyUSPTO/clients/patent_data.py
+++ b/src/pyUSPTO/clients/patent_data.py
@@ -298,9 +298,12 @@ def search_applications(
                     )
                     q_parts.append(f"assignmentBag.assigneeBag.assigneeNameText:{v}")
                 if classification_q:
-                    q_parts.append(
-                        f"applicationMetaData.cpcClassificationBag:{classification_q}"
+                    v = (
+                        f'"{classification_q}"'
+                        if any(c in classification_q for c in [" ", "/"])
+                        else classification_q
                     )
+                    q_parts.append(f"applicationMetaData.cpcClassificationBag:{v}")
                 if earliestPublicationNumber_q:
                     q_parts.append(
                         f"applicationMetaData.earliestPublicationNumber:{earliestPublicationNumber_q}"
@@ -439,9 +442,12 @@ def get_search_results(
                     )
                     q_parts.append(f"assignmentBag.assigneeBag.assigneeNameText:{v}")
                 if classification_q:
-                    q_parts.append(
-                        f"applicationMetaData.cpcClassificationBag:{classification_q}"
+                    v = (
+                        f'"{classification_q}"'
+                        if any(c in classification_q for c in [" ", "/"])
+                        else classification_q
                     )
+                    q_parts.append(f"applicationMetaData.cpcClassificationBag:{v}")
 
                 if filing_date_from_q and filing_date_to_q:
                     q_parts.append(
diff --git a/tests/clients/test_patent_data_clients.py b/tests/clients/test_patent_data_clients.py
index a14a5a4..9cd378e 100644
--- a/tests/clients/test_patent_data_clients.py
+++ b/tests/clients/test_patent_data_clients.py
@@ -384,6 +384,10 @@ def test_search_applications_post(
                 {"classification_q": "H04L"},
                 "applicationMetaData.cpcClassificationBag:H04L",
             ),
+            (
+                {"classification_q": "H10D  64/667"},
+                'applicationMetaData.cpcClassificationBag:"H10D  64/667"',
+            ),
             (
                 {"earliestPublicationNumber_q": "*12345678*"},
                 "applicationMetaData.earliestPublicationNumber:*12345678*",
@@ -1971,6 +1975,10 @@ def test_get_search_results_get_with_combined_q_convenience_params(
                 {"classification_q": "H04L"},
                 "applicationMetaData.cpcClassificationBag:H04L",
             ),
+            (
+                {"classification_q": "H10D  64/667"},
+                'applicationMetaData.cpcClassificationBag:"H10D  64/667"',
+            ),
             (
                 {"filing_date_from_q": "2021-01-01"},
                 "applicationMetaData.filingDate:>=2021-01-01",

From a0d1a0cf1d18e757f900961979157ccf62cc0dfe Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Tue, 3 Mar 2026 10:31:17 -0600
Subject: [PATCH 2/9] feat: populate document_bag in get_IFW_metadata (closes
 #99)

---
 examples/patent_data_example.py           |  17 ++++
 src/pyUSPTO/clients/patent_data.py        |  30 ++++---
 src/pyUSPTO/models/patent_data.py         |   1 +
 tests/clients/test_patent_data_clients.py | 103 +++++++++++++---------
 4 files changed, 98 insertions(+), 53 deletions(-)

diff --git a/examples/patent_data_example.py b/examples/patent_data_example.py
index 48e0998..43ffa5f 100644
--- a/examples/patent_data_example.py
+++ b/examples/patent_data_example.py
@@ -302,6 +302,23 @@
     print(f"Error with POST search: {e}")
 
 
+# Search by CPC classification code
+# CPC codes containing spaces or slashes are automatically quoted for the Lucene query.
+try:
+    print("\nSearching by CPC classification code 'H10D  64/667'...")
+    cpc_response = client.search_applications(
+        classification_q="H10D  64/667", limit=3
+    )
+    print(f"Found {cpc_response.count} applications with CPC code H10D 64/667.")
+    for patent_wrapper in cpc_response.patent_file_wrapper_data_bag:
+        if patent_wrapper.application_meta_data:
+            print(
+                f"  - App No: {patent_wrapper.application_number_text}, Title: {patent_wrapper.application_meta_data.invention_title}"
+            )
+except Exception as e:
+    print(f"Error searching by CPC classification: {e}")
+
+
 # Example of getting status codes
 try:
     print("\nGetting first 5 status codes...")
diff --git a/src/pyUSPTO/clients/patent_data.py b/src/pyUSPTO/clients/patent_data.py
index 3ca9efd..7cc27ae 100644
--- a/src/pyUSPTO/clients/patent_data.py
+++ b/src/pyUSPTO/clients/patent_data.py
@@ -4,6 +4,7 @@
 It allows you to search for and retrieve patent application data.
 """
 
+import dataclasses
 import warnings
 from collections.abc import Iterator
 from typing import Any
@@ -121,6 +122,9 @@ def sanitize_application_number(self, input_number: str) -> str:
         # Example: "PCT/US2024/012345" -> "PCTUS2412345"
         if raw.startswith("PCT"):
             parts = raw.split("/")
+            if len(parts) == 1:
+                # Already sanitized (e.g. "PCTUS0812705"), return as-is
+                return raw
             if len(parts) != 3:
                 raise ValueError(
                     f"Invalid PCT application format: {input_number}. "
@@ -1064,27 +1068,33 @@ def get_IFW_metadata(
                 comprehensive data if found using one of the identifiers,
                 otherwise None.
         """
+        wrapper = None
         if application_number:
-            return self.get_application_by_number(application_number=application_number)
-        if patent_number:
+            wrapper = self.get_application_by_number(
+                application_number=application_number
+            )
+        elif patent_number:
             pdr = self.search_applications(patent_number_q=patent_number, limit=1)
             if pdr.patent_file_wrapper_data_bag:
-                return pdr.patent_file_wrapper_data_bag[0]
-        if publication_number:
+                wrapper = pdr.patent_file_wrapper_data_bag[0]
+        elif publication_number:
             pdr = self.search_applications(
                 earliestPublicationNumber_q=publication_number, limit=1
             )
             if pdr.patent_file_wrapper_data_bag:
-                return pdr.patent_file_wrapper_data_bag[0]
-        if PCT_app_number:
-            return self.get_application_by_number(application_number=PCT_app_number)
-        if PCT_pub_number:
+                wrapper = pdr.patent_file_wrapper_data_bag[0]
+        elif PCT_app_number:
+            wrapper = self.get_application_by_number(application_number=PCT_app_number)
+        elif PCT_pub_number:
             pdr = self.search_applications(
                 pctPublicationNumber_q=PCT_pub_number, limit=1
             )
             if pdr.patent_file_wrapper_data_bag:
-                return pdr.patent_file_wrapper_data_bag[0]
-        return None
+                wrapper = pdr.patent_file_wrapper_data_bag[0]
+        if wrapper is None:
+            return None
+        doc_bag = self.get_application_documents(wrapper.application_number_text)
+        return dataclasses.replace(wrapper, document_bag=doc_bag)
 
     def download_archive(
         self,
diff --git a/src/pyUSPTO/models/patent_data.py b/src/pyUSPTO/models/patent_data.py
index 800ecdd..7f174ce 100644
--- a/src/pyUSPTO/models/patent_data.py
+++ b/src/pyUSPTO/models/patent_data.py
@@ -2063,6 +2063,7 @@ class PatentFileWrapper:
     pgpub_document_meta_data: PrintedMetaData | None = None
     grant_document_meta_data: PrintedMetaData | None = None
     last_ingestion_date_time: datetime | None = None
+    document_bag: DocumentBag | None = None
 
     @classmethod
     def from_dict(
diff --git a/tests/clients/test_patent_data_clients.py b/tests/clients/test_patent_data_clients.py
index 9cd378e..4424be1 100644
--- a/tests/clients/test_patent_data_clients.py
+++ b/tests/clients/test_patent_data_clients.py
@@ -11,7 +11,7 @@
 from datetime import date, datetime, timezone
 from typing import Any
 from unittest import mock
-from unittest.mock import MagicMock, mock_open, patch
+from unittest.mock import MagicMock, call, mock_open, patch
 
 import pytest
 import requests
@@ -1231,20 +1231,22 @@ def test_get_ifw_by_application_number(
     ) -> None:
         """Test get_IFW with application_number calls get_application_by_number."""
         client, mock_make_request = client_with_mocked_request
-        mock_make_request.return_value = PatentDataResponse(
-            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
-        )
+        mock_make_request.side_effect = [
+            PatentDataResponse(count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]),
+            {"documentBag": []},
+        ]
 
         app_num = "12345678"
         result = client.get_IFW_metadata(application_number=app_num)
 
-        # Should call get_application_by_number
-        mock_make_request.assert_called_once_with(
+        # Should call get_application_by_number first
+        assert mock_make_request.call_args_list[0] == call(
             method="GET",
             endpoint=f"api/v1/patent/applications/{app_num}",
             response_class=PatentDataResponse,
         )
-        assert result is mock_patent_file_wrapper
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+        assert isinstance(result.document_bag, DocumentBag)
 
     def test_get_ifw_by_patent_number(
         self,
@@ -1253,15 +1255,16 @@ def test_get_ifw_by_patent_number(
     ) -> None:
         """Test get_IFW with patent_number calls search_applications."""
         client, mock_make_request = client_with_mocked_request
-        mock_make_request.return_value = PatentDataResponse(
-            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
-        )
+        mock_make_request.side_effect = [
+            PatentDataResponse(count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]),
+            {"documentBag": []},
+        ]
 
         patent_num = "10000000"
         result = client.get_IFW_metadata(patent_number=patent_num)
 
-        # Should call search_applications with patent_number_q
-        mock_make_request.assert_called_once_with(
+        # Should call search_applications with patent_number_q first
+        assert mock_make_request.call_args_list[0] == call(
             method="GET",
             endpoint="api/v1/patent/applications/search",
             params={
@@ -1271,7 +1274,8 @@ def test_get_ifw_by_patent_number(
             },
             response_class=PatentDataResponse,
         )
-        assert result is mock_patent_file_wrapper
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+        assert isinstance(result.document_bag, DocumentBag)
 
     def test_get_ifw_by_publication_number(
         self,
@@ -1280,15 +1284,16 @@ def test_get_ifw_by_publication_number(
     ) -> None:
         """Test get_IFW with publication_number calls search_applications."""
         client, mock_make_request = client_with_mocked_request
-        mock_make_request.return_value = PatentDataResponse(
-            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
-        )
+        mock_make_request.side_effect = [
+            PatentDataResponse(count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]),
+            {"documentBag": []},
+        ]
 
         pub_num = "US20240123456A1"
         result = client.get_IFW_metadata(publication_number=pub_num)
 
-        # Should call search_applications with earliestPublicationNumber_q
-        mock_make_request.assert_called_once_with(
+        # Should call search_applications with earliestPublicationNumber_q first
+        assert mock_make_request.call_args_list[0] == call(
             method="GET",
             endpoint="api/v1/patent/applications/search",
             params={
@@ -1298,7 +1303,8 @@ def test_get_ifw_by_publication_number(
             },
             response_class=PatentDataResponse,
         )
-        assert result is mock_patent_file_wrapper
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+        assert isinstance(result.document_bag, DocumentBag)
 
     def test_get_ifw_by_pct_app_number(
         self,
@@ -1312,9 +1318,10 @@ def test_get_ifw_by_pct_app_number(
         This is expected test behavior for validating the warning system.
         """
         client, mock_make_request = client_with_mocked_request
-        mock_make_request.return_value = PatentDataResponse(
-            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
-        )
+        mock_make_request.side_effect = [
+            PatentDataResponse(count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]),
+            {"documentBag": []},
+        ]
 
         pct_app = "PCT/US2024/012345"
 
@@ -1322,13 +1329,14 @@ def test_get_ifw_by_pct_app_number(
         with pytest.warns(USPTODataMismatchWarning):
             result = client.get_IFW_metadata(PCT_app_number=pct_app)
 
-        # Should call get_application_by_number
-        mock_make_request.assert_called_once_with(
+        # Should call get_application_by_number first
+        assert mock_make_request.call_args_list[0] == call(
             method="GET",
             endpoint="api/v1/patent/applications/PCTUS24012345",
             response_class=PatentDataResponse,
         )
-        assert result is mock_patent_file_wrapper
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+        assert isinstance(result.document_bag, DocumentBag)
 
     def test_get_ifw_by_short_pct_app_number(
         self,
@@ -1345,9 +1353,10 @@ def test_get_ifw_by_short_pct_app_number(
         This is expected test behavior for validating the warning system.
         """
         client, mock_make_request = client_with_mocked_request
-        mock_make_request.return_value = PatentDataResponse(
-            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
-        )
+        mock_make_request.side_effect = [
+            PatentDataResponse(count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]),
+            {"documentBag": []},
+        ]
 
         pct_app = "PCT/US24/012345"
 
@@ -1355,13 +1364,14 @@ def test_get_ifw_by_short_pct_app_number(
         with pytest.warns(USPTODataMismatchWarning):
             result = client.get_IFW_metadata(PCT_app_number=pct_app)
 
-        # Should call get_application_by_number
-        mock_make_request.assert_called_once_with(
+        # Should call get_application_by_number first
+        assert mock_make_request.call_args_list[0] == call(
             method="GET",
             endpoint="api/v1/patent/applications/PCTUS24012345",
             response_class=PatentDataResponse,
         )
-        assert result is mock_patent_file_wrapper
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+        assert isinstance(result.document_bag, DocumentBag)
 
     def test_get_ifw_by_pct_app_number_malformed(
         self,
@@ -1467,15 +1477,16 @@ def test_get_ifw_by_pct_pub_number(
     ) -> None:
         """Test get_IFW with PCT_pub_number calls search_applications."""
         client, mock_make_request = client_with_mocked_request
-        mock_make_request.return_value = PatentDataResponse(
-            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
-        )
+        mock_make_request.side_effect = [
+            PatentDataResponse(count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]),
+            {"documentBag": []},
+        ]
 
         pct_pub = "WO2024012345A1"
         result = client.get_IFW_metadata(PCT_pub_number=pct_pub)
 
-        # Should call search_applications with pctPublicationNumber_q
-        mock_make_request.assert_called_once_with(
+        # Should call search_applications with pctPublicationNumber_q first
+        assert mock_make_request.call_args_list[0] == call(
             method="GET",
             endpoint="api/v1/patent/applications/search",
             params={
@@ -1485,7 +1496,8 @@ def test_get_ifw_by_pct_pub_number(
             },
             response_class=PatentDataResponse,
         )
-        assert result is mock_patent_file_wrapper
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+        assert isinstance(result.document_bag, DocumentBag)
 
     def test_get_ifw_no_parameters_returns_none(
         self, patent_data_client: PatentDataClient
@@ -1513,9 +1525,10 @@ def test_get_ifw_prioritizes_first_parameter(
     ) -> None:
         """Test get_IFW uses application_number when multiple parameters provided."""
         client, mock_make_request = client_with_mocked_request
-        mock_make_request.return_value = PatentDataResponse(
-            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
-        )
+        mock_make_request.side_effect = [
+            PatentDataResponse(count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]),
+            {"documentBag": []},
+        ]
 
         app_num = "12345678"
         # Provide multiple parameters - should use application_number
@@ -1525,13 +1538,14 @@ def test_get_ifw_prioritizes_first_parameter(
             publication_number="US20240123456A1",
         )
 
-        # Should only call get_application_by_number, not search
-        mock_make_request.assert_called_once_with(
+        # Should call get_application_by_number first, not search
+        assert mock_make_request.call_args_list[0] == call(
             method="GET",
             endpoint=f"api/v1/patent/applications/{app_num}",
             response_class=PatentDataResponse,
         )
-        assert result is mock_patent_file_wrapper
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+        assert isinstance(result.document_bag, DocumentBag)
 
 
 class TestDownloadArchive:
@@ -2536,6 +2550,9 @@ def test_sanitize_invalid_series_code_format_raises(
         with pytest.raises(ValueError, match="Expected format: NNNNNNNN or NN/NNNNNN"):
             patent_data_client.sanitize_application_number("08/123/456")
 
+        # Already-sanitized PCT number passes through unchanged
+        assert patent_data_client.sanitize_application_number("PCTUS0812705") == "PCTUS0812705"
+
 
 class TestRawDataFeature:
     """Tests for the include_raw_data feature."""

From 3c2e002c07dd1956ab39aafaffaf8e5ea6dcd8f5 Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Tue, 3 Mar 2026 10:43:02 -0600
Subject: [PATCH 3/9] chore(deps): update dependencies

---
 requirements-dev.txt | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index b36ed0d..6bb3543 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -22,14 +22,14 @@ colorama==0.4.6
     #   sphinx
 coverage==7.13.4
     # via pytest-cov
-docutils==0.21.2
+docutils==0.22.4
     # via
     #   myst-parser
     #   sphinx
     #   sphinx-rtd-theme
 idna==3.11
     # via requests
-imagesize==1.4.1
+imagesize==2.0.0
     # via sphinx
 iniconfig==2.3.0
     # via pytest
@@ -39,7 +39,7 @@ jinja2==3.1.6
     #   sphinx
 librt==0.8.1
     # via mypy
-markdown-it-py==3.0.0
+markdown-it-py==4.0.0
     # via
     #   mdit-py-plugins
     #   myst-parser
@@ -55,7 +55,7 @@ mypy==1.19.1
     # via pyUSPTO (pyproject.toml)
 mypy-extensions==1.1.0
     # via mypy
-myst-parser==4.0.1
+myst-parser==5.0.0
     # via pyUSPTO (pyproject.toml)
 packaging==26.0
     # via
@@ -71,7 +71,7 @@ pydantic==2.12.5
     # via
     #   pydantic-extra-types
     #   sphinx-immaterial
-pydantic-core==2.41.5
+pydantic-core==2.42.0
     # via pydantic
 pydantic-extra-types==2.11.0
     # via sphinx-immaterial
@@ -99,7 +99,7 @@ ruff==0.15.4
     # via pyUSPTO (pyproject.toml)
 snowballstemmer==3.0.1
     # via sphinx
-sphinx==8.1.3
+sphinx==9.1.0
     # via
     #   myst-parser
     #   pyUSPTO (pyproject.toml)
@@ -108,7 +108,7 @@ sphinx==8.1.3
     #   sphinx-immaterial
     #   sphinx-rtd-theme
     #   sphinxcontrib-jquery
-sphinx-autodoc-typehints==3.0.1
+sphinx-autodoc-typehints==3.9.5
     # via pyUSPTO (pyproject.toml)
 sphinx-copybutton==0.5.2
     # via pyUSPTO (pyproject.toml)

From 22693f167818970c46b24464480880ffda0b6e65 Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Tue, 3 Mar 2026 11:04:03 -0600
Subject: [PATCH 4/9] [DOCS] Add example of how to search CPC codes (closes #102)

---
 examples/patent_data_example.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/examples/patent_data_example.py b/examples/patent_data_example.py
index 43ffa5f..5809925 100644
--- a/examples/patent_data_example.py
+++ b/examples/patent_data_example.py
@@ -304,6 +304,8 @@
 
 # Search by CPC classification code
 # CPC codes containing spaces or slashes are automatically quoted for the Lucene query.
+# cpc_classification_bag on ApplicationMetaData is a list[str] of all CPC codes assigned
+# to the application, so each result may have multiple codes.
 try:
     print("\nSearching by CPC classification code 'H10D  64/667'...")
     cpc_response = client.search_applications(
@@ -311,10 +313,13 @@
     )
     print(f"Found {cpc_response.count} applications with CPC code H10D 64/667.")
     for patent_wrapper in cpc_response.patent_file_wrapper_data_bag:
-        if patent_wrapper.application_meta_data:
+        app_meta = patent_wrapper.application_meta_data
+        if app_meta:
             print(
-                f"  - App No: {patent_wrapper.application_number_text}, Title: {patent_wrapper.application_meta_data.invention_title}"
+                f"  - App No: {patent_wrapper.application_number_text}, Title: {app_meta.invention_title}"
             )
+            if app_meta.cpc_classification_bag:
+                print(f"    CPC codes: {', '.join(app_meta.cpc_classification_bag)}")
 except Exception as e:
     print(f"Error searching by CPC classification: {e}")
 

From 2000f48e5801fb144f43dff9a2ba19e7fe64cba1 Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Wed, 4 Mar 2026 10:07:14 -0600
Subject: [PATCH 5/9] feat: add get_IFW method with IFWResult and bulk download

---
 examples/ifw_example.py                   |   8 +
 src/pyUSPTO/clients/patent_data.py        |  92 +++++++++
 src/pyUSPTO/models/patent_data.py         |  13 ++
 tests/clients/test_patent_data_clients.py | 228 ++++++++++++++++++++++
 4 files changed, 341 insertions(+)

diff --git a/examples/ifw_example.py b/examples/ifw_example.py
index 9e185e8..00ac391 100644
--- a/examples/ifw_example.py
+++ b/examples/ifw_example.py
@@ -61,6 +61,14 @@
     print(f" - IFW Found based on PCT Pub No: {PCT_pub_number}")
 
 
+print("\nGet IFW + download all prosecution docs as a ZIP archive -->")
+ifw_result = client.get_IFW(application_number=application_number, destination="./download-example", overwrite=True)
+if ifw_result:
+    print(f"Title: {ifw_result.wrapper.application_meta_data.invention_title if ifw_result.wrapper.application_meta_data else 'N/A'}")
+    print(f"Archive: {ifw_result.archive_path}")
+    print(f"Documents in bag: {len(ifw_result.wrapper.document_bag)}")
+
+
 print("\nNow let's download the Patent Publication Text -->")
 if app_no_ifw and app_no_ifw.pgpub_document_meta_data:
     pgpub_archive = app_no_ifw.pgpub_document_meta_data
diff --git a/src/pyUSPTO/clients/patent_data.py b/src/pyUSPTO/clients/patent_data.py
index 7cc27ae..7b98dd7 100644
--- a/src/pyUSPTO/clients/patent_data.py
+++ b/src/pyUSPTO/clients/patent_data.py
@@ -5,7 +5,10 @@
 """
 
 import dataclasses
+import os
+import tempfile
 import warnings
+import zipfile
 from collections.abc import Iterator
 from typing import Any
 
@@ -21,6 +24,7 @@
     DocumentMimeType,
     EventData,
     ForeignPriority,
+    IFWResult,
     PatentDataResponse,
     PatentFileWrapper,
     PatentTermAdjustmentData,
@@ -1096,6 +1100,94 @@ def get_IFW_metadata(
         doc_bag = self.get_application_documents(wrapper.application_number_text)
         return dataclasses.replace(wrapper, document_bag=doc_bag)
 
+    def get_IFW(
+        self,
+        *,
+        application_number: str | None = None,
+        publication_number: str | None = None,
+        patent_number: str | None = None,
+        PCT_app_number: str | None = None,
+        PCT_pub_number: str | None = None,
+        destination: str | None = None,
+        overwrite: bool = False,
+    ) -> IFWResult | None:
+        """Retrieve IFW metadata and download all prosecution documents as a ZIP archive.
+
+        Combines `get_IFW_metadata` with a bulk download of all available prosecution
+        history documents (PDF preferred, DOCX fallback). Documents with no downloadable
+        format (e.g., NPL references) are silently skipped. A warning is issued only
+        if a document has a download URL but the download itself fails.
+
+        Args:
+            application_number: USPTO application number (e.g., "16123456").
+            publication_number: USPTO pre-grant publication number.
+            patent_number: USPTO patent number.
+            PCT_app_number: PCT application number.
+            PCT_pub_number: PCT publication number.
+            destination: Directory to save the ZIP archive. Defaults to current directory.
+            overwrite: Whether to overwrite an existing ZIP. Default False.
+
+        Returns:
+            IFWResult with the PatentFileWrapper and the path to the ZIP archive,
+            or None if no application was found.
+
+        Raises:
+            FileExistsError: If the ZIP archive already exists and overwrite=False.
+        """
+        wrapper = self.get_IFW_metadata(
+            application_number=application_number,
+            publication_number=publication_number,
+            patent_number=patent_number,
+            PCT_app_number=PCT_app_number,
+            PCT_pub_number=PCT_pub_number,
+        )
+        if wrapper is None:
+            return None
+
+        dest_dir = destination or "."
+        app_no = wrapper.application_number_text or "unknown"
+        zip_name = f"{app_no}_ifw.zip"
+        zip_path = os.path.join(dest_dir, zip_name)
+
+        if os.path.exists(zip_path) and not overwrite:
+            raise FileExistsError(
+                f"ZIP archive already exists: {zip_path}. Use overwrite=True to replace."
+            )
+
+        os.makedirs(dest_dir, exist_ok=True)
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
+                for doc in wrapper.document_bag:
+                    # Use the first PDF or MS_WORD entry with a URL (list order; PDF is not prioritised); skip XML and formatless docs.
+                    fmt_obj = next(
+                        (
+                            f
+                            for f in doc.document_formats
+                            if f.mime_type_identifier in ("PDF", "MS_WORD")
+                            and f.download_url
+                        ),
+                        None,
+                    )
+                    if fmt_obj is None:
+                        continue
+
+                    try:
+                        downloaded = self._download_and_extract(
+                            url=fmt_obj.download_url,
+                            destination=tmp_dir,
+                            overwrite=True,
+                        )
+                        zf.write(downloaded, arcname=os.path.basename(downloaded))
+                    except Exception as exc:
+                        warnings.warn(
+                            f"Failed to download document {doc.document_identifier} "
+                            f"({doc.document_code}): {exc}",
+                            stacklevel=2,
+                        )
+
+        return IFWResult(wrapper=wrapper, archive_path=os.path.abspath(zip_path))
+
     def download_archive(
         self,
         printed_metadata: PrintedMetaData,
diff --git a/src/pyUSPTO/models/patent_data.py b/src/pyUSPTO/models/patent_data.py
index 7f174ce..5bbad3d 100644
--- a/src/pyUSPTO/models/patent_data.py
+++ b/src/pyUSPTO/models/patent_data.py
@@ -2210,6 +2210,19 @@ def to_dict(self) -> dict[str, Any]:
         }
 
 
+@dataclass(frozen=True)
+class IFWResult:
+    """Result of a get_IFW call: metadata wrapper and path to the downloaded archive.
+
+    Attributes:
+        wrapper: The PatentFileWrapper containing all IFW metadata and document_bag.
+        archive_path: Absolute path to the ZIP archive of downloaded prosecution documents.
+    """
+
+    wrapper: PatentFileWrapper
+    archive_path: str
+
+
 @dataclass(frozen=True)
 class PatentDataResponse:
     """Represents the overall response from a patent data API request.
diff --git a/tests/clients/test_patent_data_clients.py b/tests/clients/test_patent_data_clients.py
index 4424be1..c8f410a 100644
--- a/tests/clients/test_patent_data_clients.py
+++ b/tests/clients/test_patent_data_clients.py
@@ -33,6 +33,7 @@
     DocumentMimeType,
     EventData,
     ForeignPriority,
+    IFWResult,
     Inventor,
     ParentContinuity,
     PatentDataResponse,
@@ -3094,3 +3095,230 @@ def test_to_csv_with_multiple_wrappers(
             assert data_rows[1][1] == "APP002"
             assert data_rows[1][2] == serialize_date(wrapper2_meta.filing_date)
             assert data_rows[1][7] == wrapper2_meta.first_inventor_name
+
+
+class TestGetIFWDownload:
+    """Tests for the get_IFW method (metadata + bulk document download)."""
+
+    @pytest.fixture
+    def pdf_doc(self) -> Document:
+        """A document that has a PDF download URL."""
+        return Document(
+            document_identifier="DOC001",
+            document_code="CTNF",
+            document_formats=[
+                DocumentFormat(
+                    mime_type_identifier="PDF",
+                    download_url="https://example.com/doc001.pdf",
+                ),
+            ],
+        )
+
+    @pytest.fixture
+    def xml_only_doc(self) -> Document:
+        """A document with only XML (no PDF/DOCX) — should be skipped silently."""
+        return Document(
+            document_identifier="DOC002",
+            document_code="SPEC",
+            document_formats=[
+                DocumentFormat(
+                    mime_type_identifier="XML",
+                    download_url="https://example.com/doc002.xml",
+                ),
+            ],
+        )
+
+    @pytest.fixture
+    def no_url_doc(self) -> Document:
+        """A document with a PDF format entry but no download URL (e.g. NPL ref)."""
+        return Document(
+            document_identifier="DOC003",
+            document_code="NPL",
+            document_formats=[
+                DocumentFormat(mime_type_identifier="PDF", download_url=None),
+            ],
+        )
+
+    @pytest.fixture
+    def docx_doc(self) -> Document:
+        """A document that has only a DOCX (MS_WORD) download URL."""
+        return Document(
+            document_identifier="DOC004",
+            document_code="AMND",
+            document_formats=[
+                DocumentFormat(
+                    mime_type_identifier="MS_WORD",
+                    download_url="https://example.com/doc004.docx",
+                ),
+            ],
+        )
+
+    def _make_wrapper(self, *docs: Document) -> PatentFileWrapper:
+        return PatentFileWrapper(
+            application_number_text="12345678",
+            document_bag=DocumentBag(documents=list(docs)),
+        )
+
+    def test_returns_none_when_not_found(
+        self, patent_data_client: PatentDataClient
+    ) -> None:
+        """get_IFW returns None when no application is found."""
+        with patch.object(patent_data_client, "get_IFW_metadata", return_value=None):
+            result = patent_data_client.get_IFW(application_number="00000000")
+        assert result is None
+
+    def test_returns_ifw_result_with_zip(
+        self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
+    ) -> None:
+        """get_IFW returns IFWResult with a valid ZIP containing the downloaded doc."""
+        wrapper = self._make_wrapper(pdf_doc)
+        fake_pdf = tmp_path / "staging" / "doc001.pdf"
+        fake_pdf.parent.mkdir()
+        fake_pdf.write_bytes(b"%PDF fake content")
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(
+                patent_data_client,
+                "_download_and_extract",
+                return_value=str(fake_pdf),
+            ),
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path / "out"),
+            )
+
+        assert isinstance(result, IFWResult)
+        assert result.wrapper is wrapper
+        assert result.archive_path.endswith("12345678_ifw.zip")
+        import zipfile as zf
+        with zf.ZipFile(result.archive_path) as z:
+            assert "doc001.pdf" in z.namelist()
+
+    def test_skips_xml_only_docs_silently(
+        self, patent_data_client: PatentDataClient, xml_only_doc: Document, tmp_path
+    ) -> None:
+        """Documents with only XML format are silently skipped — _download_and_extract not called."""
+        wrapper = self._make_wrapper(xml_only_doc)
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(patent_data_client, "_download_and_extract") as mock_dl,
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path),
+            )
+        mock_dl.assert_not_called()
+        assert isinstance(result, IFWResult)
+
+    def test_skips_no_url_docs_silently(
+        self, patent_data_client: PatentDataClient, no_url_doc: Document, tmp_path
+    ) -> None:
+        """Documents with no download URL are silently skipped."""
+        wrapper = self._make_wrapper(no_url_doc)
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(patent_data_client, "_download_and_extract") as mock_dl,
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path),
+            )
+        mock_dl.assert_not_called()
+        assert isinstance(result, IFWResult)
+
+    def test_warns_on_download_failure(
+        self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
+    ) -> None:
+        """A warning is issued when a doc has a URL but the download raises."""
+        wrapper = self._make_wrapper(pdf_doc)
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(
+                patent_data_client,
+                "_download_and_extract",
+                side_effect=OSError("network error"),
+            ),
+            pytest.warns(match="DOC001"),
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path),
+            )
+        assert isinstance(result, IFWResult)
+
+    def test_raises_file_exists_error(
+        self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
+    ) -> None:
+        """FileExistsError raised if ZIP already exists and overwrite=False."""
+        wrapper = self._make_wrapper(pdf_doc)
+        existing_zip = tmp_path / "12345678_ifw.zip"
+        existing_zip.write_bytes(b"")
+
+        with patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper):
+            with pytest.raises(FileExistsError):
+                patent_data_client.get_IFW(
+                    application_number="12345678",
+                    destination=str(tmp_path),
+                    overwrite=False,
+                )
+
+    def test_overwrite_replaces_existing_zip(
+        self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
+    ) -> None:
+        """overwrite=True replaces an existing ZIP without error."""
+        wrapper = self._make_wrapper(pdf_doc)
+        existing_zip = tmp_path / "12345678_ifw.zip"
+        existing_zip.write_bytes(b"old content")
+
+        fake_pdf = tmp_path / "staging" / "doc001.pdf"
+        fake_pdf.parent.mkdir()
+        fake_pdf.write_bytes(b"%PDF new")
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(
+                patent_data_client,
+                "_download_and_extract",
+                return_value=str(fake_pdf),
+            ),
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path),
+                overwrite=True,
+            )
+        assert isinstance(result, IFWResult)
+
+    def test_docx_downloaded_when_no_pdf(
+        self, patent_data_client: PatentDataClient, docx_doc: Document, tmp_path
+    ) -> None:
+        """DOCX format is used as fallback when PDF is not available."""
+        wrapper = self._make_wrapper(docx_doc)
+        fake_docx = tmp_path / "staging" / "doc004.docx"
+        fake_docx.parent.mkdir()
+        fake_docx.write_bytes(b"fake docx")
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(
+                patent_data_client,
+                "_download_and_extract",
+                return_value=str(fake_docx),
+            ) as mock_dl,
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path / "out"),
+            )
+
+        mock_dl.assert_called_once_with(
+            url="https://example.com/doc004.docx",
+            destination=mock.ANY,
+            overwrite=True,
+        )
+        assert isinstance(result, IFWResult)

From be604d53c210c5c291570f1ad19bfc10b316037b Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Wed, 4 Mar 2026 10:33:13 -0600
Subject: [PATCH 6/9] feat: add IFWResult with document map and optional ZIP
 output

---
 examples/ifw_example.py                   |  38 +++++-
 src/pyUSPTO/clients/patent_data.py        | 136 +++++++++++++++-------
 src/pyUSPTO/models/patent_data.py         |  13 ++-
 tests/clients/test_patent_data_clients.py | 136 ++++++++++++++++++++--
 4 files changed, 264 insertions(+), 59 deletions(-)

diff --git a/examples/ifw_example.py b/examples/ifw_example.py
index 00ac391..8e51643 100644
--- a/examples/ifw_example.py
+++ b/examples/ifw_example.py
@@ -1,7 +1,26 @@
 """Example usage of pyUSPTO for IFW data.
 
-This example demonstrates how to use the PatentDataClient to interact with the USPTO Patent Data API.
-It shows how to retrieve IFW based on various identifying values.
+This example demonstrates how to use the PatentDataClient to retrieve Image File
+Wrapper (IFW) data from the USPTO Patent Data API. It covers:
+
+- get_IFW_metadata(): retrieve a PatentFileWrapper (with populated document_bag)
+  using any of the five supported identifiers:
+    - application_number
+    - patent_number
+    - publication_number
+    - PCT_app_number
+    - PCT_pub_number
+
+- get_IFW(): retrieve metadata AND bulk-download all prosecution history documents
+  (PDF preferred, DOCX fallback; XML and formatless docs are skipped). Returns an
+  IFWResult with:
+    - wrapper: the PatentFileWrapper
+    - output_path: path to the ZIP archive (as_zip=True, default) or output directory
+    - downloaded_documents: dict mapping document_identifier -> filename, allowing
+      each Document in document_bag to be linked to its downloaded file
+
+- download_archive() / download_publication(): download the pgpub or grant XML
+  archive from PrintedMetaData.
 """
 
 import json
@@ -65,8 +84,19 @@
 ifw_result = client.get_IFW(application_number=application_number, destination="./download-example", overwrite=True)
 if ifw_result:
     print(f"Title: {ifw_result.wrapper.application_meta_data.invention_title if ifw_result.wrapper.application_meta_data else 'N/A'}")
-    print(f"Archive: {ifw_result.archive_path}")
-    print(f"Documents in bag: {len(ifw_result.wrapper.document_bag)}")
+    print(f"Output: {ifw_result.output_path}")
+    doc_bag = ifw_result.wrapper.document_bag or []
+    print(f"Documents downloaded: {len(ifw_result.downloaded_documents)} of {len(doc_bag)}")
+    for doc in doc_bag:
+        if doc.document_identifier:
+            filename = ifw_result.downloaded_documents.get(doc.document_identifier)
+            status = f"-> {filename}" if filename else "(skipped)"
+            print(f"  {doc.document_code} [{doc.document_identifier}] {status}")
+
+print("\nGet IFW + download all prosecution docs as a directory (no ZIP) -->")
+ifw_dir_result = client.get_IFW(application_number=application_number, destination="./download-example", overwrite=True, as_zip=False)
+if ifw_dir_result:
+    print(f"Output directory: {ifw_dir_result.output_path}")
 
 
 print("\nNow let's download the Patent Publication Text -->")
diff --git a/src/pyUSPTO/clients/patent_data.py b/src/pyUSPTO/clients/patent_data.py
index 7b98dd7..c815ca3 100644
--- a/src/pyUSPTO/clients/patent_data.py
+++ b/src/pyUSPTO/clients/patent_data.py
@@ -1110,8 +1110,9 @@ def get_IFW(
         PCT_pub_number: str | None = None,
         destination: str | None = None,
         overwrite: bool = False,
+        as_zip: bool = True,
     ) -> IFWResult | None:
-        """Retrieve IFW metadata and download all prosecution documents as a ZIP archive.
+        """Retrieve IFW metadata and download all prosecution documents.
 
         Combines `get_IFW_metadata` with a bulk download of all available prosecution
         history documents (PDF preferred, DOCX fallback). Documents with no downloadable
@@ -1124,15 +1125,19 @@ def get_IFW(
             patent_number: USPTO patent number.
             PCT_app_number: PCT application number.
             PCT_pub_number: PCT publication number.
-            destination: Directory to save the ZIP archive. Defaults to current directory.
-            overwrite: Whether to overwrite an existing ZIP. Default False.
+            destination: Directory for output. Defaults to current directory.
+            overwrite: Whether to overwrite an existing output. Default False.
+            as_zip: If True (default), package all downloads into a ZIP archive
+                at ``{destination}/{app_no}_ifw.zip``. If False, download files
+                directly into ``{destination}/{app_no}_ifw/``.
 
         Returns:
-            IFWResult with the PatentFileWrapper and the path to the ZIP archive,
-            or None if no application was found.
+            IFWResult with the PatentFileWrapper, the output path, and a mapping
+            of document_identifier to filename for each downloaded document.
+            Returns None if no application was found.
 
         Raises:
-            FileExistsError: If the ZIP archive already exists and overwrite=False.
+            FileExistsError: If the output path already exists and overwrite=False.
         """
         wrapper = self.get_IFW_metadata(
             application_number=application_number,
@@ -1146,47 +1151,90 @@ def get_IFW(
 
         dest_dir = destination or "."
         app_no = wrapper.application_number_text or "unknown"
-        zip_name = f"{app_no}_ifw.zip"
-        zip_path = os.path.join(dest_dir, zip_name)
+        downloaded_documents: dict[str, str] = {}
 
-        if os.path.exists(zip_path) and not overwrite:
-            raise FileExistsError(
-                f"ZIP archive already exists: {zip_path}. Use overwrite=True to replace."
-            )
-
-        os.makedirs(dest_dir, exist_ok=True)
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
-                for doc in wrapper.document_bag:
-                    # Prefer PDF, fall back to MS_WORD; skip XML and formatless docs.
-                    fmt_obj = next(
-                        (
-                            f
-                            for f in doc.document_formats
-                            if f.mime_type_identifier in ("PDF", "MS_WORD")
-                            and f.download_url
-                        ),
-                        None,
-                    )
-                    if fmt_obj is None:
-                        continue
-
-                    try:
-                        downloaded = self._download_and_extract(
-                            url=fmt_obj.download_url,
-                            destination=tmp_dir,
-                            overwrite=True,
-                        )
-                        zf.write(downloaded, arcname=os.path.basename(downloaded))
-                    except Exception as exc:
-                        warnings.warn(
-                            f"Failed to download document {doc.document_identifier} "
-                            f"({doc.document_code}): {exc}",
-                            stacklevel=2,
+        if as_zip:
+            output_path = os.path.join(dest_dir, f"{app_no}_ifw.zip")
+            if os.path.exists(output_path) and not overwrite:
+                raise FileExistsError(
+                    f"ZIP archive already exists: {output_path}. Use overwrite=True to replace."
+                )
+            os.makedirs(dest_dir, exist_ok=True)
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                with zipfile.ZipFile(
+                    output_path, "w", compression=zipfile.ZIP_DEFLATED
+                ) as zf:
+                    for doc in wrapper.document_bag or []:
+                        if not doc.document_identifier:
+                            continue
+                        fmt_obj = next(
+                            (
+                                f
+                                for f in doc.document_formats
+                                if f.mime_type_identifier in ("PDF", "MS_WORD")
+                                and f.download_url
+                            ),
+                            None,
                         )
+                        if fmt_obj is None or not fmt_obj.download_url:
+                            continue
+                        try:
+                            downloaded = self._download_and_extract(
+                                url=fmt_obj.download_url,
+                                destination=tmp_dir,
+                                overwrite=True,
+                            )
+                            arcname = os.path.basename(downloaded)
+                            zf.write(downloaded, arcname=arcname)
+                            downloaded_documents[doc.document_identifier] = arcname
+                        except Exception as exc:
+                            warnings.warn(
+                                f"Failed to download document {doc.document_identifier} "
+                                f"({doc.document_code}): {exc}",
+                                stacklevel=2,
+                            )
+        else:
+            output_path = os.path.join(dest_dir, f"{app_no}_ifw")
+            if os.path.exists(output_path) and not overwrite:
+                raise FileExistsError(
+                    f"Output directory already exists: {output_path}. Use overwrite=True to replace."
+                )
+            os.makedirs(output_path, exist_ok=True)
+            for doc in wrapper.document_bag or []:
+                if not doc.document_identifier:
+                    continue
+                fmt_obj = next(
+                    (
+                        f
+                        for f in doc.document_formats
+                        if f.mime_type_identifier in ("PDF", "MS_WORD")
+                        and f.download_url
+                    ),
+                    None,
+                )
+                if fmt_obj is None or not fmt_obj.download_url:
+                    continue
+                try:
+                    downloaded = self._download_and_extract(
+                        url=fmt_obj.download_url,
+                        destination=output_path,
+                        overwrite=overwrite,
+                    )
+                    downloaded_documents[doc.document_identifier] = os.path.basename(
+                        downloaded
+                    )
+                except Exception as exc:
+                    warnings.warn(
+                        f"Failed to download document {doc.document_identifier} "
+                        f"({doc.document_code}): {exc}",
+                        stacklevel=2,
+                    )
 
-        return IFWResult(wrapper=wrapper, archive_path=os.path.abspath(zip_path))
+        return IFWResult(
+            wrapper=wrapper,
+            output_path=os.path.abspath(output_path),
+            downloaded_documents=downloaded_documents,
+        )
 
     def download_archive(
         self,
diff --git a/src/pyUSPTO/models/patent_data.py b/src/pyUSPTO/models/patent_data.py
index 5bbad3d..36fde89 100644
--- a/src/pyUSPTO/models/patent_data.py
+++ b/src/pyUSPTO/models/patent_data.py
@@ -2048,6 +2048,7 @@ class PatentFileWrapper:
         pgpub_document_meta_data: `PrintedMetaData` for Pre-Grant Publication.
         grant_document_meta_data: `PrintedMetaData` for the granted patent.
         last_ingestion_date_time: Timestamp of when this data was last ingested by the API (UTC).
+        document_bag: `DocumentBag` containing associated documents and their metadata.
     """
 
     application_number_text: str
@@ -2212,15 +2213,21 @@ def to_dict(self) -> dict[str, Any]:
 
 @dataclass(frozen=True)
 class IFWResult:
-    """Result of a get_IFW call: metadata wrapper and path to the downloaded archive.
+    """Result of a get_IFW call: metadata wrapper, output path, and document map.
 
     Attributes:
         wrapper: The PatentFileWrapper containing all IFW metadata and document_bag.
-        archive_path: Absolute path to the ZIP archive of downloaded prosecution documents.
+        output_path: Absolute path to the ZIP archive (when as_zip=True) or the
+            output directory (when as_zip=False).
+        downloaded_documents: Maps document_identifier to the downloaded file's
+            name — the arcname inside the ZIP (as_zip=True) or the basename in
+            the output directory (as_zip=False). Documents that were skipped (no
+            identifier or no PDF/DOCX URL) or that failed to download are absent.
     """
 
     wrapper: PatentFileWrapper
-    archive_path: str
+    output_path: str
+    downloaded_documents: dict[str, str]
 
 
 @dataclass(frozen=True)
diff --git a/tests/clients/test_patent_data_clients.py b/tests/clients/test_patent_data_clients.py
index c8f410a..e16f652 100644
--- a/tests/clients/test_patent_data_clients.py
+++ b/tests/clients/test_patent_data_clients.py
@@ -7,6 +7,7 @@
 
 import csv
 import io
+import os
 from collections.abc import Iterator
 from datetime import date, datetime, timezone
 from typing import Any
@@ -3170,7 +3171,7 @@ def test_returns_none_when_not_found(
     def test_returns_ifw_result_with_zip(
         self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
     ) -> None:
-        """get_IFW returns IFWResult with a valid ZIP containing the downloaded doc."""
+        """get_IFW returns IFWResult with a valid ZIP and populated downloaded_documents."""
         wrapper = self._make_wrapper(pdf_doc)
         fake_pdf = tmp_path / "staging" / "doc001.pdf"
         fake_pdf.parent.mkdir()
@@ -3191,11 +3192,42 @@ def test_returns_ifw_result_with_zip(
 
         assert isinstance(result, IFWResult)
         assert result.wrapper is wrapper
-        assert result.archive_path.endswith("12345678_ifw.zip")
+        assert result.output_path.endswith("12345678_ifw.zip")
+        assert result.downloaded_documents == {"DOC001": "doc001.pdf"}
         import zipfile as zf
-        with zf.ZipFile(result.archive_path) as z:
+        with zf.ZipFile(result.output_path) as z:
             assert "doc001.pdf" in z.namelist()
 
+    def test_returns_ifw_result_as_directory(
+        self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
+    ) -> None:
+        """as_zip=False downloads into a subdirectory and populates downloaded_documents."""
+        wrapper = self._make_wrapper(pdf_doc)
+        out_dir = tmp_path / "out" / "12345678_ifw"
+        out_dir.mkdir(parents=True)
+        fake_pdf = out_dir / "doc001.pdf"
+        fake_pdf.write_bytes(b"%PDF fake content")
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(
+                patent_data_client,
+                "_download_and_extract",
+                return_value=str(fake_pdf),
+            ),
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path / "out"),
+                as_zip=False,
+                overwrite=True,
+            )
+
+        assert isinstance(result, IFWResult)
+        assert result.output_path.endswith("12345678_ifw")
+        assert os.path.isdir(result.output_path)
+        assert result.downloaded_documents == {"DOC001": "doc001.pdf"}
+
     def test_skips_xml_only_docs_silently(
         self, patent_data_client: PatentDataClient, xml_only_doc: Document, tmp_path
     ) -> None:
@@ -3212,6 +3244,7 @@ def test_skips_xml_only_docs_silently(
             )
         mock_dl.assert_not_called()
         assert isinstance(result, IFWResult)
+        assert result.downloaded_documents == {}
 
     def test_skips_no_url_docs_silently(
         self, patent_data_client: PatentDataClient, no_url_doc: Document, tmp_path
@@ -3229,6 +3262,7 @@ def test_skips_no_url_docs_silently(
             )
         mock_dl.assert_not_called()
         assert isinstance(result, IFWResult)
+        assert result.downloaded_documents == {}
 
     def test_warns_on_download_failure(
         self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
@@ -3250,14 +3284,14 @@ def test_warns_on_download_failure(
                 destination=str(tmp_path),
             )
         assert isinstance(result, IFWResult)
+        assert result.downloaded_documents == {}
 
-    def test_raises_file_exists_error(
+    def test_raises_file_exists_error_zip(
         self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
     ) -> None:
         """FileExistsError raised if ZIP already exists and overwrite=False."""
         wrapper = self._make_wrapper(pdf_doc)
-        existing_zip = tmp_path / "12345678_ifw.zip"
-        existing_zip.write_bytes(b"")
+        (tmp_path / "12345678_ifw.zip").write_bytes(b"")
 
         with patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper):
             with pytest.raises(FileExistsError):
@@ -3267,13 +3301,28 @@ def test_raises_file_exists_error(
                     overwrite=False,
                 )
 
+    def test_raises_file_exists_error_directory(
+        self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
+    ) -> None:
+        """FileExistsError raised if output directory already exists and overwrite=False."""
+        wrapper = self._make_wrapper(pdf_doc)
+        (tmp_path / "12345678_ifw").mkdir()
+
+        with patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper):
+            with pytest.raises(FileExistsError):
+                patent_data_client.get_IFW(
+                    application_number="12345678",
+                    destination=str(tmp_path),
+                    overwrite=False,
+                    as_zip=False,
+                )
+
     def test_overwrite_replaces_existing_zip(
         self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
     ) -> None:
         """overwrite=True replaces an existing ZIP without error."""
         wrapper = self._make_wrapper(pdf_doc)
-        existing_zip = tmp_path / "12345678_ifw.zip"
-        existing_zip.write_bytes(b"old content")
+        (tmp_path / "12345678_ifw.zip").write_bytes(b"old content")
 
         fake_pdf = tmp_path / "staging" / "doc001.pdf"
         fake_pdf.parent.mkdir()
@@ -3322,3 +3371,74 @@ def test_docx_downloaded_when_no_pdf(
             overwrite=True,
         )
         assert isinstance(result, IFWResult)
+        assert result.downloaded_documents == {"DOC004": "doc004.docx"}
+
+    def test_directory_skips_xml_only_docs(
+        self, patent_data_client: PatentDataClient, xml_only_doc: Document, tmp_path
+    ) -> None:
+        """as_zip=False: docs with only XML are silently skipped."""
+        wrapper = self._make_wrapper(xml_only_doc)
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(patent_data_client, "_download_and_extract") as mock_dl,
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path),
+                as_zip=False,
+            )
+        mock_dl.assert_not_called()
+        assert result.downloaded_documents == {}
+
+    def test_directory_warns_on_download_failure(
+        self, patent_data_client: PatentDataClient, pdf_doc: Document, tmp_path
+    ) -> None:
+        """as_zip=False: warning issued when download raises despite having a URL."""
+        wrapper = self._make_wrapper(pdf_doc)
+
+        with (
+            patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+            patch.object(
+                patent_data_client,
+                "_download_and_extract",
+                side_effect=OSError("network error"),
+            ),
+            pytest.warns(match="DOC001"),
+        ):
+            result = patent_data_client.get_IFW(
+                application_number="12345678",
+                destination=str(tmp_path),
+                as_zip=False,
+            )
+        assert result.downloaded_documents == {}
+
+    def test_skips_docs_with_no_identifier(
+        self, patent_data_client: PatentDataClient, tmp_path
+    ) -> None:
+        """Documents with no document_identifier are silently skipped in both modes."""
+        no_id_doc = Document(
+            document_identifier=None,
+            document_code="CTNF",
+            document_formats=[
+                DocumentFormat(
+                    mime_type_identifier="PDF",
+                    download_url="https://example.com/doc.pdf",
+                ),
+            ],
+        )
+        wrapper = self._make_wrapper(no_id_doc)
+
+        for as_zip in (True, False):
+            with (
+                patch.object(patent_data_client, "get_IFW_metadata", return_value=wrapper),
+                patch.object(patent_data_client, "_download_and_extract") as mock_dl,
+            ):
+                result = patent_data_client.get_IFW(
+                    application_number="12345678",
+                    destination=str(tmp_path),
+                    as_zip=as_zip,
+                    overwrite=True,
+                )
+            mock_dl.assert_not_called()
+            assert result.downloaded_documents == {}

From e5f04f3961e18034fb89b562fe4323869d5c6839 Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Wed, 4 Mar 2026 17:23:11 -0600
Subject: [PATCH 7/9] fix: Update CI to not include dev requirements.

---
 .github/workflows/python-tests.yml | 2 +-
 CONTRIBUTING.md                    | 2 ++
 tox.ini                            | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index ea3ef34..011764b 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -25,7 +25,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
-          pip install -r requirements-dev.txt
+          pip install pytest pytest-cov pytest-mock
           pip install -e .
 
       - name: Run tests
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index c347fad..7528dd3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -22,6 +22,8 @@ cd pyUSPTO
 
 ### Set Up Development Environment
 
+> **Note:** Python 3.11+ is required for the development environment. Some dev dependencies (e.g. `myst-parser`) do not support Python 3.10. The package itself supports Python 3.10+.
+
 ```bash
 # Create and activate a virtual environment
 python -m venv venv
diff --git a/tox.ini b/tox.ini
index cc55d4a..fe6db8e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -13,6 +13,7 @@ basepython =
     py314: {env:LOCALAPPDATA}\Python\pythoncore-3.14-64\python.exe
     # py315: {env:LOCALAPPDATA}\Python\pythoncore-3.15-64\python.exe
 deps =
+    -r requirements.txt
     pytest>=9.0.2
     pytest-cov>=7.0.0
     pytest-mock>=3.15.1

From dc5eb461970cceb03435d1053ab276a1a692424d Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Wed, 4 Mar 2026 17:33:05 -0600
Subject: [PATCH 8/9] fix: split dev extras into test/docs/lint and use them in CI and tox

---
 .github/workflows/python-tests.yml |  7 +++---
 pyproject.toml                     | 25 +++++++------------
 requirements-dev.txt               | 39 ++++++++++++++++++------------
 tox.ini                            |  5 +---
 4 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
index 011764b..3cd574d 100644
--- a/.github/workflows/python-tests.yml
+++ b/.github/workflows/python-tests.yml
@@ -25,8 +25,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
-          pip install pytest pytest-cov pytest-mock
-          pip install -e .
+          pip install -e ".[test]"
 
       - name: Run tests
         run: |
@@ -46,12 +45,12 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v6
         with:
-          python-version: "3.10"
+          python-version: "3.14"
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install ruff mypy
+          pip install -e ".[lint]"
 
       - name: Lint with ruff
         run: |
diff --git a/pyproject.toml b/pyproject.toml
index 6b86858..b46dd25 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,24 +51,17 @@ dependencies = [
 dynamic = ["version"]
 
 [project.optional-dependencies]
-dev = [
-    # Testing
-    "pytest>=9.0.2",
-    "pytest-cov>=7.0.0",
-    "pytest-mock>=3.15.1",
-    # Documentation
-    "sphinx==8.1.3",
-    "sphinx-rtd-theme>=3.0.0",
-    "sphinx_immaterial>=0.13.8",
-    "sphinx-autodoc-typehints==3.0.1",
+test = ["pytest>=9.0.2", "pytest-cov>=7.0.0", "pytest-mock>=3.15.1"]
+docs = [
+    "sphinx>=9.1.0",
+    "sphinx-rtd-theme>=3.1.0",
+    "sphinx_immaterial>=0.13.9",
+    "sphinx-autodoc-typehints>=3.9.5",
     "sphinx-copybutton>=0.5.2",
-    "myst-parser>=4.0.1",
-    # Type checking
-    "mypy>=1.19.0",
-    "types-requests>=2.32.4",
-    # Code quality and formatting
-    "ruff>=0.15.0",
+    "myst-parser>=5.0.0",
 ]
+lint = ["mypy>=1.19.0", "types-requests>=2.32.4", "ruff>=0.15.0"]
+dev = ["pyUSPTO[test]", "pyUSPTO[docs]", "pyUSPTO[lint]"]
 
 [project.urls]
 GitHub = "https://github.com/DunlapCoddingPC/pyUSPTO"
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 6bb3543..82cbf57 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -52,11 +52,11 @@ mdit-py-plugins==0.5.0
 mdurl==0.1.2
     # via markdown-it-py
 mypy==1.19.1
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
 mypy-extensions==1.1.0
     # via mypy
 myst-parser==5.0.0
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
 packaging==26.0
     # via
     #   pytest
@@ -71,7 +71,7 @@ pydantic==2.12.5
     # via
     #   pydantic-extra-types
     #   sphinx-immaterial
-pydantic-core==2.42.0
+pydantic-core==2.41.5
     # via pydantic
 pydantic-extra-types==2.11.0
     # via sphinx-immaterial
@@ -81,41 +81,48 @@ pygments==2.19.2
     #   sphinx
 pytest==9.0.2
     # via
-    #   pyUSPTO (pyproject.toml)
     #   pytest-cov
     #   pytest-mock
+    #   pyuspto
 pytest-cov==7.0.0
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
 pytest-mock==3.15.1
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
+pyuspto @ file:///C:/Users/andrewp/Documents/GitHub/pyUSPTO
+    # via
+    #   pyUSPTO (pyproject.toml)
+    #   pyuspto
 pyyaml==6.0.3
     # via myst-parser
 requests==2.32.5
     # via
     #   pyUSPTO (pyproject.toml)
+    #   pyuspto
     #   sphinx
     #   sphinx-immaterial
+roman-numerals==4.1.0
+    # via sphinx
 ruff==0.15.4
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
 snowballstemmer==3.0.1
     # via sphinx
 sphinx==9.1.0
     # via
     #   myst-parser
-    #   pyUSPTO (pyproject.toml)
+    #   pyuspto
     #   sphinx-autodoc-typehints
     #   sphinx-copybutton
     #   sphinx-immaterial
     #   sphinx-rtd-theme
     #   sphinxcontrib-jquery
-sphinx-autodoc-typehints==3.9.5
-    # via pyUSPTO (pyproject.toml)
+sphinx-autodoc-typehints==3.9.6
+    # via pyuspto
 sphinx-copybutton==0.5.2
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
 sphinx-immaterial==0.13.9
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
 sphinx-rtd-theme==3.1.0
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
 sphinxcontrib-applehelp==2.0.0
     # via sphinx
 sphinxcontrib-devhelp==2.0.0
@@ -131,7 +138,7 @@ sphinxcontrib-qthelp==2.0.0
 sphinxcontrib-serializinghtml==2.0.0
     # via sphinx
 types-requests==2.32.4.20260107
-    # via pyUSPTO (pyproject.toml)
+    # via pyuspto
 typing-extensions==4.15.0
     # via
     #   mypy
@@ -143,7 +150,9 @@ typing-extensions==4.15.0
 typing-inspection==0.4.2
     # via pydantic
 tzdata==2025.3
-    # via pyUSPTO (pyproject.toml)
+    # via
+    #   pyUSPTO (pyproject.toml)
+    #   pyuspto
 urllib3==2.6.3
     # via
     #   requests
diff --git a/tox.ini b/tox.ini
index fe6db8e..ac508a9 100644
--- a/tox.ini
+++ b/tox.ini
@@ -14,9 +14,6 @@ basepython =
     # py315: {env:LOCALAPPDATA}\Python\pythoncore-3.15-64\python.exe
 deps =
     -r requirements.txt
-    pytest>=9.0.2
-    pytest-cov>=7.0.0
-    pytest-mock>=3.15.1
-    typing_extensions>=4.15.0
+extras = test
 commands =
     pytest tests/ --cov=src/pyUSPTO

From 10a21df98f54e9343f34159c03468eb93411cf47 Mon Sep 17 00:00:00 2001
From: Andrew <3300522+dpieski@users.noreply.github.com>
Date: Thu, 5 Mar 2026 09:05:28 -0600
Subject: [PATCH 9/9] fix: add missing typing_extensions to the test extra

---
 pyproject.toml       | 2 +-
 requirements-dev.txt | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b46dd25..f591731 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,7 @@ dependencies = [
 dynamic = ["version"]
 
 [project.optional-dependencies]
-test = ["pytest>=9.0.2", "pytest-cov>=7.0.0", "pytest-mock>=3.15.1"]
+test = ["pytest>=9.0.2", "pytest-cov>=7.0.0", "pytest-mock>=3.15.1", "typing_extensions>=4.15.0"]
 docs = [
     "sphinx>=9.1.0",
     "sphinx-rtd-theme>=3.1.0",
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 82cbf57..195a534 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -145,6 +145,7 @@ typing-extensions==4.15.0
     #   pydantic
     #   pydantic-core
     #   pydantic-extra-types
+    #   pyuspto
     #   sphinx-immaterial
     #   typing-inspection
 typing-inspection==0.4.2