diff --git a/.github/workflows/publish-to-test-pypi.yml b/.github/workflows/publish-to-test-pypi.yml
index f0cd42a..de90759 100644
--- a/.github/workflows/publish-to-test-pypi.yml
+++ b/.github/workflows/publish-to-test-pypi.yml
@@ -12,7 +12,7 @@ on:
 
 jobs:
   build:
-    name: Build distribution 📦
+    name: Build distribution
     runs-on: ubuntu-latest
 
     steps:
diff --git a/README.md b/README.md
index f8b7d42..1c6dc9f 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,10 @@
-# pyUSPTO
+# USPTO Open Data Portal Client Library
 
 [![PyPI version](https://badge.fury.io/py/pyUSPTO.svg)](https://badge.fury.io/py/pyUSPTO)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
 [![Read the Docs](https://img.shields.io/readthedocs/pyuspto)](https://pyuspto.readthedocs.io/en/latest/)
+[![PyPI Downloads](https://static.pepy.tech/personalized-badge/pyuspto?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLUE&right_color=GREY&left_text=downloads)](https://pepy.tech/projects/pyuspto)
 
-A Python client library for interacting with the United Stated Patent and Trademark Office (USPTO) [Open Data Portal](https://data.uspto.gov/home) APIs.
+A Python client library for interacting with the United States Patent and Trademark Office (USPTO) [Open Data Portal](https://data.uspto.gov/home) APIs.
 
diff --git a/pyproject.toml b/pyproject.toml
index f591731..51bf24c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
     "Programming Language :: Python :: 3.14",
     "Operating System :: OS Independent",
-    "Development Status :: 4 - Beta",
+    "Development Status :: 5 - Production/Stable",
     "Environment :: Console",
     "Intended Audience :: Developers",
     "Intended Audience :: Education",
@@ -51,7 +51,12 @@ dependencies = [
 dynamic = ["version"]
 
 [project.optional-dependencies]
-test = ["pytest>=9.0.2", "pytest-cov>=7.0.0", "pytest-mock>=3.15.1", "typing_extensions>=4.15.0"]
+test = [
+    "pytest>=9.0.2",
+    "pytest-cov>=7.0.0",
+    "pytest-mock>=3.15.1",
+    "typing_extensions>=4.15.0",
+]
 docs = [
     "sphinx>=9.1.0",
     "sphinx-rtd-theme>=3.1.0",
diff --git a/src/pyUSPTO/clients/patent_data.py b/src/pyUSPTO/clients/patent_data.py
index c815ca3..656c0cd 100644
--- a/src/pyUSPTO/clients/patent_data.py
+++ b/src/pyUSPTO/clients/patent_data.py
@@ -163,7 +163,10 @@ def sanitize_application_number(self, input_number: str) -> str:
             if not serial.isdigit():
                 raise ValueError(f"Invalid PCT serial: {serial}. Must be numeric.")
 
-            return f"PCT{country}{year}{serial}"
+            # NOTE(review): lstrip("0") leaves fewer than five digits for serials
+            # below 10000 and an empty string for an all-zero serial; confirm the
+            # API does not expect a fixed-width, zero-padded serial.
+            return f"PCT{country}{year}{serial.lstrip('0')}"
 
         # Strip whitespace and remove commas/spaces
         cleaned = raw.replace(",", "").replace(" ", "")
@@ -1039,6 +1042,84 @@ def download_document(
             overwrite=overwrite,
         )
 
+    def _resolve_by_search(self, **search_kwargs: Any) -> PatentFileWrapper | None:
+        """Search for an application and return the first matching wrapper.
+
+        This is a shared helper for convenience methods that resolve
+        non-application-number identifiers to a PatentFileWrapper.
+
+        Args:
+            **search_kwargs: Keyword arguments passed to search_applications().
+
+        Returns:
+            Optional[PatentFileWrapper]: The first matching wrapper, or None.
+        """
+        pdr = self.search_applications(**search_kwargs, limit=1)
+        if pdr.patent_file_wrapper_data_bag:
+            return pdr.patent_file_wrapper_data_bag[0]
+        return None
+
+    def get_patent(self, patent_number: str) -> PatentFileWrapper | None:
+        """Retrieve application metadata by patent number.
+
+        Searches the USPTO API for the given patent number and returns
+        the corresponding PatentFileWrapper. This is a lightweight lookup
+        that does not fetch the full document bag.
+
+        Args:
+            patent_number (str): The USPTO patent number (e.g., "11000000").
+
+        Returns:
+            Optional[PatentFileWrapper]: The matching patent file wrapper,
+                or None if not found.
+        """
+        return self._resolve_by_search(patent_number_q=patent_number)
+
+    def get_publication(self, publication_number: str) -> PatentFileWrapper | None:
+        """Retrieve application metadata by publication number.
+
+        Searches the USPTO API for the given pre-grant publication number
+        and returns the corresponding PatentFileWrapper. This is a lightweight
+        lookup that does not fetch the full document bag.
+
+        Args:
+            publication_number (str): The USPTO publication number
+                (e.g., "20230123456").
+
+        Returns:
+            Optional[PatentFileWrapper]: The matching patent file wrapper,
+                or None if not found.
+        """
+        return self._resolve_by_search(earliestPublicationNumber_q=publication_number)
+
+    def get_pct(self, pct_number: str) -> PatentFileWrapper | None:
+        """Retrieve application metadata by PCT number.
+
+        Accepts both PCT application numbers and PCT publication numbers.
+        The format is auto-detected:
+
+        - PCT application numbers (starting with "PCT") are resolved via
+          direct lookup using get_application_by_number.
+        - PCT publication numbers (e.g., "WO2024012345A1") are resolved
+          via search.
+
+        Args:
+            pct_number (str): A PCT application number (e.g.,
+                "PCT/US2024/012345") or PCT publication number
+                (e.g., "WO2024012345A1"). Leading and trailing
+                whitespace is ignored.
+
+        Returns:
+            Optional[PatentFileWrapper]: The matching patent file wrapper,
+                or None if not found.
+        """
+        # Strip once so the prefix check and both lookups use the same value;
+        # this keeps surrounding padding out of the search query.
+        cleaned = pct_number.strip()
+        if cleaned.upper().startswith("PCT"):
+            return self.get_application_by_number(application_number=cleaned)
+        return self._resolve_by_search(pctPublicationNumber_q=cleaned)
+
     def get_IFW_metadata(
         self,
         *,
diff --git a/tests/clients/test_patent_data_clients.py b/tests/clients/test_patent_data_clients.py
index e16f652..ac194a5 100644
--- a/tests/clients/test_patent_data_clients.py
+++ b/tests/clients/test_patent_data_clients.py
@@ -1334,7 +1334,7 @@ def test_get_ifw_by_pct_app_number(
         # Should call get_application_by_number first
         assert mock_make_request.call_args_list[0] == call(
             method="GET",
-            endpoint="api/v1/patent/applications/PCTUS24012345",
+            endpoint="api/v1/patent/applications/PCTUS2412345",
             response_class=PatentDataResponse,
         )
         assert result.application_number_text == mock_patent_file_wrapper.application_number_text
@@ -1348,7 +1348,7 @@ def test_get_ifw_by_short_pct_app_number(
         """Test PCT application number sanitization with 2-digit year format (US24 vs US2024).
 
         Verifies that PCT numbers with short year format (PCT/US24/012345) are correctly
-        sanitized to PCTUS24012345 before making API request.
+        sanitized to PCTUS2412345 before making API request.
 
         Note: This will trigger a data mismatch warning because the mock_patent_file_wrapper
         has application_number_text='12345678' but we're requesting a PCT number.
@@ -1369,7 +1369,7 @@ def test_get_ifw_by_short_pct_app_number(
         # Should call get_application_by_number first
         assert mock_make_request.call_args_list[0] == call(
             method="GET",
-            endpoint="api/v1/patent/applications/PCTUS24012345",
+            endpoint="api/v1/patent/applications/PCTUS2412345",
             response_class=PatentDataResponse,
         )
         assert result.application_number_text == mock_patent_file_wrapper.application_number_text
@@ -1550,6 +1550,166 @@ def test_get_ifw_prioritizes_first_parameter(
         assert isinstance(result.document_bag, DocumentBag)
 
 
+class TestGetPatent:
+    """Tests for the get_patent convenience method."""
+
+    def test_get_patent_found(
+        self,
+        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
+        mock_patent_file_wrapper: PatentFileWrapper,
+    ) -> None:
+        """Test get_patent returns wrapper when patent is found."""
+        client, mock_make_request = client_with_mocked_request
+        mock_make_request.return_value = PatentDataResponse(
+            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
+        )
+
+        patent_num = "11000000"
+        result = client.get_patent(patent_num)
+
+        assert mock_make_request.call_args == call(
+            method="GET",
+            endpoint="api/v1/patent/applications/search",
+            params={
+                "q": f"applicationMetaData.patentNumber:{patent_num}",
+                "limit": 1,
+                "offset": 0,
+            },
+            response_class=PatentDataResponse,
+        )
+        assert result is not None
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+
+    def test_get_patent_not_found(
+        self,
+        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
+    ) -> None:
+        """Test get_patent returns None when patent is not found."""
+        client, mock_make_request = client_with_mocked_request
+        mock_make_request.return_value = PatentDataResponse(
+            count=0, patent_file_wrapper_data_bag=[]
+        )
+
+        result = client.get_patent("nonexistent")
+        assert result is None
+
+
+class TestGetPublication:
+    """Tests for the get_publication convenience method."""
+
+    def test_get_publication_found(
+        self,
+        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
+        mock_patent_file_wrapper: PatentFileWrapper,
+    ) -> None:
+        """Test get_publication returns wrapper when publication is found."""
+        client, mock_make_request = client_with_mocked_request
+        mock_make_request.return_value = PatentDataResponse(
+            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
+        )
+
+        pub_num = "20230123456"
+        result = client.get_publication(pub_num)
+
+        assert mock_make_request.call_args == call(
+            method="GET",
+            endpoint="api/v1/patent/applications/search",
+            params={
+                "q": f"applicationMetaData.earliestPublicationNumber:{pub_num}",
+                "limit": 1,
+                "offset": 0,
+            },
+            response_class=PatentDataResponse,
+        )
+        assert result is not None
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+
+    def test_get_publication_not_found(
+        self,
+        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
+    ) -> None:
+        """Test get_publication returns None when publication is not found."""
+        client, mock_make_request = client_with_mocked_request
+        mock_make_request.return_value = PatentDataResponse(
+            count=0, patent_file_wrapper_data_bag=[]
+        )
+
+        result = client.get_publication("nonexistent")
+        assert result is None
+
+
+class TestGetPCT:
+    """Tests for the get_pct convenience method."""
+
+    @pytest.fixture
+    def mock_pct_file_wrapper(self) -> PatentFileWrapper:
+        """Provides a mock PatentFileWrapper with a sanitized PCT application number."""
+        return PatentFileWrapper(application_number_text="PCTUS2412345")
+
+    def test_get_pct_with_app_number(
+        self,
+        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
+        mock_pct_file_wrapper: PatentFileWrapper,
+    ) -> None:
+        """Test get_pct with PCT application number uses direct lookup."""
+        client, mock_make_request = client_with_mocked_request
+        mock_make_request.return_value = PatentDataResponse(
+            count=1, patent_file_wrapper_data_bag=[mock_pct_file_wrapper]
+        )
+
+        pct_app = "PCT/US2024/012345"
+        result = client.get_pct(pct_app)
+
+        # Direct lookup: the sanitized number goes in the endpoint, no search params
+        assert mock_make_request.call_args == call(
+            method="GET",
+            endpoint="api/v1/patent/applications/PCTUS2412345",
+            response_class=PatentDataResponse,
+        )
+        assert result is not None
+        assert result.application_number_text == mock_pct_file_wrapper.application_number_text
+
+    def test_get_pct_with_pub_number(
+        self,
+        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
+        mock_patent_file_wrapper: PatentFileWrapper,
+    ) -> None:
+        """Test get_pct with PCT publication number uses search."""
+        client, mock_make_request = client_with_mocked_request
+        mock_make_request.return_value = PatentDataResponse(
+            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
+        )
+
+        pct_pub = "WO2024012345A1"
+        result = client.get_pct(pct_pub)
+
+        assert mock_make_request.call_args == call(
+            method="GET",
+            endpoint="api/v1/patent/applications/search",
+            params={
+                "q": f"applicationMetaData.pctPublicationNumber:{pct_pub}",
+                "limit": 1,
+                "offset": 0,
+            },
+            response_class=PatentDataResponse,
+        )
+        assert result is not None
+        assert result.application_number_text == mock_patent_file_wrapper.application_number_text
+
+    def test_get_pct_not_found(
+        self,
+        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
+    ) -> None:
+        """Test get_pct returns None when a publication-number search has no hits."""
+        client, mock_make_request = client_with_mocked_request
+        mock_make_request.return_value = PatentDataResponse(
+            count=0, patent_file_wrapper_data_bag=[]
+        )
+
+        result = client.get_pct("WO9999999999")
+        assert result is None
+
+
 class TestDownloadArchive:
     """Tests for downloading archive files."""
 
diff --git a/tests/integration/test_patent_data_integration.py b/tests/integration/test_patent_data_integration.py
index 924fd46..d34639b 100644
--- a/tests/integration/test_patent_data_integration.py
+++ b/tests/integration/test_patent_data_integration.py
@@ -857,3 +857,54 @@ def test_invalid_application_number_handling(
             )
         except Exception as e:
             pytest.fail(f"Unexpected exception for invalid app number: {e}")
+
+
+class TestHelperMethodsIntegration:
+    """Integration tests for get_patent, get_publication, and get_pct helper methods."""
+
+    # Identifiers looked up by the tests below; each test asserts the
+    # application number it expects the lookup to resolve to.
+    KNOWN_PATENT_NUMBER = "12565253"
+    KNOWN_PUBLICATION_NUMBER = "US20260054762A1"
+    KNOWN_PCT_APP_NUMBER = "PCT/US2025/047756"
+    KNOWN_PCT_PUB_NUMBER = "WO 2026/044302"
+
+    def test_get_patent(self, patent_data_client: PatentDataClient) -> None:
+        """Test get_patent returns a PatentFileWrapper for a known patent number."""
+        result = patent_data_client.get_patent(self.KNOWN_PATENT_NUMBER)
+
+        assert isinstance(result, PatentFileWrapper)
+        assert result.application_number_text == "19378371"
+        assert result.application_meta_data is not None
+        assert isinstance(result.application_meta_data, ApplicationMetaData)
+
+    def test_get_publication(self, patent_data_client: PatentDataClient) -> None:
+        """Test get_publication returns a PatentFileWrapper for a known publication number."""
+        result = patent_data_client.get_publication(self.KNOWN_PUBLICATION_NUMBER)
+
+        assert isinstance(result, PatentFileWrapper)
+        assert result.application_number_text == "19378371"
+        assert result.application_meta_data is not None
+        assert isinstance(result.application_meta_data, ApplicationMetaData)
+
+    def test_get_pct_with_app_number(
+        self, patent_data_client: PatentDataClient
+    ) -> None:
+        """Test get_pct with a PCT application number (direct lookup)."""
+        result = patent_data_client.get_pct(self.KNOWN_PCT_APP_NUMBER)
+
+        assert isinstance(result, PatentFileWrapper)
+        assert result.application_number_text == "PCTUS2547756"
+        assert result.application_meta_data is not None
+        assert result.application_meta_data.pct_publication_number == "WO2026044302"
+
+    def test_get_pct_with_pub_number(
+        self, patent_data_client: PatentDataClient
+    ) -> None:
+        """Test get_pct with a PCT publication number."""
+        result = patent_data_client.get_pct(self.KNOWN_PCT_PUB_NUMBER)
+
+        assert isinstance(result, PatentFileWrapper)
+        assert result.application_number_text == "PCTUS2547756"
+        assert result.application_meta_data is not None
+        assert result.application_meta_data.pct_publication_number == "WO2026044302"