Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/publish-to-test-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:

jobs:
build:
name: Build distribution 📦
name: Build distribution
runs-on: ubuntu-latest

steps:
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# pyUSPTO
# USPTO Open Data Portal Client Library

[![PyPI version](https://badge.fury.io/py/pyUSPTO.svg)](https://badge.fury.io/py/pyUSPTO)
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
[![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
[![Read the Docs](https://img.shields.io/readthedocs/pyuspto)](https://pyuspto.readthedocs.io/en/latest/)
[![PyPI Downloads](https://static.pepy.tech/personalized-badge/pyuspto?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLUE&right_color=GREY&left_text=downloads)](https://pepy.tech/projects/pyuspto)

A Python client library for interacting with the United States Patent and Trademark Office (USPTO) [Open Data Portal](https://data.uspto.gov/home) APIs.

Expand Down
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Operating System :: OS Independent",
"Development Status :: 4 - Beta",
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
"Intended Audience :: Developers",
"Intended Audience :: Education",
Expand Down Expand Up @@ -51,7 +51,12 @@ dependencies = [
dynamic = ["version"]

[project.optional-dependencies]
test = ["pytest>=9.0.2", "pytest-cov>=7.0.0", "pytest-mock>=3.15.1", "typing_extensions>=4.15.0"]
test = [
"pytest>=9.0.2",
"pytest-cov>=7.0.0",
"pytest-mock>=3.15.1",
"typing_extensions>=4.15.0",
]
docs = [
"sphinx>=9.1.0",
"sphinx-rtd-theme>=3.1.0",
Expand Down
76 changes: 75 additions & 1 deletion src/pyUSPTO/clients/patent_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def sanitize_application_number(self, input_number: str) -> str:
if not serial.isdigit():
raise ValueError(f"Invalid PCT serial: {serial}. Must be numeric.")

return f"PCT{country}{year}{serial}"
return f"PCT{country}{year}{serial.lstrip('0')}"

# Strip whitespace and remove commas/spaces
cleaned = raw.replace(",", "").replace(" ", "")
Expand Down Expand Up @@ -1039,6 +1039,80 @@ def download_document(
overwrite=overwrite,
)

def _resolve_by_search(self, **search_kwargs: Any) -> PatentFileWrapper | None:
    """Run a single-result search and hand back its first hit, if any.

    Shared by the convenience lookups that start from an identifier other
    than the application number.

    Args:
        **search_kwargs: Forwarded unchanged to search_applications(); this
            helper additionally passes ``limit=1``.

    Returns:
        Optional[PatentFileWrapper]: The first entry of the response's
            ``patent_file_wrapper_data_bag``, or None when that bag is
            empty or otherwise falsy.
    """
    response = self.search_applications(**search_kwargs, limit=1)
    # Only the first hit matters here because the search is capped at one.
    return (
        response.patent_file_wrapper_data_bag[0]
        if response.patent_file_wrapper_data_bag
        else None
    )

def get_patent(self, patent_number: str) -> PatentFileWrapper | None:
    """Look up an application's file wrapper from its patent number.

    Delegates to the shared search helper, using the patent number as the
    ``patent_number_q`` search criterion.

    Args:
        patent_number (str): The USPTO patent number (e.g., "11000000").

    Returns:
        Optional[PatentFileWrapper]: Whatever the search helper resolves:
            the first matching wrapper, or None when nothing matched.
    """
    criteria = {"patent_number_q": patent_number}
    return self._resolve_by_search(**criteria)

def get_publication(self, publication_number: str) -> PatentFileWrapper | None:
    """Look up an application's file wrapper from its publication number.

    Delegates to the shared search helper, using the publication number as
    the ``earliestPublicationNumber_q`` search criterion.

    Args:
        publication_number (str): The USPTO pre-grant publication number
            (e.g., "20230123456").

    Returns:
        Optional[PatentFileWrapper]: Whatever the search helper resolves:
            the first matching wrapper, or None when nothing matched.
    """
    criteria = {"earliestPublicationNumber_q": publication_number}
    return self._resolve_by_search(**criteria)

def get_pct(self, pct_number: str) -> PatentFileWrapper | None:
    """Retrieve application metadata by PCT number.

    Accepts both PCT application numbers and PCT publication numbers.
    The format is auto-detected after surrounding whitespace is removed:

    - Values starting with "PCT" (case-insensitive) are treated as PCT
      application numbers and resolved via direct lookup using
      get_application_by_number.
    - Anything else is treated as a PCT publication number and resolved
      via search. Internal whitespace and "/" separators are removed
      first, so display forms such as "WO 2026/044302" are searched as
      "WO2026044302".

    Args:
        pct_number (str): A PCT application number (e.g.,
            "PCT/US2024/012345") or PCT publication number
            (e.g., "WO2024012345A1" or "WO 2026/044302").

    Returns:
        Optional[PatentFileWrapper]: For publication numbers, the first
            matching wrapper or None if the search finds nothing. For
            application numbers, the result of get_application_by_number
            is returned unchanged.
    """
    candidate = pct_number.strip()
    if candidate.upper().startswith("PCT"):
        # Forward the stripped value so detection and lookup see the same text.
        return self.get_application_by_number(application_number=candidate)

    # The search value is interpolated into the query string, so a space or
    # slash left in a display-formatted number would not form a single term.
    publication_number = "".join(candidate.split()).replace("/", "")
    return self._resolve_by_search(pctPublicationNumber_q=publication_number)

def get_IFW_metadata(
self,
*,
Expand Down
166 changes: 163 additions & 3 deletions tests/clients/test_patent_data_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,7 +1334,7 @@ def test_get_ifw_by_pct_app_number(
# Should call get_application_by_number first
assert mock_make_request.call_args_list[0] == call(
method="GET",
endpoint="api/v1/patent/applications/PCTUS24012345",
endpoint="api/v1/patent/applications/PCTUS2412345",
response_class=PatentDataResponse,
)
assert result.application_number_text == mock_patent_file_wrapper.application_number_text
Expand All @@ -1348,7 +1348,7 @@ def test_get_ifw_by_short_pct_app_number(
"""Test PCT application number sanitization with 2-digit year format (US24 vs US2024).

Verifies that PCT numbers with short year format (PCT/US24/012345) are correctly
sanitized to PCTUS24012345 before making API request.
sanitized to PCTUS2412345 before making API request.

Note: This will trigger a data mismatch warning because the mock_patent_file_wrapper
has application_number_text='12345678' but we're requesting a PCT number.
Expand All @@ -1369,7 +1369,7 @@ def test_get_ifw_by_short_pct_app_number(
# Should call get_application_by_number first
assert mock_make_request.call_args_list[0] == call(
method="GET",
endpoint="api/v1/patent/applications/PCTUS24012345",
endpoint="api/v1/patent/applications/PCTUS2412345",
response_class=PatentDataResponse,
)
assert result.application_number_text == mock_patent_file_wrapper.application_number_text
Expand Down Expand Up @@ -1550,6 +1550,166 @@ def test_get_ifw_prioritizes_first_parameter(
assert isinstance(result.document_bag, DocumentBag)


class TestGetPatent:
    """Unit tests covering PatentDataClient.get_patent."""

    def test_get_patent_found(
        self,
        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
        mock_patent_file_wrapper: PatentFileWrapper,
    ) -> None:
        """A one-hit search response yields that wrapper via a limit-1 search."""
        client, request_mock = client_with_mocked_request
        request_mock.return_value = PatentDataResponse(
            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
        )
        number = "11000000"
        # The lookup must be a single-result search on the patent number field.
        expected_request = call(
            method="GET",
            endpoint="api/v1/patent/applications/search",
            params={
                "q": f"applicationMetaData.patentNumber:{number}",
                "limit": 1,
                "offset": 0,
            },
            response_class=PatentDataResponse,
        )

        wrapper = client.get_patent(number)

        assert request_mock.call_args == expected_request
        assert wrapper is not None
        assert (
            wrapper.application_number_text
            == mock_patent_file_wrapper.application_number_text
        )

    def test_get_patent_not_found(
        self,
        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
    ) -> None:
        """An empty search response makes get_patent return None."""
        client, request_mock = client_with_mocked_request
        request_mock.return_value = PatentDataResponse(
            count=0, patent_file_wrapper_data_bag=[]
        )

        assert client.get_patent("nonexistent") is None


class TestGetPublication:
    """Unit tests covering PatentDataClient.get_publication."""

    def test_get_publication_found(
        self,
        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
        mock_patent_file_wrapper: PatentFileWrapper,
    ) -> None:
        """A one-hit search response yields that wrapper via a limit-1 search."""
        client, request_mock = client_with_mocked_request
        request_mock.return_value = PatentDataResponse(
            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
        )
        number = "20230123456"
        # The lookup must be a single-result search on the publication field.
        expected_request = call(
            method="GET",
            endpoint="api/v1/patent/applications/search",
            params={
                "q": f"applicationMetaData.earliestPublicationNumber:{number}",
                "limit": 1,
                "offset": 0,
            },
            response_class=PatentDataResponse,
        )

        wrapper = client.get_publication(number)

        assert request_mock.call_args == expected_request
        assert wrapper is not None
        assert (
            wrapper.application_number_text
            == mock_patent_file_wrapper.application_number_text
        )

    def test_get_publication_not_found(
        self,
        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
    ) -> None:
        """An empty search response makes get_publication return None."""
        client, request_mock = client_with_mocked_request
        request_mock.return_value = PatentDataResponse(
            count=0, patent_file_wrapper_data_bag=[]
        )

        assert client.get_publication("nonexistent") is None


class TestGetPCT:
    """Unit tests covering PatentDataClient.get_pct."""

    @pytest.fixture
    def mock_pct_file_wrapper(self) -> PatentFileWrapper:
        """Build a wrapper whose application number is a sanitized PCT number."""
        return PatentFileWrapper(application_number_text="PCTUS2412345")

    def test_get_pct_with_app_number(
        self,
        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
        mock_pct_file_wrapper: PatentFileWrapper,
    ) -> None:
        """A PCT application number is resolved through the direct endpoint."""
        client, request_mock = client_with_mocked_request
        request_mock.return_value = PatentDataResponse(
            count=1, patent_file_wrapper_data_bag=[mock_pct_file_wrapper]
        )
        # Direct lookup: the sanitized number is part of the endpoint path.
        expected_request = call(
            method="GET",
            endpoint="api/v1/patent/applications/PCTUS2412345",
            response_class=PatentDataResponse,
        )

        wrapper = client.get_pct("PCT/US2024/012345")

        assert request_mock.call_args == expected_request
        assert wrapper is not None
        assert (
            wrapper.application_number_text
            == mock_pct_file_wrapper.application_number_text
        )

    def test_get_pct_with_pub_number(
        self,
        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
        mock_patent_file_wrapper: PatentFileWrapper,
    ) -> None:
        """A PCT publication number is resolved through a limit-1 search."""
        client, request_mock = client_with_mocked_request
        request_mock.return_value = PatentDataResponse(
            count=1, patent_file_wrapper_data_bag=[mock_patent_file_wrapper]
        )
        publication = "WO2024012345A1"
        expected_request = call(
            method="GET",
            endpoint="api/v1/patent/applications/search",
            params={
                "q": f"applicationMetaData.pctPublicationNumber:{publication}",
                "limit": 1,
                "offset": 0,
            },
            response_class=PatentDataResponse,
        )

        wrapper = client.get_pct(publication)

        assert request_mock.call_args == expected_request
        assert wrapper is not None
        assert (
            wrapper.application_number_text
            == mock_patent_file_wrapper.application_number_text
        )

    def test_get_pct_not_found(
        self,
        client_with_mocked_request: tuple[PatentDataClient, MagicMock],
    ) -> None:
        """An empty search response makes get_pct return None."""
        client, request_mock = client_with_mocked_request
        request_mock.return_value = PatentDataResponse(
            count=0, patent_file_wrapper_data_bag=[]
        )

        assert client.get_pct("WO9999999999") is None


class TestDownloadArchive:
"""Tests for downloading archive files."""

Expand Down
49 changes: 49 additions & 0 deletions tests/integration/test_patent_data_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,3 +857,52 @@ def test_invalid_application_number_handling(
)
except Exception as e:
pytest.fail(f"Unexpected exception for invalid app number: {e}")


class TestHelperMethodsIntegration:
    """Integration checks for the get_patent, get_publication and get_pct helpers."""

    # Identifiers the assertions below expect to resolve to fixed applications.
    KNOWN_PATENT_NUMBER = "12565253"
    KNOWN_PUBLICATION_NUMBER = "US20260054762A1"
    KNOWN_PCT_APP_NUMBER = "PCT/US2025/047756"
    KNOWN_PCT_PUB_NUMBER = "WO 2026/044302"

    def test_get_patent(self, patent_data_client: PatentDataClient) -> None:
        """The known patent number resolves to its application's wrapper."""
        wrapper = patent_data_client.get_patent(self.KNOWN_PATENT_NUMBER)

        assert isinstance(wrapper, PatentFileWrapper)
        assert wrapper.application_number_text == "19378371"
        meta = wrapper.application_meta_data
        assert meta is not None
        assert isinstance(meta, ApplicationMetaData)

    def test_get_publication(self, patent_data_client: PatentDataClient) -> None:
        """The known publication number resolves to its application's wrapper."""
        wrapper = patent_data_client.get_publication(self.KNOWN_PUBLICATION_NUMBER)

        assert isinstance(wrapper, PatentFileWrapper)
        assert wrapper.application_number_text == "19378371"
        meta = wrapper.application_meta_data
        assert meta is not None
        assert isinstance(meta, ApplicationMetaData)

    def test_get_pct_with_app_number(
        self, patent_data_client: PatentDataClient
    ) -> None:
        """The known PCT application number resolves by direct lookup."""
        wrapper = patent_data_client.get_pct(self.KNOWN_PCT_APP_NUMBER)

        assert isinstance(wrapper, PatentFileWrapper)
        assert wrapper.application_number_text == "PCTUS2547756"
        meta = wrapper.application_meta_data
        assert meta is not None
        assert meta.pct_publication_number == "WO2026044302"

    def test_get_pct_with_pub_number(
        self, patent_data_client: PatentDataClient
    ) -> None:
        """The known PCT publication number resolves to the same application."""
        wrapper = patent_data_client.get_pct(self.KNOWN_PCT_PUB_NUMBER)

        assert isinstance(wrapper, PatentFileWrapper)
        assert wrapper.application_number_text == "PCTUS2547756"
        meta = wrapper.application_meta_data
        assert meta is not None
        assert meta.pct_publication_number == "WO2026044302"
Loading