From 4fedf9f3aa963e6395375929b7656356ddd544fd Mon Sep 17 00:00:00 2001 From: Andrew <3300522+dpieski@users.noreply.github.com> Date: Fri, 27 Mar 2026 09:14:21 -0500 Subject: [PATCH 1/5] feat: init enriched citations --- src/pyUSPTO/__init__.py | 17 +- src/pyUSPTO/clients/__init__.py | 2 + src/pyUSPTO/clients/enriched_citations.py | 254 +++++++++++ src/pyUSPTO/config.py | 6 + src/pyUSPTO/models/__init__.py | 11 + src/pyUSPTO/models/enriched_citations.py | 356 +++++++++++++++ .../clients/test_enriched_citation_clients.py | 394 ++++++++++++++++ .../test_enriched_citations_integration.py | 85 ++++ tests/models/test_enriched_citation_models.py | 420 ++++++++++++++++++ tests/test_config.py | 7 + 10 files changed, 1550 insertions(+), 2 deletions(-) create mode 100644 src/pyUSPTO/clients/enriched_citations.py create mode 100644 src/pyUSPTO/models/enriched_citations.py create mode 100644 tests/clients/test_enriched_citation_clients.py create mode 100644 tests/integration/test_enriched_citations_integration.py create mode 100644 tests/models/test_enriched_citation_models.py diff --git a/src/pyUSPTO/__init__.py b/src/pyUSPTO/__init__.py index 065374d..14a5608 100644 --- a/src/pyUSPTO/__init__.py +++ b/src/pyUSPTO/__init__.py @@ -12,6 +12,7 @@ pass from pyUSPTO.clients.bulk_data import BulkDataClient +from pyUSPTO.clients.enriched_citations import EnrichedCitationsClient from pyUSPTO.clients.patent_data import PatentDataClient from pyUSPTO.clients.petition_decisions import FinalPetitionDecisionsClient from pyUSPTO.clients.ptab_appeals import PTABAppealsClient @@ -28,14 +29,20 @@ USPTOTimeout, ) from pyUSPTO.http_config import HTTPConfig - -# Import model implementations from pyUSPTO.models.bulk_data import ( BulkDataProduct, BulkDataResponse, FileData, ProductFileBag, ) + +# Import model implementations +from pyUSPTO.models.enriched_citations import ( + CitationCategoryCode, + EnrichedCitation, + EnrichedCitationFieldsResponse, + EnrichedCitationResponse, +) from 
pyUSPTO.models.patent_data import ( ApplicationContinuityData, PatentDataResponse, @@ -82,6 +89,12 @@ "USPTOTimezoneWarning", "USPTOEnumParseWarning", "USPTODataMismatchWarning", + # Enriched Citations API + "EnrichedCitationsClient", + "CitationCategoryCode", + "EnrichedCitation", + "EnrichedCitationResponse", + "EnrichedCitationFieldsResponse", # Bulk Data API "BulkDataClient", "BulkDataResponse", diff --git a/src/pyUSPTO/clients/__init__.py b/src/pyUSPTO/clients/__init__.py index 2aab87b..dc2810d 100644 --- a/src/pyUSPTO/clients/__init__.py +++ b/src/pyUSPTO/clients/__init__.py @@ -4,6 +4,7 @@ """ from pyUSPTO.clients.bulk_data import BulkDataClient +from pyUSPTO.clients.enriched_citations import EnrichedCitationsClient from pyUSPTO.clients.patent_data import PatentDataClient from pyUSPTO.clients.petition_decisions import FinalPetitionDecisionsClient from pyUSPTO.clients.ptab_appeals import PTABAppealsClient @@ -12,6 +13,7 @@ __all__ = [ "BulkDataClient", + "EnrichedCitationsClient", "PatentDataClient", "FinalPetitionDecisionsClient", "PTABTrialsClient", diff --git a/src/pyUSPTO/clients/enriched_citations.py b/src/pyUSPTO/clients/enriched_citations.py new file mode 100644 index 0000000..be4d3db --- /dev/null +++ b/src/pyUSPTO/clients/enriched_citations.py @@ -0,0 +1,254 @@ +"""clients.enriched_citations - Client for USPTO Enriched Citations API. + +This module provides a client for interacting with the USPTO Enriched Cited +Reference Metadata API (v3). It allows users to search for enriched citation +data extracted from patent office actions using AI/NLP algorithms. 
+""" + +from collections.abc import Iterator +from typing import Any + +from pyUSPTO.clients.base import BaseUSPTOClient +from pyUSPTO.config import USPTOConfig +from pyUSPTO.models.enriched_citations import ( + EnrichedCitation, + EnrichedCitationFieldsResponse, + EnrichedCitationResponse, +) + + +class EnrichedCitationsClient(BaseUSPTOClient[EnrichedCitationResponse]): + """Client for interacting with the USPTO Enriched Citations API. + + This client provides methods to search for enriched citation data from + office actions mailed from October 1, 2017 to 30 days prior to the current + date. The data is extracted using AI/NLP algorithms and includes bibliographic + information, rejected claims, and passage locations from cited prior art. + """ + + ENDPOINTS = { + "search_citations": "api/v1/patent/oa/enriched_cited_reference_metadata/v3/records", + "get_fields": "api/v1/patent/oa/enriched_cited_reference_metadata/v3/fields", + } + + def __init__( + self, + config: USPTOConfig | None = None, + base_url: str | None = None, + ): + """Initialize the EnrichedCitationsClient. + + Args: + config: USPTOConfig instance containing API key and settings. If not provided, + creates config from environment variables (requires USPTO_API_KEY). + base_url: Optional base URL override for the USPTO Enriched Citations API. + If not provided, uses config.enriched_citations_base_url or default. 
+ """ + # Use provided config or create from environment + if config is None: + self.config = USPTOConfig.from_env() + else: + self.config = config + + # Determine effective base URL + effective_base_url = base_url or self.config.enriched_citations_base_url + + # Initialize base client + super().__init__( + base_url=effective_base_url, + config=self.config, + ) + + def search_citations( + self, + query: str | None = None, + sort: str | None = None, + offset: int | None = 0, + limit: int | None = 25, + post_body: dict[str, Any] | None = None, + # Convenience query parameters (only used when query is None) + patent_application_number_q: str | None = None, + cited_document_identifier_q: str | None = None, + office_action_category_q: str | None = None, + citation_category_code_q: str | None = None, + tech_center_q: str | None = None, + group_art_unit_number_q: str | None = None, + examiner_cited_q: bool | None = None, + office_action_date_from_q: str | None = None, + office_action_date_to_q: str | None = None, + additional_query_params: dict[str, Any] | None = None, + ) -> EnrichedCitationResponse: + """Return enriched citations matching the given criteria. + + This method performs a POST request to search for enriched citation records. + You can provide either a direct post_body, a query string, or use convenience + parameters that will be automatically AND-combined into a query. + + Args: + query: Query in USPTO search syntax; when set, *_q filters are ignored. + sort: Sort order for results. + offset: Number of records to skip (pagination). + limit: Maximum number of records to return. + post_body: Optional POST body, sent as-is. All other arguments are + ignored except additional_query_params (passed as request params). + patent_application_number_q: Filter by patent application number. + cited_document_identifier_q: Filter by cited document identifier. + office_action_category_q: Filter by office action category (e.g., "CTNF"). + citation_category_code_q: Filter by citation category code (e.g., "X", "Y").
+ tech_center_q: Filter by technology center code. + group_art_unit_number_q: Filter by group art unit number. + examiner_cited_q: Filter by whether the examiner cited the reference. + office_action_date_from_q: Filter from this date (YYYY-MM-DD). + office_action_date_to_q: Filter to this date (YYYY-MM-DD). + additional_query_params: Extra keys merged into the generated POST body. + + Returns: + EnrichedCitationResponse: Response containing matching enriched citations. + + Examples: + # Search with direct query + >>> response = client.search_citations( + ... query="patentApplicationNumber:15739603" + ... ) + + # Search with convenience parameters + >>> response = client.search_citations( + ... tech_center_q="2800", + ... citation_category_code_q="X", + ... limit=50, + ... ) + + # Search with POST body + >>> response = client.search_citations( + ... post_body={"q": "techCenter:2800", "rows": 100} + ... ) + """ + endpoint = self.ENDPOINTS["search_citations"] + + if post_body is not None: + # User-provided body is sent unchanged; extras go out as request params + return self._get_model( + method="POST", + endpoint=endpoint, + response_class=EnrichedCitationResponse, + json_data=post_body, + params=additional_query_params, + ) + + # Build POST body from parameters + body: dict[str, Any] = {} + + # An explicit query wins; convenience filters apply only when it is None + final_q = query + if final_q is None: + q_parts = [] + if patent_application_number_q: + q_parts.append(f"patentApplicationNumber:{patent_application_number_q}") + if cited_document_identifier_q: + v = ( + f'"{cited_document_identifier_q}"' + if " " in cited_document_identifier_q + else cited_document_identifier_q + ) + q_parts.append(f"citedDocumentIdentifier:{v}") + if office_action_category_q: + q_parts.append(f"officeActionCategory:{office_action_category_q}") + if citation_category_code_q: + q_parts.append(f"citationCategoryCode:{citation_category_code_q}") + if tech_center_q: + q_parts.append(f"techCenter:{tech_center_q}") + if group_art_unit_number_q: +
q_parts.append(f"groupArtUnitNumber:{group_art_unit_number_q}") + if examiner_cited_q is not None: + q_parts.append( + f"examinerCitedReferenceIndicator:{str(examiner_cited_q).lower()}" + ) + + # Handle office action date range + if office_action_date_from_q and office_action_date_to_q: + q_parts.append( + f"officeActionDate:[{office_action_date_from_q} TO {office_action_date_to_q}]" + ) + elif office_action_date_from_q: + q_parts.append(f"officeActionDate:>={office_action_date_from_q}") + elif office_action_date_to_q: + q_parts.append(f"officeActionDate:<={office_action_date_to_q}") + + if q_parts: + final_q = " AND ".join(q_parts) + + if final_q is not None: + body["q"] = final_q + if sort is not None: + body["sort"] = sort + if offset is not None: + body["offset"] = offset + if limit is not None: + body["limit"] = limit + + if additional_query_params: + body.update(additional_query_params) + + return self._get_model( + method="POST", + endpoint=endpoint, + response_class=EnrichedCitationResponse, + json_data=body, + ) + + def get_fields(self) -> EnrichedCitationFieldsResponse: + """Retrieve available fields and API metadata for the Enriched Citations API. + + Returns: + EnrichedCitationFieldsResponse: API metadata including available field + names and last data update timestamp. + + Examples: + >>> fields_response = client.get_fields() + >>> print(fields_response.fields) + ['officeActionDate', 'relatedClaimNumberText', ...] + >>> print(fields_response.last_data_updated_date) + '2024-07-11 11:33:41.0' + """ + endpoint = self.ENDPOINTS["get_fields"] + return self._get_model( + method="GET", + endpoint=endpoint, + response_class=EnrichedCitationFieldsResponse, + ) + + def paginate_citations( + self, post_body: dict[str, Any] | None = None, **kwargs: Any + ) -> Iterator[EnrichedCitation]: + """Provide an iterator to paginate through enriched citation search results. 
+ + This method simplifies fetching all enriched citations matching a search query + by automatically handling pagination. + + The offset parameter is managed by the pagination logic; setting it directly + in kwargs or post_body will raise a ValueError. + + Args: + post_body: Optional POST body for complex search queries. + **kwargs: Keyword arguments passed to search_citations. + + Returns: + Iterator[EnrichedCitation]: An iterator yielding EnrichedCitation objects. + + Examples: + # Paginate through all citations for a tech center + >>> for citation in client.paginate_citations(tech_center_q="2800"): + ... print(f"{citation.patent_application_number}: {citation.citation_category_code}") + + # Paginate with POST body + >>> for citation in client.paginate_citations( + ... post_body={"q": "techCenter:2800", "limit": 50} + ... ): + ... process_citation(citation) + """ + return self.paginate_results( + method_name="search_citations", + response_container_attr="docs", + post_body=post_body, + **kwargs, + ) diff --git a/src/pyUSPTO/config.py b/src/pyUSPTO/config.py index 0fb703b..6972d91 100644 --- a/src/pyUSPTO/config.py +++ b/src/pyUSPTO/config.py @@ -29,6 +29,7 @@ def __init__( patent_data_base_url: str = DEFAULT_BASE_URL, petition_decisions_base_url: str = DEFAULT_BASE_URL, ptab_base_url: str = DEFAULT_BASE_URL, + enriched_citations_base_url: str = DEFAULT_BASE_URL, http_config: HTTPConfig | None = None, include_raw_data: bool = False, ): @@ -40,6 +41,7 @@ def __init__( patent_data_base_url: Base URL for the Patent Data API petition_decisions_base_url: Base URL for the Final Petition Decisions API ptab_base_url: Base URL for the PTAB (Patent Trial and Appeal Board) API + enriched_citations_base_url: Base URL for the Enriched Citations API http_config: Optional HTTPConfig for request handling (uses defaults if None) include_raw_data: If True, store raw JSON in response objects for debugging (default: False) """ @@ -51,6 +53,7 @@ def __init__( 
self.patent_data_base_url = patent_data_base_url self.petition_decisions_base_url = petition_decisions_base_url self.ptab_base_url = ptab_base_url + self.enriched_citations_base_url = enriched_citations_base_url # Use provided HTTPConfig or create default self.http_config = http_config if http_config is not None else HTTPConfig() @@ -80,6 +83,9 @@ def from_env(cls) -> "USPTOConfig": "USPTO_PETITION_DECISIONS_BASE_URL", DEFAULT_BASE_URL ), ptab_base_url=os.environ.get("USPTO_PTAB_BASE_URL", DEFAULT_BASE_URL), + enriched_citations_base_url=os.environ.get( + "USPTO_ENRICHED_CITATIONS_BASE_URL", DEFAULT_BASE_URL + ), # Also read HTTP config from environment http_config=HTTPConfig.from_env(), ) diff --git a/src/pyUSPTO/models/__init__.py b/src/pyUSPTO/models/__init__.py index 2210a6b..aef6888 100644 --- a/src/pyUSPTO/models/__init__.py +++ b/src/pyUSPTO/models/__init__.py @@ -10,6 +10,12 @@ FileData, ProductFileBag, ) +from pyUSPTO.models.enriched_citations import ( + CitationCategoryCode, + EnrichedCitation, + EnrichedCitationFieldsResponse, + EnrichedCitationResponse, +) from pyUSPTO.models.petition_decisions import ( DocumentDownloadOption, PetitionDecision, @@ -28,6 +34,11 @@ __all__ = [ "FromDictProtocol", + # Enriched Citations Models + "CitationCategoryCode", + "EnrichedCitation", + "EnrichedCitationResponse", + "EnrichedCitationFieldsResponse", "FileData", "ProductFileBag", "BulkDataProduct", diff --git a/src/pyUSPTO/models/enriched_citations.py b/src/pyUSPTO/models/enriched_citations.py new file mode 100644 index 0000000..ccd72fc --- /dev/null +++ b/src/pyUSPTO/models/enriched_citations.py @@ -0,0 +1,356 @@ +"""models.enriched_citations - Data models for USPTO Enriched Citations API. + +This module provides data models for representing responses from the USPTO +Enriched Cited Reference Metadata API (v3). These models cover enriched citation +records extracted from patent office actions using AI/NLP algorithms. 
+""" + +import json +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from typing import Any + +from pyUSPTO.models.utils import ( + parse_to_datetime_utc, + serialize_datetime_as_naive, +) + + +# --- Enums for Categorical Data --- +class CitationCategoryCode(Enum): + """Citation category codes indicating the relevance of cited documents. + + These are standard patent citation categories used in search reports: + X - Particularly relevant if taken alone + Y - Particularly relevant if combined with another document + A - Technological background + E - Earlier patent document published on or after the filing date + L - Document cited for other reasons + O - Non-written disclosure + T - Theory or principle underlying the invention + P - Intermediate document + & - Member of the same patent family + D - Document cited in the application + """ + + X = "X" + Y = "Y" + A = "A" + E = "E" + L = "L" + O = "O" # noqa: E741 + T = "T" + P = "P" + AMPERSAND = "&" + D = "D" + + @classmethod + def _missing_(cls, value: Any) -> "CitationCategoryCode": + """Handle case-insensitive lookup and ampersand alias.""" + if isinstance(value, str): + val_upper = value.upper() + for member in cls: + if member.value.upper() == val_upper: + return member + if value == "&": + return cls.AMPERSAND + raise ValueError(f"{value!r} is not a valid {cls.__name__}") + + +# --- Data Models --- +@dataclass(frozen=True) +class EnrichedCitation: + """Represent a single enriched citation record from an office action. + + Attributes: + id: Unique identifier for this citation record. + patent_application_number: The application number (series code + serial number). + cited_document_identifier: Identification of the cited patent document. + publication_number: Publication number of the cited document. + kind_code: Kind code of the cited document (e.g., "A1", "B2"). + country_code: Country code of the cited document. 
+ inventor_name_text: Inventor or owner name from the cited document. + office_action_date: The date the office action was recorded. + office_action_category: Category of the office action (e.g., "CTNF", "CTFR"). + citation_category_code: Relevance category code (X, Y, A, E, L, O, T, P, &, D). + related_claim_number_text: Comma-separated claim numbers related to this citation. + examiner_cited_reference_indicator: Whether the reference was cited by the examiner (Form PTO-892). + applicant_cited_examiner_reference_indicator: Whether the citation was from Form PTO-1449. + npl_indicator: Whether this is a non-patent literature citation. + work_group_number: The work group number. + group_art_unit_number: Four-digit art unit code for examiner assignment. + tech_center: Technology center code (first two digits of art unit). + quality_summary_text: Quality summary of the review status. + passage_location_text: List of (pipe-delimited) passage location strings related to the citation; from_dict falls back to an empty list when the API value is not a list. + obsolete_document_identifier: Legacy document identifier from the IFW repository. + create_user_identifier: Job identifier that created this record. + create_date_time: Date and time the record was inserted in the database.
+ """ + + id: str = "" + patent_application_number: str | None = None + cited_document_identifier: str | None = None + publication_number: str | None = None + kind_code: str | None = None + country_code: str | None = None + inventor_name_text: str | None = None + office_action_date: datetime | None = None + office_action_category: str | None = None + citation_category_code: str | None = None + related_claim_number_text: str | None = None + examiner_cited_reference_indicator: bool | None = None + applicant_cited_examiner_reference_indicator: bool | None = None + npl_indicator: bool | None = None + work_group_number: str | None = None + group_art_unit_number: str | None = None + tech_center: str | None = None + quality_summary_text: str | None = None + passage_location_text: list[str] = field(default_factory=list) + obsolete_document_identifier: str | None = None + create_user_identifier: str | None = None + create_date_time: datetime | None = None + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "EnrichedCitation": + """Create an EnrichedCitation instance from a dictionary. + + Args: + data: Dictionary containing enriched citation data from API response. + + Returns: + EnrichedCitation: An instance of EnrichedCitation. 
+ """ + # Any non-list passageLocationText (None, str, ...) becomes an empty list + passage_location = data.get("passageLocationText", []) + if not isinstance(passage_location, list): + passage_location = [] + + return cls( + id=data.get("id", ""), + patent_application_number=data.get("patentApplicationNumber"), + cited_document_identifier=data.get("citedDocumentIdentifier"), + publication_number=data.get("publicationNumber"), + kind_code=data.get("kindCode"), + country_code=data.get("countryCode"), + inventor_name_text=data.get("inventorNameText"), + office_action_date=parse_to_datetime_utc(data.get("officeActionDate")), + office_action_category=data.get("officeActionCategory"), + citation_category_code=data.get("citationCategoryCode"), + related_claim_number_text=data.get("relatedClaimNumberText"), + examiner_cited_reference_indicator=data.get( + "examinerCitedReferenceIndicator" + ), + applicant_cited_examiner_reference_indicator=data.get( + "applicantCitedExaminerReferenceIndicator" + ), + npl_indicator=data.get("nplIndicator"), + work_group_number=data.get("workGroupNumber"), + group_art_unit_number=data.get("groupArtUnitNumber"), + tech_center=data.get("techCenter"), + quality_summary_text=data.get("qualitySummaryText"), + passage_location_text=passage_location, + obsolete_document_identifier=data.get("obsoleteDocumentIdentifier"), + create_user_identifier=data.get("createUserIdentifier"), + create_date_time=parse_to_datetime_utc(data.get("createDateTime")), + ) + + def to_dict(self) -> dict[str, Any]: + """Convert the EnrichedCitation instance to a dictionary. + + Returns: + Dict[str, Any]: camelCase-keyed dict; None and empty lists are omitted.
+ """ + d = { + "id": self.id, + "patentApplicationNumber": self.patent_application_number, + "citedDocumentIdentifier": self.cited_document_identifier, + "publicationNumber": self.publication_number, + "kindCode": self.kind_code, + "countryCode": self.country_code, + "inventorNameText": self.inventor_name_text, + "officeActionDate": ( + serialize_datetime_as_naive(self.office_action_date) + if self.office_action_date + else None + ), + "officeActionCategory": self.office_action_category, + "citationCategoryCode": self.citation_category_code, + "relatedClaimNumberText": self.related_claim_number_text, + "examinerCitedReferenceIndicator": self.examiner_cited_reference_indicator, + "applicantCitedExaminerReferenceIndicator": self.applicant_cited_examiner_reference_indicator, + "nplIndicator": self.npl_indicator, + "workGroupNumber": self.work_group_number, + "groupArtUnitNumber": self.group_art_unit_number, + "techCenter": self.tech_center, + "qualitySummaryText": self.quality_summary_text, + "passageLocationText": self.passage_location_text, + "obsoleteDocumentIdentifier": self.obsolete_document_identifier, + "createUserIdentifier": self.create_user_identifier, + "createDateTime": ( + serialize_datetime_as_naive(self.create_date_time) + if self.create_date_time + else None + ), + } + return { + k: v + for k, v in d.items() + if v is not None and (not isinstance(v, list) or v) + } + + +@dataclass(frozen=True) +class EnrichedCitationResponse: + """Response from the Enriched Citations API search endpoint. + + The API returns a Solr-style response with `start`, `numFound`, and `docs`. + The outer envelope key is `"response"`. + + Attributes: + num_found: Total number of matching records. + start: The offset of the first result in this page. + docs: List of enriched citation records in this page. + raw_data: Optional raw JSON data from the API response (for debugging). 
+ """ + + num_found: int = 0 + start: int = 0 + docs: list[EnrichedCitation] = field(default_factory=list) + raw_data: str | None = field(default=None, compare=False, repr=False) + + @property + def count(self) -> int: + """Return total result count for pagination compatibility.""" + return self.num_found + + @classmethod + def from_dict( + cls, data: dict[str, Any], include_raw_data: bool = False + ) -> "EnrichedCitationResponse": + """Create an EnrichedCitationResponse instance from a dictionary. + + Handles both the raw API envelope (``{"response": {...}}``) and + a pre-unwrapped dictionary. + + Args: + data: Dictionary containing API response data. + include_raw_data: If True, store the raw JSON for debugging. + + Returns: + EnrichedCitationResponse: An instance of EnrichedCitationResponse. + """ + # Unwrap the outer "response" envelope if present + inner = data.get("response", data) + + # Parse citation docs + docs_data = inner.get("docs", []) + docs = ( + [ + EnrichedCitation.from_dict(doc) + for doc in docs_data + if isinstance(doc, dict) + ] + if isinstance(docs_data, list) + else [] + ) + + return cls( + num_found=inner.get("numFound", 0), + start=inner.get("start", 0), + docs=docs, + raw_data=json.dumps(data) if include_raw_data else None, + ) + + def to_dict(self) -> dict[str, Any]: + """Convert the EnrichedCitationResponse instance to a dictionary. + + Returns: + Dict[str, Any]: Dictionary representation with camelCase keys, + wrapped in the ``"response"`` envelope matching the API format. + """ + return { + "response": { + "numFound": self.num_found, + "start": self.start, + "docs": [doc.to_dict() for doc in self.docs], + } + } + + +@dataclass(frozen=True) +class EnrichedCitationFieldsResponse: + """Response from the Enriched Citations API fields endpoint. + + Contains metadata about the API including available field names + and the last data update timestamp. + + Attributes: + api_key: The dataset key (e.g., "enriched_cited_reference_metadata"). 
+ api_version_number: API version (e.g., "v3"). + api_url: The URL of this fields endpoint. + api_documentation_url: URL to the Swagger documentation. + api_status: Publication status (e.g., "PUBLISHED"). + field_count: Number of available fields. + fields: List of available field names. + last_data_updated_date: Timestamp of the last data update (non-standard format). + """ + + api_key: str | None = None + api_version_number: str | None = None + api_url: str | None = None + api_documentation_url: str | None = None + api_status: str | None = None + field_count: int = 0 + fields: list[str] = field(default_factory=list) + last_data_updated_date: str | None = None + + @classmethod + def from_dict( + cls, data: dict[str, Any], include_raw_data: bool = False + ) -> "EnrichedCitationFieldsResponse": + """Create an EnrichedCitationFieldsResponse instance from a dictionary. + + Args: + data: Dictionary containing API response data. + include_raw_data: Unused. Present for FromDictProtocol conformance. + + Returns: + EnrichedCitationFieldsResponse: An instance of EnrichedCitationFieldsResponse. + """ + fields_data = data.get("fields", []) + if not isinstance(fields_data, list): + fields_data = [] + + return cls( + api_key=data.get("apiKey"), + api_version_number=data.get("apiVersionNumber"), + api_url=data.get("apiUrl"), + api_documentation_url=data.get("apiDocumentationUrl"), + api_status=data.get("apiStatus"), + field_count=data.get("fieldCount", 0), + fields=fields_data, + last_data_updated_date=data.get("lastDataUpdatedDate"), + ) + + def to_dict(self) -> dict[str, Any]: + """Convert the EnrichedCitationFieldsResponse instance to a dictionary. + + Returns: + Dict[str, Any]: Dictionary representation with camelCase keys. 
+ """ + d = { + "apiKey": self.api_key, + "apiVersionNumber": self.api_version_number, + "apiUrl": self.api_url, + "apiDocumentationUrl": self.api_documentation_url, + "apiStatus": self.api_status, + "fieldCount": self.field_count, + "fields": self.fields, + "lastDataUpdatedDate": self.last_data_updated_date, + } + return { + k: v + for k, v in d.items() + if v is not None and (not isinstance(v, list) or v) + } diff --git a/tests/clients/test_enriched_citation_clients.py b/tests/clients/test_enriched_citation_clients.py new file mode 100644 index 0000000..f5d2ae6 --- /dev/null +++ b/tests/clients/test_enriched_citation_clients.py @@ -0,0 +1,394 @@ +"""Tests for the pyUSPTO.clients.enriched_citations.EnrichedCitationsClient. + +This module contains comprehensive tests for initialization, search functionality, +field retrieval, and pagination. +""" + +from collections.abc import Iterator +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +from pyUSPTO.clients.enriched_citations import EnrichedCitationsClient +from pyUSPTO.config import USPTOConfig +from pyUSPTO.models.enriched_citations import ( + EnrichedCitation, + EnrichedCitationFieldsResponse, + EnrichedCitationResponse, +) + + +# --- Fixtures --- +@pytest.fixture +def api_key_fixture() -> str: + """Provides a test API key.""" + return "test_key" + + +@pytest.fixture +def uspto_config(api_key_fixture: str) -> USPTOConfig: + """Provides a USPTOConfig instance with test API key.""" + return USPTOConfig(api_key=api_key_fixture) + + +@pytest.fixture +def enriched_client(uspto_config: USPTOConfig) -> EnrichedCitationsClient: + """Provides an EnrichedCitationsClient instance.""" + return EnrichedCitationsClient(config=uspto_config) + + +@pytest.fixture +def mock_enriched_citation() -> EnrichedCitation: + """Provides a mock EnrichedCitation instance.""" + return EnrichedCitation( + id="d7e95803517f677b3875dc476a61a817", + patent_application_number="15739603", + 
cited_document_identifier="US 20190165601 A1", + citation_category_code="Y", + tech_center="2800", + examiner_cited_reference_indicator=True, + ) + + +@pytest.fixture +def mock_enriched_response_with_data( + mock_enriched_citation: EnrichedCitation, +) -> EnrichedCitationResponse: + """Provides a mock EnrichedCitationResponse with data.""" + return EnrichedCitationResponse( + num_found=1, + start=0, + docs=[mock_enriched_citation], + ) + + +@pytest.fixture +def mock_enriched_response_empty() -> EnrichedCitationResponse: + """Provides an empty mock EnrichedCitationResponse.""" + return EnrichedCitationResponse(num_found=0, start=0, docs=[]) + + +@pytest.fixture +def client_with_mocked_request( + enriched_client: EnrichedCitationsClient, +) -> Iterator[tuple[EnrichedCitationsClient, MagicMock]]: + """Provides a client with mocked _get_model method.""" + with patch.object( + enriched_client, "_get_model", autospec=True + ) as mock_get_model: + yield enriched_client, mock_get_model + + +# --- Test Classes --- + + +class TestEnrichedCitationsClientInit: + """Tests for initialization of EnrichedCitationsClient.""" + + def test_init_with_config( + self, enriched_client: EnrichedCitationsClient, uspto_config: USPTOConfig + ) -> None: + """Test initialization with config.""" + assert enriched_client._api_key == uspto_config.api_key + assert enriched_client.base_url == "https://api.uspto.gov" + + def test_init_with_custom_base_url(self, uspto_config: USPTOConfig) -> None: + """Test initialization with custom base URL.""" + custom_url = "https://custom.api.test.com" + client = EnrichedCitationsClient(config=uspto_config, base_url=custom_url) + assert client.base_url == custom_url + + def test_init_with_config_base_url(self) -> None: + """Test initialization uses config's enriched_citations_base_url.""" + config_url = "https://config.api.test.com" + config = USPTOConfig( + api_key="config_key", enriched_citations_base_url=config_url + ) + client = 
EnrichedCitationsClient(config=config) + assert client._api_key == "config_key" + assert client.base_url == config_url + assert client.config is config + + def test_init_without_config(self, monkeypatch: Any) -> None: + """Test initialization without config uses environment.""" + monkeypatch.setenv("USPTO_API_KEY", "env_key") + client = EnrichedCitationsClient() + assert client.config.api_key == "env_key" + + +class TestEnrichedCitationsClientSearch: + """Tests for search_citations method.""" + + def test_search_with_post_body( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with POST body passes it directly.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + post_body = {"q": "techCenter:2800", "rows": 100} + result = client.search_citations(post_body=post_body) + + mock_get_model.assert_called_once_with( + method="POST", + endpoint="api/v1/patent/oa/enriched_cited_reference_metadata/v3/records", + json_data=post_body, + params=None, + response_class=EnrichedCitationResponse, + ) + assert result is mock_enriched_response_with_data + + def test_search_with_query( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with direct query string.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + result = client.search_citations(query="patentApplicationNumber:15739603") + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert json_data["q"] == "patentApplicationNumber:15739603" + assert json_data["offset"] == 0 + assert json_data["limit"] == 25 + + def test_search_with_application_number( + self, + client_with_mocked_request: 
tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with patent_application_number_q convenience parameter.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations(patent_application_number_q="15739603") + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert "patentApplicationNumber:15739603" in json_data["q"] + + def test_search_with_cited_document_identifier( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with cited_document_identifier_q (auto-quotes spaces).""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations(cited_document_identifier_q="US 20190165601 A1") + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert 'citedDocumentIdentifier:"US 20190165601 A1"' in json_data["q"] + + def test_search_with_multiple_params( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search combining multiple convenience parameters.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations( + tech_center_q="2800", + citation_category_code_q="X", + examiner_cited_q=True, + limit=50, + ) + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + query = json_data["q"] + assert "techCenter:2800" in query + assert "citationCategoryCode:X" in query + assert "examinerCitedReferenceIndicator:true" in query + assert " AND " in query + assert json_data["limit"] == 50 + + def 
test_search_with_date_range( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with office action date range.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations( + office_action_date_from_q="2019-01-01", + office_action_date_to_q="2019-12-31", + ) + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert "officeActionDate:[2019-01-01 TO 2019-12-31]" in json_data["q"] + + def test_search_with_date_from_only( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with only a from date.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations(office_action_date_from_q="2019-01-01") + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert "officeActionDate:>=2019-01-01" in json_data["q"] + + def test_search_with_date_to_only( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with only a to date.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations(office_action_date_to_q="2019-12-31") + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert "officeActionDate:<=2019-12-31" in json_data["q"] + + def test_search_default_offset_limit( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search applies default offset and 
limit.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations(query="*:*") + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert json_data["offset"] == 0 + assert json_data["limit"] == 25 + + def test_search_with_sort( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with sort parameter.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations(query="*:*", sort="officeActionDate desc") + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert json_data["sort"] == "officeActionDate desc" + + def test_search_with_group_art_unit( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search with group_art_unit_number_q convenience parameter.""" + client, mock_get_model = client_with_mocked_request + mock_get_model.return_value = mock_enriched_response_with_data + + client.search_citations(group_art_unit_number_q="2837") + + call_args = mock_get_model.call_args + json_data = call_args[1]["json_data"] + assert "groupArtUnitNumber:2837" in json_data["q"] + + +class TestEnrichedCitationsClientGetFields: + """Tests for get_fields method.""" + + def test_get_fields( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + ) -> None: + """Test get_fields sends a GET request.""" + client, mock_get_model = client_with_mocked_request + mock_response = EnrichedCitationFieldsResponse( + api_key="enriched_cited_reference_metadata", + api_version_number="v3", + field_count=22, + fields=["officeActionDate", "patentApplicationNumber"], + ) + mock_get_model.return_value = 
mock_response + + result = client.get_fields() + + mock_get_model.assert_called_once_with( + method="GET", + endpoint="api/v1/patent/oa/enriched_cited_reference_metadata/v3/fields", + response_class=EnrichedCitationFieldsResponse, + ) + assert result is mock_response + assert result.api_key == "enriched_cited_reference_metadata" + + +class TestEnrichedCitationsClientPaginate: + """Tests for paginate_citations method.""" + + def test_paginate_calls_paginate_results( + self, + enriched_client: EnrichedCitationsClient, + ) -> None: + """Test paginate_citations delegates to paginate_results.""" + with patch.object( + enriched_client, "paginate_results", autospec=True + ) as mock_paginate: + mock_paginate.return_value = iter([]) + + result = enriched_client.paginate_citations(tech_center_q="2800") + + mock_paginate.assert_called_once_with( + method_name="search_citations", + response_container_attr="docs", + post_body=None, + tech_center_q="2800", + ) + + def test_paginate_with_post_body( + self, + enriched_client: EnrichedCitationsClient, + ) -> None: + """Test paginate_citations passes post_body to paginate_results.""" + with patch.object( + enriched_client, "paginate_results", autospec=True + ) as mock_paginate: + mock_paginate.return_value = iter([]) + post_body = {"q": "techCenter:2800", "limit": 50} + + result = enriched_client.paginate_citations(post_body=post_body) + + mock_paginate.assert_called_once_with( + method_name="search_citations", + response_container_attr="docs", + post_body=post_body, + ) + + def test_paginate_yields_citations( + self, + enriched_client: EnrichedCitationsClient, + mock_enriched_citation: EnrichedCitation, + ) -> None: + """Test paginate_citations yields EnrichedCitation objects.""" + with patch.object( + enriched_client, "paginate_results", autospec=True + ) as mock_paginate: + mock_paginate.return_value = iter([mock_enriched_citation]) + + citations = list(enriched_client.paginate_citations(query="*:*")) + + assert len(citations) == 
1 + assert citations[0] is mock_enriched_citation diff --git a/tests/integration/test_enriched_citations_integration.py b/tests/integration/test_enriched_citations_integration.py new file mode 100644 index 0000000..6f5cca6 --- /dev/null +++ b/tests/integration/test_enriched_citations_integration.py @@ -0,0 +1,85 @@ +""" +Integration tests for the USPTO Enriched Citations API client. + +This module contains integration tests that make real API calls to the USPTO Enriched Citations API. +These tests are optional and are skipped by default unless the ENABLE_INTEGRATION_TESTS +environment variable is set to 'true'. +""" + +import os + +import pytest + +from pyUSPTO.clients import EnrichedCitationsClient +from pyUSPTO.config import USPTOConfig +from pyUSPTO.models.enriched_citations import ( + EnrichedCitation, + EnrichedCitationFieldsResponse, + EnrichedCitationResponse, +) + +# Skip all tests in this module unless ENABLE_INTEGRATION_TESTS is set to 'true' +pytestmark = pytest.mark.skipif( + os.environ.get("ENABLE_INTEGRATION_TESTS", "").lower() != "true", + reason="Integration tests are disabled. Set ENABLE_INTEGRATION_TESTS=true to enable.", +) + + +@pytest.fixture(scope="module") +def enriched_citations_client(config: USPTOConfig) -> EnrichedCitationsClient: + """ + Create an EnrichedCitationsClient instance for integration tests. 
+ + Args: + config: The configuration instance + + Returns: + EnrichedCitationsClient: A client instance + """ + return EnrichedCitationsClient(config=config) + + +class TestEnrichedCitationsSearch: + """Integration tests for search_citations.""" + + def test_search_by_application_number( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations by application number.""" + response = enriched_citations_client.search_citations( + patent_application_number_q="15739603", + limit=5, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + assert len(response.docs) > 0 + assert response.docs[0].patent_application_number == "15739603" + + def test_search_by_tech_center( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations by technology center.""" + response = enriched_citations_client.search_citations( + tech_center_q="2800", + limit=3, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + for doc in response.docs: + assert doc.tech_center == "2800" + + +class TestEnrichedCitationsGetFields: + """Integration tests for get_fields.""" + + def test_get_fields( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test retrieving API field metadata.""" + response = enriched_citations_client.get_fields() + assert isinstance(response, EnrichedCitationFieldsResponse) + assert response.api_status == "PUBLISHED" + assert response.field_count == 22 + assert len(response.fields) == 22 + assert "patentApplicationNumber" in response.fields + assert "citedDocumentIdentifier" in response.fields diff --git a/tests/models/test_enriched_citation_models.py b/tests/models/test_enriched_citation_models.py new file mode 100644 index 0000000..a4d9ccb --- /dev/null +++ b/tests/models/test_enriched_citation_models.py @@ -0,0 +1,420 @@ +"""Tests for the enriched_citations models. 
+ +This module contains comprehensive tests for all classes in pyUSPTO.models.enriched_citations. +""" + +from datetime import datetime +from typing import Any + +import pytest + +from pyUSPTO.models.enriched_citations import ( + CitationCategoryCode, + EnrichedCitation, + EnrichedCitationFieldsResponse, + EnrichedCitationResponse, +) + + +@pytest.fixture +def sample_enriched_citation_dict() -> dict[str, Any]: + """Provide a sample enriched citation dictionary matching the API response.""" + return { + "relatedClaimNumberText": "1,7", + "officeActionDate": "2019-10-21T00:00:00", + "createUserIdentifier": "ETL_SYS", + "applicantCitedExaminerReferenceIndicator": False, + "kindCode": "A1", + "nplIndicator": False, + "workGroupNumber": "2830", + "patentApplicationNumber": "15739603", + "officeActionCategory": "CTNF", + "inventorNameText": "Supriya; Amrit", + "groupArtUnitNumber": "2837", + "qualitySummaryText": "AOK", + "createDateTime": "2026-03-02T21:36:52", + "techCenter": "2800", + "citedDocumentIdentifier": "US 20190165601 A1", + "countryCode": "US", + "passageLocationText": [ + "c. 112|figure 3|claim 9|claims 1-23|c. 
103|claim 1" + ], + "obsoleteDocumentIdentifier": "K1V5RMZ8RXEAPX0", + "id": "d7e95803517f677b3875dc476a61a817", + "citationCategoryCode": "Y", + "examinerCitedReferenceIndicator": True, + "publicationNumber": "20190165601", + } + + +@pytest.fixture +def sample_enriched_citation_response_dict( + sample_enriched_citation_dict: dict[str, Any], +) -> dict[str, Any]: + """Provide a sample response dictionary with the outer envelope.""" + return { + "response": { + "start": 0, + "numFound": 3, + "docs": [ + sample_enriched_citation_dict, + { + "id": "06cce55e4608f90c57ff7fdf2d6cc031", + "patentApplicationNumber": "15739603", + "citationCategoryCode": "Y", + }, + { + "id": "fa9ab8672e7cdc4f08b3ae43f8dd794b", + "patentApplicationNumber": "15739603", + "citationCategoryCode": "X", + }, + ], + } + } + + +@pytest.fixture +def sample_fields_response_dict() -> dict[str, Any]: + """Provide a sample fields response dictionary.""" + return { + "apiKey": "enriched_cited_reference_metadata", + "apiVersionNumber": "v3", + "apiUrl": "https://api.uspto.gov/api/v1/patent/oa/enriched_cited_reference_metadata/v3/fields", + "apiDocumentationUrl": "https://data.uspto.gov/swagger/index.html?urls.primaryName=USPTO%20Enriched%20Citation%20API%20v3", + "apiStatus": "PUBLISHED", + "fieldCount": 22, + "fields": [ + "officeActionDate", + "relatedClaimNumberText", + "applicantCitedExaminerReferenceIndicator", + "createUserIdentifier", + "kindCode", + "nplIndicator", + "workGroupNumber", + "officeActionCategory", + "patentApplicationNumber", + "inventorNameText", + "groupArtUnitNumber", + "qualitySummaryText", + "createDateTime", + "techCenter", + "citedDocumentIdentifier", + "countryCode", + "passageLocationText", + "obsoleteDocumentIdentifier", + "citationCategoryCode", + "id", + "examinerCitedReferenceIndicator", + "publicationNumber", + ], + "lastDataUpdatedDate": "2024-07-11 11:33:41.0", + } + + +class TestCitationCategoryCode: + """Tests for CitationCategoryCode enum.""" + + def 
test_standard_values(self) -> None: + """Test that standard citation category codes are valid.""" + assert CitationCategoryCode("X") == CitationCategoryCode.X + assert CitationCategoryCode("Y") == CitationCategoryCode.Y + assert CitationCategoryCode("A") == CitationCategoryCode.A + assert CitationCategoryCode("E") == CitationCategoryCode.E + assert CitationCategoryCode("L") == CitationCategoryCode.L + assert CitationCategoryCode("O") == CitationCategoryCode.O + assert CitationCategoryCode("T") == CitationCategoryCode.T + assert CitationCategoryCode("P") == CitationCategoryCode.P + assert CitationCategoryCode("D") == CitationCategoryCode.D + + def test_ampersand(self) -> None: + """Test ampersand citation category code.""" + assert CitationCategoryCode("&") == CitationCategoryCode.AMPERSAND + assert CitationCategoryCode.AMPERSAND.value == "&" + + def test_case_insensitive(self) -> None: + """Test case-insensitive lookup via _missing_.""" + assert CitationCategoryCode("x") == CitationCategoryCode.X + assert CitationCategoryCode("y") == CitationCategoryCode.Y + assert CitationCategoryCode("a") == CitationCategoryCode.A + + def test_invalid_value_raises(self) -> None: + """Test that invalid values raise ValueError.""" + with pytest.raises(ValueError): + CitationCategoryCode("Z") + with pytest.raises(ValueError): + CitationCategoryCode("invalid") + + +class TestEnrichedCitationFromDict: + """Tests for EnrichedCitation.from_dict method.""" + + def test_from_dict_complete( + self, sample_enriched_citation_dict: dict[str, Any] + ) -> None: + """Test from_dict with complete data.""" + citation = EnrichedCitation.from_dict(sample_enriched_citation_dict) + + # Check string fields + assert citation.id == "d7e95803517f677b3875dc476a61a817" + assert citation.patent_application_number == "15739603" + assert citation.cited_document_identifier == "US 20190165601 A1" + assert citation.publication_number == "20190165601" + assert citation.kind_code == "A1" + assert 
citation.country_code == "US" + assert citation.inventor_name_text == "Supriya; Amrit" + assert citation.office_action_category == "CTNF" + assert citation.citation_category_code == "Y" + assert citation.related_claim_number_text == "1,7" + assert citation.work_group_number == "2830" + assert citation.group_art_unit_number == "2837" + assert citation.tech_center == "2800" + assert citation.quality_summary_text == "AOK" + assert citation.obsolete_document_identifier == "K1V5RMZ8RXEAPX0" + assert citation.create_user_identifier == "ETL_SYS" + + # Check boolean fields + assert citation.examiner_cited_reference_indicator is True + assert citation.applicant_cited_examiner_reference_indicator is False + assert citation.npl_indicator is False + + # Check datetime fields + assert citation.office_action_date is not None + assert isinstance(citation.office_action_date, datetime) + assert citation.create_date_time is not None + assert isinstance(citation.create_date_time, datetime) + + # Check list fields + assert len(citation.passage_location_text) == 1 + assert "c. 
112|figure 3|claim 9" in citation.passage_location_text[0] + + def test_from_dict_minimal(self) -> None: + """Test from_dict with minimal data (only id).""" + data = {"id": "test-id-123"} + citation = EnrichedCitation.from_dict(data) + assert citation.id == "test-id-123" + assert citation.patent_application_number is None + assert citation.cited_document_identifier is None + assert citation.office_action_date is None + assert citation.examiner_cited_reference_indicator is None + assert len(citation.passage_location_text) == 0 + + def test_from_dict_empty(self) -> None: + """Test from_dict with empty dictionary.""" + citation = EnrichedCitation.from_dict({}) + assert citation.id == "" + assert citation.patent_application_number is None + assert len(citation.passage_location_text) == 0 + + def test_from_dict_passage_location_not_list(self) -> None: + """Test from_dict when passageLocationText is not a list (defensive check).""" + data = { + "id": "test-id", + "passageLocationText": "Not a list", + } + citation = EnrichedCitation.from_dict(data) + assert len(citation.passage_location_text) == 0 + + def test_from_dict_passage_location_none(self) -> None: + """Test from_dict when passageLocationText is None.""" + data = { + "id": "test-id", + "passageLocationText": None, + } + citation = EnrichedCitation.from_dict(data) + assert len(citation.passage_location_text) == 0 + + def test_from_dict_empty_strings(self) -> None: + """Test from_dict with empty string values.""" + data = { + "id": "test-id", + "kindCode": "", + "countryCode": "", + "publicationNumber": "", + } + citation = EnrichedCitation.from_dict(data) + assert citation.kind_code == "" + assert citation.country_code == "" + assert citation.publication_number == "" + + +class TestEnrichedCitationToDict: + """Tests for EnrichedCitation.to_dict method.""" + + def test_to_dict_complete( + self, sample_enriched_citation_dict: dict[str, Any] + ) -> None: + """Test to_dict with complete data.""" + citation = 
EnrichedCitation.from_dict(sample_enriched_citation_dict) + result = citation.to_dict() + + assert result["id"] == "d7e95803517f677b3875dc476a61a817" + assert result["patentApplicationNumber"] == "15739603" + assert result["citedDocumentIdentifier"] == "US 20190165601 A1" + assert result["citationCategoryCode"] == "Y" + assert result["examinerCitedReferenceIndicator"] is True + assert result["nplIndicator"] is False + assert "passageLocationText" in result + assert len(result["passageLocationText"]) == 1 + + def test_to_dict_filters_none_and_empty_lists(self) -> None: + """Test to_dict filters out None values and empty lists.""" + citation = EnrichedCitation( + id="test-id", + patent_application_number="15739603", + cited_document_identifier=None, + passage_location_text=[], + ) + result = citation.to_dict() + assert "id" in result + assert "patentApplicationNumber" in result + assert "citedDocumentIdentifier" not in result + assert "passageLocationText" not in result + + +class TestEnrichedCitationResponseFromDict: + """Tests for EnrichedCitationResponse.from_dict method.""" + + def test_from_dict_with_envelope( + self, sample_enriched_citation_response_dict: dict[str, Any] + ) -> None: + """Test from_dict unwraps the outer 'response' envelope.""" + response = EnrichedCitationResponse.from_dict( + sample_enriched_citation_response_dict + ) + assert response.num_found == 3 + assert response.start == 0 + assert len(response.docs) == 3 + assert response.docs[0].id == "d7e95803517f677b3875dc476a61a817" + assert response.docs[1].id == "06cce55e4608f90c57ff7fdf2d6cc031" + assert response.docs[2].id == "fa9ab8672e7cdc4f08b3ae43f8dd794b" + + def test_from_dict_without_envelope(self) -> None: + """Test from_dict works when already unwrapped.""" + data = { + "start": 10, + "numFound": 50, + "docs": [ + {"id": "abc123", "patentApplicationNumber": "12345678"}, + ], + } + response = EnrichedCitationResponse.from_dict(data) + assert response.num_found == 50 + assert 
response.start == 10 + assert len(response.docs) == 1 + + def test_count_property_returns_num_found(self) -> None: + """Test that count property returns num_found for pagination compatibility.""" + response = EnrichedCitationResponse(num_found=42, start=0) + assert response.count == 42 + + def test_from_dict_empty(self) -> None: + """Test from_dict with empty data.""" + response = EnrichedCitationResponse.from_dict({}) + assert response.num_found == 0 + assert response.start == 0 + assert len(response.docs) == 0 + + def test_from_dict_empty_docs(self) -> None: + """Test from_dict with empty docs list.""" + data = {"response": {"start": 0, "numFound": 0, "docs": []}} + response = EnrichedCitationResponse.from_dict(data) + assert response.num_found == 0 + assert len(response.docs) == 0 + + def test_from_dict_include_raw_data( + self, sample_enriched_citation_response_dict: dict[str, Any] + ) -> None: + """Test from_dict with include_raw_data=True stores raw JSON.""" + response = EnrichedCitationResponse.from_dict( + sample_enriched_citation_response_dict, include_raw_data=True + ) + assert response.raw_data is not None + assert "d7e95803517f677b3875dc476a61a817" in response.raw_data + + def test_from_dict_include_raw_data_false(self) -> None: + """Test from_dict with include_raw_data=False (default) has no raw data.""" + data = {"response": {"start": 0, "numFound": 0, "docs": []}} + response = EnrichedCitationResponse.from_dict(data) + assert response.raw_data is None + + +class TestEnrichedCitationResponseToDict: + """Tests for EnrichedCitationResponse.to_dict method.""" + + def test_to_dict_wraps_in_envelope( + self, sample_enriched_citation_response_dict: dict[str, Any] + ) -> None: + """Test to_dict wraps output in 'response' envelope.""" + response = EnrichedCitationResponse.from_dict( + sample_enriched_citation_response_dict + ) + result = response.to_dict() + assert "response" in result + assert result["response"]["numFound"] == 3 + assert 
result["response"]["start"] == 0 + assert len(result["response"]["docs"]) == 3 + + +class TestEnrichedCitationFieldsResponseFromDict: + """Tests for EnrichedCitationFieldsResponse.from_dict method.""" + + def test_from_dict_complete( + self, sample_fields_response_dict: dict[str, Any] + ) -> None: + """Test from_dict with complete data.""" + response = EnrichedCitationFieldsResponse.from_dict( + sample_fields_response_dict + ) + assert response.api_key == "enriched_cited_reference_metadata" + assert response.api_version_number == "v3" + assert response.api_status == "PUBLISHED" + assert response.field_count == 22 + assert len(response.fields) == 22 + assert "officeActionDate" in response.fields + assert "citedDocumentIdentifier" in response.fields + assert response.last_data_updated_date == "2024-07-11 11:33:41.0" + assert response.api_url is not None + assert response.api_documentation_url is not None + + def test_from_dict_empty(self) -> None: + """Test from_dict with empty data.""" + response = EnrichedCitationFieldsResponse.from_dict({}) + assert response.api_key is None + assert response.api_version_number is None + assert response.field_count == 0 + assert len(response.fields) == 0 + + def test_from_dict_fields_not_list(self) -> None: + """Test from_dict when fields is not a list (defensive check).""" + data = {"fields": "not a list", "fieldCount": 1} + response = EnrichedCitationFieldsResponse.from_dict(data) + assert len(response.fields) == 0 + + +class TestEnrichedCitationFieldsResponseToDict: + """Tests for EnrichedCitationFieldsResponse.to_dict method.""" + + def test_to_dict_complete( + self, sample_fields_response_dict: dict[str, Any] + ) -> None: + """Test to_dict with complete data.""" + response = EnrichedCitationFieldsResponse.from_dict( + sample_fields_response_dict + ) + result = response.to_dict() + assert result["apiKey"] == "enriched_cited_reference_metadata" + assert result["apiVersionNumber"] == "v3" + assert result["fieldCount"] == 22 + 
assert len(result["fields"]) == 22 + + def test_to_dict_filters_none_and_empty_lists(self) -> None: + """Test to_dict filters out None values and empty lists.""" + response = EnrichedCitationFieldsResponse( + api_key="test", + api_version_number=None, + fields=[], + ) + result = response.to_dict() + assert "apiKey" in result + assert "apiVersionNumber" not in result + assert "fields" not in result diff --git a/tests/test_config.py b/tests/test_config.py index 2b9a714..33925a5 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -16,6 +16,7 @@ def test_default_values(self): assert config.bulk_data_base_url == "https://api.uspto.gov" assert config.patent_data_base_url == "https://api.uspto.gov" assert config.petition_decisions_base_url == "https://api.uspto.gov" + assert config.enriched_citations_base_url == "https://api.uspto.gov" assert config.http_config is not None assert isinstance(config.http_config, HTTPConfig) @@ -64,10 +65,12 @@ def test_config_custom_base_urls(self): bulk_data_base_url="https://bulk.example.com", patent_data_base_url="https://patent.example.com", petition_decisions_base_url="https://petition.example.com", + enriched_citations_base_url="https://citations.example.com", ) assert config.bulk_data_base_url == "https://bulk.example.com" assert config.patent_data_base_url == "https://patent.example.com" assert config.petition_decisions_base_url == "https://petition.example.com" + assert config.enriched_citations_base_url == "https://citations.example.com" def test_config_from_env_custom_urls(self, monkeypatch): """Test USPTOConfig.from_env() reads custom URLs""" @@ -77,11 +80,15 @@ def test_config_from_env_custom_urls(self, monkeypatch): monkeypatch.setenv( "USPTO_PETITION_DECISIONS_BASE_URL", "https://petition.example.com" ) + monkeypatch.setenv( + "USPTO_ENRICHED_CITATIONS_BASE_URL", "https://citations.example.com" + ) config = USPTOConfig.from_env() assert config.bulk_data_base_url == "https://bulk.example.com" assert 
config.patent_data_base_url == "https://patent.example.com" assert config.petition_decisions_base_url == "https://petition.example.com" + assert config.enriched_citations_base_url == "https://citations.example.com" def test_http_config_sharing(self): """Test HTTPConfig can be shared across multiple USPTOConfig instances""" From 59fcdbfe9e81c92db2c1d2965a841d780268dc7d Mon Sep 17 00:00:00 2001 From: Andrew <3300522+dpieski@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:19:12 -0500 Subject: [PATCH 2/5] fix: improve tests, add form_urlencoded to requests for citations --- src/pyUSPTO/clients/base.py | 44 +++++-- src/pyUSPTO/clients/enriched_citations.py | 20 ++-- src/pyUSPTO/config.py | 1 + src/pyUSPTO/models/enriched_citations.py | 4 +- .../clients/test_enriched_citation_clients.py | 110 ++++++++++++++---- 5 files changed, 132 insertions(+), 47 deletions(-) diff --git a/src/pyUSPTO/clients/base.py b/src/pyUSPTO/clients/base.py index cf900e6..64045ac 100644 --- a/src/pyUSPTO/clients/base.py +++ b/src/pyUSPTO/clients/base.py @@ -14,6 +14,8 @@ runtime_checkable, ) +from pyUSPTO.models.enriched_citations import EnrichedCitationResponse + try: from typing import Self except ImportError: @@ -201,6 +203,7 @@ def _execute_request( params: dict[str, Any] | None = None, json_data: dict[str, Any] | None = None, stream: bool = False, + form_urlencoded: bool = False, ) -> requests.Response: """Execute an HTTP request and return the raw Response. @@ -213,6 +216,7 @@ def _execute_request( params: Optional query parameters json_data: Optional JSON body for POST requests stream: Whether to stream the response + form_urlencoded: Whether to send POST body as application/x-www-form-urlencoded instead of JSON Returns: The raw requests.Response after raise_for_status(). 
@@ -231,13 +235,22 @@ def _execute_request( url=url, params=params, stream=stream, timeout=timeout ) elif method.upper() == "POST": - response = self.session.post( - url=url, - params=params, - json=json_data, - stream=stream, - timeout=timeout, - ) + if form_urlencoded: + response = self.session.post( + url=url, + params=params, + data=json_data, # Send as form data + stream=stream, + timeout=timeout, + ) + else: + response = self.session.post( + url=url, + params=params, + json=json_data, + stream=stream, + timeout=timeout, + ) else: raise ValueError(f"Unsupported HTTP method: {method}") @@ -343,9 +356,20 @@ def _get_model( url = self._build_url( endpoint, custom_url=custom_url, custom_base_url=custom_base_url ) - response = self._execute_request( - method=method, url=url, params=params, json_data=json_data - ) + + if response_class == EnrichedCitationResponse: + # Handling for EnrichedCitationResponse to support form-urlencoded POST requests + response = self._execute_request( + method=method, + url=url, + params=params, + json_data=json_data, + form_urlencoded=True, + ) + else: + response = self._execute_request( + method=method, url=url, params=params, json_data=json_data + ) data = self._parse_json_response(response, url) ret = response_class.from_dict( diff --git a/src/pyUSPTO/clients/enriched_citations.py b/src/pyUSPTO/clients/enriched_citations.py index be4d3db..69cb9c7 100644 --- a/src/pyUSPTO/clients/enriched_citations.py +++ b/src/pyUSPTO/clients/enriched_citations.py @@ -63,8 +63,8 @@ def search_citations( self, query: str | None = None, sort: str | None = None, - offset: int | None = 0, - limit: int | None = 25, + start: int | None = 0, + rows: int | None = 25, post_body: dict[str, Any] | None = None, # Convenience query parameters patent_application_number_q: str | None = None, @@ -87,8 +87,8 @@ def search_citations( Args: query: Direct query string in USPTO search syntax. sort: Sort order for results. 
- offset: Number of records to skip (pagination). - limit: Maximum number of records to return. + start: Starting index for pagination. + rows: Maximum number of records to return. post_body: Optional POST body for complex queries. When provided, all other parameters are ignored. patent_application_number_q: Filter by patent application number. @@ -115,7 +115,7 @@ def search_citations( >>> response = client.search_citations( ... tech_center_q="2800", ... citation_category_code_q="X", - ... limit=50, + ... rows=50, ... ) # Search with POST body @@ -178,13 +178,13 @@ def search_citations( final_q = " AND ".join(q_parts) if final_q is not None: - body["q"] = final_q + body["criteria"] = final_q if sort is not None: body["sort"] = sort - if offset is not None: - body["offset"] = offset - if limit is not None: - body["limit"] = limit + if start is not None: + body["start"] = start + if rows is not None: + body["rows"] = rows if additional_query_params: body.update(additional_query_params) diff --git a/src/pyUSPTO/config.py b/src/pyUSPTO/config.py index 6972d91..3ec4040 100644 --- a/src/pyUSPTO/config.py +++ b/src/pyUSPTO/config.py @@ -120,6 +120,7 @@ def _create_session(self) -> "requests.Session": # Set API key header if self.api_key: session.headers["X-API-KEY"] = self.api_key + session.headers["Accept"] = "application/json" # Apply custom headers from HTTP config if self.http_config.custom_headers: diff --git a/src/pyUSPTO/models/enriched_citations.py b/src/pyUSPTO/models/enriched_citations.py index ccd72fc..18dab65 100644 --- a/src/pyUSPTO/models/enriched_citations.py +++ b/src/pyUSPTO/models/enriched_citations.py @@ -53,8 +53,6 @@ def _missing_(cls, value: Any) -> "CitationCategoryCode": for member in cls: if member.value.upper() == val_upper: return member - if value == "&": - return cls.AMPERSAND raise ValueError(f"{value!r} is not a valid {cls.__name__}") @@ -209,7 +207,7 @@ class EnrichedCitationResponse: Attributes: num_found: Total number of matching records. 
- start: The offset of the first result in this page. + start: The start index of the first result in this page. docs: List of enriched citation records in this page. raw_data: Optional raw JSON data from the API response (for debugging). """ diff --git a/tests/clients/test_enriched_citation_clients.py b/tests/clients/test_enriched_citation_clients.py index f5d2ae6..d16195a 100644 --- a/tests/clients/test_enriched_citation_clients.py +++ b/tests/clients/test_enriched_citation_clients.py @@ -74,12 +74,24 @@ def client_with_mocked_request( enriched_client: EnrichedCitationsClient, ) -> Iterator[tuple[EnrichedCitationsClient, MagicMock]]: """Provides a client with mocked _get_model method.""" - with patch.object( - enriched_client, "_get_model", autospec=True - ) as mock_get_model: + with patch.object(enriched_client, "_get_model", autospec=True) as mock_get_model: yield enriched_client, mock_get_model +@pytest.fixture +def client_with_mocked_session( + enriched_client: EnrichedCitationsClient, +) -> Iterator[tuple[EnrichedCitationsClient, MagicMock]]: + """Provides a client with mocked session.post method.""" + # We patch the session's post method specifically + with patch.object(enriched_client.session, "post", autospec=True) as mock_post: + # We must return a mock response to prevent EnrichedCitationResponse from crashing + mock_response = MagicMock() + mock_response.status_code = 200 + mock_post.return_value = mock_response + yield enriched_client, mock_post + + # --- Test Classes --- @@ -115,6 +127,7 @@ def test_init_without_config(self, monkeypatch: Any) -> None: monkeypatch.setenv("USPTO_API_KEY", "env_key") client = EnrichedCitationsClient() assert client.config.api_key == "env_key" + assert client.base_url == "https://api.uspto.gov" class TestEnrichedCitationsClientSearch: @@ -130,13 +143,16 @@ def test_search_with_post_body( mock_get_model.return_value = mock_enriched_response_with_data post_body = {"q": "techCenter:2800", "rows": 100} - result = 
client.search_citations(post_body=post_body) + additional_q_params = {"debug": "true"} + result = client.search_citations( + post_body=post_body, additional_query_params=additional_q_params + ) mock_get_model.assert_called_once_with( method="POST", endpoint="api/v1/patent/oa/enriched_cited_reference_metadata/v3/records", json_data=post_body, - params=None, + params=additional_q_params, response_class=EnrichedCitationResponse, ) assert result is mock_enriched_response_with_data @@ -152,11 +168,27 @@ def test_search_with_query( result = client.search_citations(query="patentApplicationNumber:15739603") + assert result == mock_enriched_response_with_data + call_args = mock_get_model.call_args json_data = call_args[1]["json_data"] - assert json_data["q"] == "patentApplicationNumber:15739603" - assert json_data["offset"] == 0 - assert json_data["limit"] == 25 + assert json_data["criteria"] == "patentApplicationNumber:15739603" + assert json_data["start"] == 0 + assert json_data["rows"] == 25 + + def test_search_with_q( + self, client_with_mocked_session: tuple[EnrichedCitationsClient, MagicMock] + ) -> None: + """Test search with direct query string and verify POST payload.""" + client, mock_post = client_with_mocked_session + + client.search_citations(query="patentApplicationNumber:15739603") + + sent_payload = mock_post.call_args.kwargs["data"] + + assert sent_payload["criteria"] == "patentApplicationNumber:15739603" + assert sent_payload["start"] == 0 + assert sent_payload["rows"] == 25 def test_search_with_application_number( self, @@ -171,7 +203,7 @@ def test_search_with_application_number( call_args = mock_get_model.call_args json_data = call_args[1]["json_data"] - assert "patentApplicationNumber:15739603" in json_data["q"] + assert "patentApplicationNumber:15739603" in json_data["criteria"] def test_search_with_cited_document_identifier( self, @@ -186,7 +218,7 @@ def test_search_with_cited_document_identifier( call_args = mock_get_model.call_args json_data = 
call_args[1]["json_data"] - assert 'citedDocumentIdentifier:"US 20190165601 A1"' in json_data["q"] + assert 'citedDocumentIdentifier:"US 20190165601 A1"' in json_data["criteria"] def test_search_with_multiple_params( self, @@ -201,17 +233,20 @@ def test_search_with_multiple_params( tech_center_q="2800", citation_category_code_q="X", examiner_cited_q=True, - limit=50, + office_action_category_q="CTNF", + rows=50, ) call_args = mock_get_model.call_args json_data = call_args[1]["json_data"] - query = json_data["q"] + query = json_data["criteria"] assert "techCenter:2800" in query assert "citationCategoryCode:X" in query + assert "officeActionCategory:CTNF" in query assert "examinerCitedReferenceIndicator:true" in query assert " AND " in query - assert json_data["limit"] == 50 + assert query.count(" AND ") == 3 + assert json_data["rows"] == 50 def test_search_with_date_range( self, @@ -229,7 +264,7 @@ def test_search_with_date_range( call_args = mock_get_model.call_args json_data = call_args[1]["json_data"] - assert "officeActionDate:[2019-01-01 TO 2019-12-31]" in json_data["q"] + assert "officeActionDate:[2019-01-01 TO 2019-12-31]" in json_data["criteria"] def test_search_with_date_from_only( self, @@ -244,7 +279,7 @@ def test_search_with_date_from_only( call_args = mock_get_model.call_args json_data = call_args[1]["json_data"] - assert "officeActionDate:>=2019-01-01" in json_data["q"] + assert "officeActionDate:>=2019-01-01" in json_data["criteria"] def test_search_with_date_to_only( self, @@ -259,14 +294,41 @@ def test_search_with_date_to_only( call_args = mock_get_model.call_args json_data = call_args[1]["json_data"] - assert "officeActionDate:<=2019-12-31" in json_data["q"] + assert "officeActionDate:<=2019-12-31" in json_data["criteria"] - def test_search_default_offset_limit( + def test_search_with_additional_q_params( self, client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], mock_enriched_response_with_data: EnrichedCitationResponse, ) -> 
None: - """Test search applies default offset and limit.""" + """Test search with additional_query_params.""" + client, mock_get_model = client_with_mocked_request + + mock_get_model.return_value = mock_enriched_response_with_data + + custom_q = 'citedDocumentIdentifier:"US 20190165601 A1"' + client.search_citations( + additional_query_params={ + "criteria": custom_q, + "custom_field": "custom_value", + } + ) + + _, kwargs = mock_get_model.call_args + json_data = kwargs["json_data"] + + assert json_data["criteria"] == custom_q + assert json_data["custom_field"] == "custom_value" + # Verify default pagination is still preserved unless overridden + assert json_data["rows"] == 25 + assert json_data["start"] == 0 + + def test_search_default_start_rows( + self, + client_with_mocked_request: tuple[EnrichedCitationsClient, MagicMock], + mock_enriched_response_with_data: EnrichedCitationResponse, + ) -> None: + """Test search applies default start and rows.""" client, mock_get_model = client_with_mocked_request mock_get_model.return_value = mock_enriched_response_with_data @@ -274,8 +336,8 @@ def test_search_default_offset_limit( call_args = mock_get_model.call_args json_data = call_args[1]["json_data"] - assert json_data["offset"] == 0 - assert json_data["limit"] == 25 + assert json_data["start"] == 0 + assert json_data["rows"] == 25 def test_search_with_sort( self, @@ -305,7 +367,7 @@ def test_search_with_group_art_unit( call_args = mock_get_model.call_args json_data = call_args[1]["json_data"] - assert "groupArtUnitNumber:2837" in json_data["q"] + assert "groupArtUnitNumber:2837" in json_data["criteria"] class TestEnrichedCitationsClientGetFields: @@ -350,7 +412,7 @@ def test_paginate_calls_paginate_results( mock_paginate.return_value = iter([]) result = enriched_client.paginate_citations(tech_center_q="2800") - + assert list(result) == [] mock_paginate.assert_called_once_with( method_name="search_citations", response_container_attr="docs", @@ -367,10 +429,10 @@ def 
test_paginate_with_post_body( enriched_client, "paginate_results", autospec=True ) as mock_paginate: mock_paginate.return_value = iter([]) - post_body = {"q": "techCenter:2800", "limit": 50} + post_body = {"q": "techCenter:2800", "rows": 50} result = enriched_client.paginate_citations(post_body=post_body) - + assert list(result) == [] mock_paginate.assert_called_once_with( method_name="search_citations", response_container_attr="docs", From 92de5687f65af7410d8d96172261e7c4b905cde9 Mon Sep 17 00:00:00 2001 From: Andrew <3300522+dpieski@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:42:16 -0500 Subject: [PATCH 3/5] fix: paginate results for solr endpoints --- src/pyUSPTO/clients/base.py | 173 ++++++++--- src/pyUSPTO/clients/enriched_citations.py | 6 +- tests/clients/test_base.py | 271 ++++++++++++++++++ .../clients/test_enriched_citation_clients.py | 12 +- 4 files changed, 410 insertions(+), 52 deletions(-) diff --git a/src/pyUSPTO/clients/base.py b/src/pyUSPTO/clients/base.py index 64045ac..654f34b 100644 --- a/src/pyUSPTO/clients/base.py +++ b/src/pyUSPTO/clients/base.py @@ -408,76 +408,64 @@ def _get_json( ) return self._parse_json_response(response, url) - def paginate_results( + def _paginate_core( self, method_name: str, response_container_attr: str, post_body: dict[str, Any] | None = None, + *, + offset_key: str, + limit_key: str, + supports_nested_pagination: bool = True, **kwargs: Any, ) -> Generator[Any, None, None]: - """Paginate through all results of a method, supporting both GET and POST. + """Core pagination loop parameterized on key names. Args: method_name: Name of the method to call response_container_attr: Attribute name of the container in the response - post_body: Optional POST body for POST-based pagination. If provided, - pagination parameters (offset, limit) will be injected into this body. + post_body: Optional POST body for POST-based pagination + offset_key: Key name for the position parameter (e.g. 
"offset" or "start") + limit_key: Key name for the page-size parameter (e.g. "limit" or "rows") + supports_nested_pagination: Whether to check for a nested + ``post_body["pagination"]`` dict. Set to False for Solr-style APIs. **kwargs: Keyword arguments to pass to the method (for GET pagination) Yields: Items from the response container - - Raises: - ValueError: If offset is provided in kwargs or post_body (offset is managed - automatically by pagination) - - Examples: - # GET pagination - for app in client.paginate_results( - "search_applications", - "patent_file_wrapper_data_bag", - query="test" - ): - print(app) - - # POST pagination with custom limit - for app in client.paginate_results( - "search_applications", - "patent_file_wrapper_data_bag", - post_body={"q": "test", "limit": 50} - ): - print(app) """ # Determine if POST body uses nested pagination structure uses_nested_pagination = False - if post_body is not None: + if supports_nested_pagination and post_body is not None: uses_nested_pagination = "pagination" in post_body and isinstance( post_body["pagination"], dict ) - # Validate that offset is not provided by the user + # Validate that the position key is not provided by the user if post_body is not None: if uses_nested_pagination: # Check nested pagination object - if "offset" in post_body["pagination"]: + if offset_key in post_body["pagination"]: raise ValueError( - "Cannot specify 'offset' in post_body['pagination']. " - "Pagination manages offset automatically." + f"Cannot specify '{offset_key}' in post_body['pagination']. " + f"Pagination manages {offset_key} automatically." ) - limit = post_body["pagination"].get("limit", 25) + limit = post_body["pagination"].get(limit_key, 25) else: # Check top-level - if "offset" in post_body: + if offset_key in post_body: raise ValueError( - "Cannot specify 'offset' in post_body. Pagination manages offset automatically." + f"Cannot specify '{offset_key}' in post_body. 
" + f"Pagination manages {offset_key} automatically." ) - limit = post_body.get("limit", 25) + limit = post_body.get(limit_key, 25) else: - if "offset" in kwargs: + if offset_key in kwargs: raise ValueError( - "Cannot specify 'offset' in kwargs. Pagination manages offset automatically." + f"Cannot specify '{offset_key}' in kwargs. " + f"Pagination manages {offset_key} automatically." ) - limit = kwargs.get("limit", 25) + limit = kwargs.get(limit_key, 25) offset = 0 @@ -490,19 +478,19 @@ def paginate_results( if uses_nested_pagination: # Update nested pagination object current_body["pagination"] = current_body["pagination"].copy() - current_body["pagination"]["offset"] = offset - current_body["pagination"]["limit"] = limit + current_body["pagination"][offset_key] = offset + current_body["pagination"][limit_key] = limit else: # Update top-level pagination params - current_body["offset"] = offset - current_body["limit"] = limit + current_body[offset_key] = offset + current_body[limit_key] = limit method = getattr(self, method_name) response = method(post_body=current_body, **kwargs) else: # GET request: update kwargs with pagination params - kwargs["offset"] = offset - kwargs["limit"] = limit + kwargs[offset_key] = offset + kwargs[limit_key] = limit method = getattr(self, method_name) response = method(**kwargs) @@ -543,6 +531,105 @@ def paginate_results( offset += limit + def paginate_results( + self, + method_name: str, + response_container_attr: str, + post_body: dict[str, Any] | None = None, + **kwargs: Any, + ) -> Generator[Any, None, None]: + """Paginate through all results using offset/limit style. + + For APIs that use ``start``/``rows`` pagination (e.g. Solr-style), + use :meth:`paginate_solr_results` instead. + + Args: + method_name: Name of the method to call + response_container_attr: Attribute name of the container in the response + post_body: Optional POST body for POST-based pagination. 
If provided, + pagination parameters (offset, limit) will be injected into this body. + **kwargs: Keyword arguments to pass to the method (for GET pagination) + + Yields: + Items from the response container + + Raises: + ValueError: If offset is provided in kwargs or post_body (offset is managed + automatically by pagination) + + Examples: + # GET pagination + for app in client.paginate_results( + "search_applications", + "patent_file_wrapper_data_bag", + query="test" + ): + print(app) + + # POST pagination with custom limit + for app in client.paginate_results( + "search_applications", + "patent_file_wrapper_data_bag", + post_body={"q": "test", "limit": 50} + ): + print(app) + """ + return self._paginate_core( + method_name=method_name, + response_container_attr=response_container_attr, + post_body=post_body, + offset_key="offset", + limit_key="limit", + supports_nested_pagination=True, + **kwargs, + ) + + def paginate_solr_results( + self, + method_name: str, + response_container_attr: str, + post_body: dict[str, Any] | None = None, + **kwargs: Any, + ) -> Generator[Any, None, None]: + """Paginate through all results using Solr start/rows style. + + For APIs that use ``offset``/``limit`` pagination (most USPTO APIs), + use :meth:`paginate_results` instead. + + Args: + method_name: Name of the method to call + response_container_attr: Attribute name of the container in the response + post_body: Optional POST body for POST-based pagination. If provided, + pagination parameters (start, rows) will be injected at the top level. + Nested pagination is not supported for Solr-style APIs. 
+ **kwargs: Keyword arguments to pass to the method (for GET pagination) + + Yields: + Items from the response container + + Raises: + ValueError: If start is provided in kwargs or post_body (start is managed + automatically by pagination) + + Examples: + # POST pagination with custom rows + for citation in client.paginate_solr_results( + "search_citations", + "docs", + post_body={"criteria": "techCenter:2800", "rows": 50} + ): + print(citation) + """ + return self._paginate_core( + method_name=method_name, + response_container_attr=response_container_attr, + post_body=post_body, + offset_key="start", + limit_key="rows", + supports_nested_pagination=False, + **kwargs, + ) + @staticmethod def _extract_filename_from_content_disposition( content_disposition: str | None, diff --git a/src/pyUSPTO/clients/enriched_citations.py b/src/pyUSPTO/clients/enriched_citations.py index 69cb9c7..fce7ec7 100644 --- a/src/pyUSPTO/clients/enriched_citations.py +++ b/src/pyUSPTO/clients/enriched_citations.py @@ -225,7 +225,7 @@ def paginate_citations( This method simplifies fetching all enriched citations matching a search query by automatically handling pagination. - The offset parameter is managed by the pagination logic; setting it directly + The start parameter is managed by the pagination logic; setting it directly in kwargs or post_body will raise a ValueError. Args: @@ -242,11 +242,11 @@ def paginate_citations( # Paginate with POST body >>> for citation in client.paginate_citations( - ... post_body={"q": "techCenter:2800", "limit": 50} + ... post_body={"q": "techCenter:2800", "rows": 50} ... ): ... 
process_citation(citation) """ - return self.paginate_results( + return self.paginate_solr_results( method_name="search_citations", response_container_attr="docs", post_body=post_body, diff --git a/tests/clients/test_base.py b/tests/clients/test_base.py index aec1f52..62154a4 100644 --- a/tests/clients/test_base.py +++ b/tests/clients/test_base.py @@ -1115,6 +1115,277 @@ def test_paginate_results_rejects_offset_in_flat_post_body( ) +class TestPaginateSolrResults: + """Tests for paginate_solr_results method (start/rows style).""" + + def test_paginate_solr_results(self, mock_session: MagicMock) -> None: + """Test multi-page Solr pagination with start/rows keys.""" + client: BaseUSPTOClient[Any] = BaseUSPTOClient(base_url="https://api.test.com") + client.config._session = mock_session + + first_response = MagicMock() + first_response.count = 3 + first_response.docs = ["doc1", "doc2"] + + second_response = MagicMock() + second_response.count = 3 + second_response.docs = ["doc3"] + + received_bodies: list[dict[str, Any]] = [] + + def mock_search( + post_body: dict[str, Any] | None = None, **kwargs: Any + ) -> Any: + if post_body: + received_bodies.append(post_body.copy()) + start = post_body["start"] if post_body else 0 + if start == 0: + return first_response + elif start == 2: + return second_response + return MagicMock(count=0, docs=[]) + + client.search = mock_search # type: ignore[attr-defined] + + results = list( + client.paginate_solr_results( + method_name="search", + response_container_attr="docs", + post_body={"criteria": "test", "rows": 2}, + ) + ) + + assert results == ["doc1", "doc2", "doc3"] + assert len(received_bodies) == 2 + + # First request: start=0, rows=2 + assert received_bodies[0]["start"] == 0 + assert received_bodies[0]["rows"] == 2 + assert received_bodies[0]["criteria"] == "test" + # Should NOT have offset/limit keys + assert "offset" not in received_bodies[0] + assert "limit" not in received_bodies[0] + + # Second request: start=2, rows=2 + 
assert received_bodies[1]["start"] == 2 + assert received_bodies[1]["rows"] == 2 + + def test_paginate_solr_results_get(self, mock_session: MagicMock) -> None: + """Test Solr pagination via GET kwargs.""" + response = MagicMock() + response.count = 1 + response.docs = ["doc1"] + + class TestClient(BaseUSPTOClient[Any]): + def search(self, **kwargs: Any) -> Any: + return response + + test_client = TestClient(base_url="https://api.test.com") + test_client.config._session = mock_session + + with patch.object( + test_client, "search", wraps=test_client.search + ) as spy: + results = list( + test_client.paginate_solr_results( + method_name="search", + response_container_attr="docs", + query="test", + rows=10, + ) + ) + + assert results == ["doc1"] + spy.assert_called_once_with(query="test", start=0, rows=10) + + def test_paginate_solr_results_custom_rows( + self, mock_session: MagicMock + ) -> None: + """Test that rows is extracted from post_body and used as page size.""" + response = MagicMock() + response.count = 1 + response.docs = ["doc1"] + + received_bodies: list[dict[str, Any]] = [] + + def mock_search( + post_body: dict[str, Any] | None = None, **kwargs: Any + ) -> Any: + if post_body: + received_bodies.append(post_body.copy()) + return response + + client: BaseUSPTOClient[Any] = BaseUSPTOClient(base_url="https://api.test.com") + client.config._session = mock_session + client.search = mock_search # type: ignore[attr-defined] + + results = list( + client.paginate_solr_results( + method_name="search", + response_container_attr="docs", + post_body={"criteria": "test", "rows": 100}, + ) + ) + + assert results == ["doc1"] + assert received_bodies[0]["rows"] == 100 + assert received_bodies[0]["start"] == 0 + + def test_paginate_solr_results_default_rows( + self, mock_session: MagicMock + ) -> None: + """Test that rows defaults to 25 when not specified.""" + response = MagicMock() + response.count = 1 + response.docs = ["doc1"] + + received_bodies: list[dict[str, Any]] 
= [] + + def mock_search( + post_body: dict[str, Any] | None = None, **kwargs: Any + ) -> Any: + if post_body: + received_bodies.append(post_body.copy()) + return response + + client: BaseUSPTOClient[Any] = BaseUSPTOClient(base_url="https://api.test.com") + client.config._session = mock_session + client.search = mock_search # type: ignore[attr-defined] + + list( + client.paginate_solr_results( + method_name="search", + response_container_attr="docs", + post_body={"criteria": "test"}, + ) + ) + + assert received_bodies[0]["rows"] == 25 + + def test_paginate_solr_results_rejects_start_in_post_body( + self, mock_session: MagicMock + ) -> None: + """Test that start is rejected when provided in POST body.""" + client: BaseUSPTOClient[Any] = BaseUSPTOClient(base_url="https://api.test.com") + client.config._session = mock_session + + post_body = {"criteria": "test", "start": 10, "rows": 50} + + with pytest.raises( + ValueError, match="Cannot specify 'start' in post_body" + ): + list( + client.paginate_solr_results( + method_name="search", + response_container_attr="docs", + post_body=post_body, + ) + ) + + def test_paginate_solr_results_rejects_start_in_kwargs( + self, mock_session: MagicMock + ) -> None: + """Test that start is rejected when provided in kwargs.""" + client: BaseUSPTOClient[Any] = BaseUSPTOClient(base_url="https://api.test.com") + client.config._session = mock_session + + with pytest.raises( + ValueError, match="Cannot specify 'start' in kwargs" + ): + list( + client.paginate_solr_results( + method_name="search", + response_container_attr="docs", + start=10, + ) + ) + + def test_paginate_solr_results_stop_count_zero( + self, mock_session: MagicMock + ) -> None: + """Test Solr pagination stops on count=0.""" + response = MagicMock() + response.count = 0 + response.docs = [] + + class TestClient(BaseUSPTOClient[Any]): + def search(self, **kwargs: Any) -> Any: + return response + + test_client = TestClient(base_url="https://api.test.com") + 
test_client.config._session = mock_session + + results = list( + test_client.paginate_solr_results( + method_name="search", response_container_attr="docs" + ) + ) + assert results == [] + + def test_paginate_solr_results_stop_count_none( + self, mock_session: MagicMock + ) -> None: + """Test Solr pagination stops on count=None.""" + response = MagicMock() + response.count = None + + class TestClient(BaseUSPTOClient[Any]): + def search(self, **kwargs: Any) -> Any: + return response + + test_client = TestClient(base_url="https://api.test.com") + test_client.config._session = mock_session + + results = list( + test_client.paginate_solr_results( + method_name="search", response_container_attr="docs" + ) + ) + assert results == [] + + def test_paginate_solr_results_no_nested_pagination( + self, mock_session: MagicMock + ) -> None: + """Test that nested 'pagination' key is treated as normal data, not special.""" + response = MagicMock() + response.count = 1 + response.docs = ["doc1"] + + received_bodies: list[dict[str, Any]] = [] + + def mock_search( + post_body: dict[str, Any] | None = None, **kwargs: Any + ) -> Any: + if post_body: + received_bodies.append(post_body.copy()) + return response + + client: BaseUSPTOClient[Any] = BaseUSPTOClient(base_url="https://api.test.com") + client.config._session = mock_session + client.search = mock_search # type: ignore[attr-defined] + + # Even though "pagination" dict is present, Solr style should ignore it + # and treat the body as flat + post_body: dict[str, Any] = { + "criteria": "test", + "pagination": {"rows": 10}, + } + + list( + client.paginate_solr_results( + method_name="search", + response_container_attr="docs", + post_body=post_body, + ) + ) + + # start/rows should be at top level, not inside pagination + assert received_bodies[0]["start"] == 0 + assert received_bodies[0]["rows"] == 25 # default, not from pagination dict + # The pagination key should still be there as data + assert received_bodies[0]["pagination"] == 
{"rows": 10} + + class TestContentDispositionParsing: """Tests for Content-Disposition header parsing.""" diff --git a/tests/clients/test_enriched_citation_clients.py b/tests/clients/test_enriched_citation_clients.py index d16195a..7fc7f2a 100644 --- a/tests/clients/test_enriched_citation_clients.py +++ b/tests/clients/test_enriched_citation_clients.py @@ -401,13 +401,13 @@ def test_get_fields( class TestEnrichedCitationsClientPaginate: """Tests for paginate_citations method.""" - def test_paginate_calls_paginate_results( + def test_paginate_calls_paginate_solr_results( self, enriched_client: EnrichedCitationsClient, ) -> None: - """Test paginate_citations delegates to paginate_results.""" + """Test paginate_citations delegates to paginate_solr_results.""" with patch.object( - enriched_client, "paginate_results", autospec=True + enriched_client, "paginate_solr_results", autospec=True ) as mock_paginate: mock_paginate.return_value = iter([]) @@ -424,9 +424,9 @@ def test_paginate_with_post_body( self, enriched_client: EnrichedCitationsClient, ) -> None: - """Test paginate_citations passes post_body to paginate_results.""" + """Test paginate_citations passes post_body to paginate_solr_results.""" with patch.object( - enriched_client, "paginate_results", autospec=True + enriched_client, "paginate_solr_results", autospec=True ) as mock_paginate: mock_paginate.return_value = iter([]) post_body = {"q": "techCenter:2800", "rows": 50} @@ -446,7 +446,7 @@ def test_paginate_yields_citations( ) -> None: """Test paginate_citations yields EnrichedCitation objects.""" with patch.object( - enriched_client, "paginate_results", autospec=True + enriched_client, "paginate_solr_results", autospec=True ) as mock_paginate: mock_paginate.return_value = iter([mock_enriched_citation]) From c808946d5bdfc71f82b728fdaaf0ae05ff05fe8a Mon Sep 17 00:00:00 2001 From: Andrew <3300522+dpieski@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:52:02 -0500 Subject: [PATCH 4/5] docs: update docs with 
new endpoint. --- docs/source/api/clients/enriched_citations.rst | 7 +++++++ docs/source/api/clients/index.rst | 1 + docs/source/api/models/enriched_citations.rst | 7 +++++++ docs/source/api/models/index.rst | 1 + tests/integration/test_enriched_citations_integration.py | 5 ++--- 5 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 docs/source/api/clients/enriched_citations.rst create mode 100644 docs/source/api/models/enriched_citations.rst diff --git a/docs/source/api/clients/enriched_citations.rst b/docs/source/api/clients/enriched_citations.rst new file mode 100644 index 0000000..c36b419 --- /dev/null +++ b/docs/source/api/clients/enriched_citations.rst @@ -0,0 +1,7 @@ +Enriched Citations Client +========================= + +.. automodule:: pyUSPTO.clients.enriched_citations + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/clients/index.rst b/docs/source/api/clients/index.rst index e771d8c..a3cbbc5 100644 --- a/docs/source/api/clients/index.rst +++ b/docs/source/api/clients/index.rst @@ -5,6 +5,7 @@ Clients :maxdepth: 2 bulk_data + enriched_citations patent_data petition_decisions ptab_appeals diff --git a/docs/source/api/models/enriched_citations.rst b/docs/source/api/models/enriched_citations.rst new file mode 100644 index 0000000..3a5a668 --- /dev/null +++ b/docs/source/api/models/enriched_citations.rst @@ -0,0 +1,7 @@ +Enriched Citations +================== + +.. 
automodule:: pyUSPTO.models.enriched_citations + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/api/models/index.rst b/docs/source/api/models/index.rst index a375743..48ec994 100644 --- a/docs/source/api/models/index.rst +++ b/docs/source/api/models/index.rst @@ -6,6 +6,7 @@ Models base bulk_data + enriched_citations patent_data petition_decisions ptab diff --git a/tests/integration/test_enriched_citations_integration.py b/tests/integration/test_enriched_citations_integration.py index 6f5cca6..1180523 100644 --- a/tests/integration/test_enriched_citations_integration.py +++ b/tests/integration/test_enriched_citations_integration.py @@ -13,7 +13,6 @@ from pyUSPTO.clients import EnrichedCitationsClient from pyUSPTO.config import USPTOConfig from pyUSPTO.models.enriched_citations import ( - EnrichedCitation, EnrichedCitationFieldsResponse, EnrichedCitationResponse, ) @@ -48,7 +47,7 @@ def test_search_by_application_number( """Test searching citations by application number.""" response = enriched_citations_client.search_citations( patent_application_number_q="15739603", - limit=5, + rows=10, ) assert isinstance(response, EnrichedCitationResponse) assert response.num_found > 0 @@ -61,7 +60,7 @@ def test_search_by_tech_center( """Test searching citations by technology center.""" response = enriched_citations_client.search_citations( tech_center_q="2800", - limit=3, + rows=3, ) assert isinstance(response, EnrichedCitationResponse) assert response.num_found > 0 From b245a591f4562a6ce560017fb4ae3b45dd0effee Mon Sep 17 00:00:00 2001 From: Andrew <3300522+dpieski@users.noreply.github.com> Date: Fri, 27 Mar 2026 13:54:03 -0500 Subject: [PATCH 5/5] chore: add examples --- docs/source/examples/enriched_citations.rst | 6 + docs/source/examples/index.rst | 1 + examples/enriched_citations_example.py | 132 +++++++++++++ .../test_enriched_citations_integration.py | 184 ++++++++++++++++-- 4 files changed, 310 insertions(+), 13 deletions(-) create mode 100644 
docs/source/examples/enriched_citations.rst create mode 100644 examples/enriched_citations_example.py diff --git a/docs/source/examples/enriched_citations.rst b/docs/source/examples/enriched_citations.rst new file mode 100644 index 0000000..02e0846 --- /dev/null +++ b/docs/source/examples/enriched_citations.rst @@ -0,0 +1,6 @@ +Enriched Citations Example +========================== + +.. literalinclude:: ../../../examples/enriched_citations_example.py + :language: python + :linenos: diff --git a/docs/source/examples/index.rst b/docs/source/examples/index.rst index 029a8c4..0a7aa9b 100644 --- a/docs/source/examples/index.rst +++ b/docs/source/examples/index.rst @@ -5,6 +5,7 @@ Examples :maxdepth: 2 bulk_data + enriched_citations patent_data ifw_example petition_decisions diff --git a/examples/enriched_citations_example.py b/examples/enriched_citations_example.py new file mode 100644 index 0000000..bc48203 --- /dev/null +++ b/examples/enriched_citations_example.py @@ -0,0 +1,132 @@ +"""Example usage of pyUSPTO for Enriched Cited Reference Metadata. + +Demonstrates the EnrichedCitationsClient for searching citation data +extracted from patent office actions, filtering by various criteria, +and paginating through results. +""" + +import os + +from pyUSPTO import EnrichedCitationsClient, USPTOConfig + +# --- Client Initialization --- +api_key = os.environ.get("USPTO_API_KEY", "YOUR_API_KEY_HERE") +if api_key == "YOUR_API_KEY_HERE": + raise ValueError( + "API key is not set. Set the USPTO_API_KEY environment variable." 
+ ) +config = USPTOConfig(api_key=api_key) +client = EnrichedCitationsClient(config=config) + +print("-" * 40) +print("Example 1: Search by application number") +print("-" * 40) + +response = client.search_citations(patent_application_number_q="15061308") +print(f"Found {response.num_found} citations for application 15061308.") +for citation in response.docs[:5]: + print(f"\n Cited Document: {citation.cited_document_identifier}") + print(f" Category Code: {citation.citation_category_code}") + print(f" Office Action Date: {citation.office_action_date}") + print(f" Office Action Type: {citation.office_action_category}") + if citation.examiner_cited_reference_indicator: + print(" Cited by: Examiner") + if citation.passage_location_text: + print(f" Passages: {citation.passage_location_text}") + +print("-" * 40) +print("Example 2: Search by tech center and citation category") +print("-" * 40) + +response = client.search_citations( + tech_center_q="2800", + citation_category_code_q="X", + rows=5, +) +print(f"Found {response.num_found} 'X' citations in tech center 2800.") +for citation in response.docs: + print( + f" App {citation.patent_application_number}: " + f"{citation.cited_document_identifier} " + f"(claims: {citation.related_claim_number_text})" + ) + +print("-" * 40) +print("Example 3: Search by date range") +print("-" * 40) + +response = client.search_citations( + office_action_date_from_q="2019-01-01", + office_action_date_to_q="2019-12-31", + rows=5, +) +print(f"Found {response.num_found} citations from 2019.") + +print("-" * 40) +print("Example 4: Combined filters") +print("-" * 40) + +response = client.search_citations( + tech_center_q="2800", + citation_category_code_q="Y", + examiner_cited_q=True, + rows=5, +) +print( + f"Found {response.num_found} examiner-cited 'Y' citations in tech center 2800." 
+) +for citation in response.docs: + print( + f" App {citation.patent_application_number}: " + f"{citation.cited_document_identifier} " + f"(art unit: {citation.group_art_unit_number})" + ) + +print("-" * 40) +print("Example 5: Search with sort") +print("-" * 40) + +response = client.search_citations( + tech_center_q="2800", + sort="officeActionDate desc", + rows=5, +) +print(f"Found {response.num_found} citations, sorted by date descending.") +for citation in response.docs: + print(f" {citation.office_action_date}: {citation.cited_document_identifier}") + +print("-" * 40) +print("Example 6: Search by cited document identifier") +print("-" * 40) + +response = client.search_citations( + cited_document_identifier_q="US 20190165601 A1", + rows=5, +) +print(f"Found {response.num_found} citations of US 20190165601 A1.") + +print("-" * 40) +print("Example 7: Paginate through results") +print("-" * 40) + +max_items = 30 +count = 0 +for citation in client.paginate_citations( + tech_center_q="2800", rows=10 +): + count += 1 + if count >= max_items: + print(f" ... 
(stopping at {max_items} items)") + break + +print(f"Retrieved {count} citations via pagination") + +print("-" * 40) +print("Example 8: Get available fields") +print("-" * 40) + +fields_response = client.get_fields() +print(f"API Status: {fields_response.api_status}") +print(f"Field Count: {fields_response.field_count}") +print(f"Fields: {fields_response.fields}") +print(f"Last Updated: {fields_response.last_data_updated_date}") diff --git a/tests/integration/test_enriched_citations_integration.py b/tests/integration/test_enriched_citations_integration.py index 1180523..af6f456 100644 --- a/tests/integration/test_enriched_citations_integration.py +++ b/tests/integration/test_enriched_citations_integration.py @@ -13,6 +13,7 @@ from pyUSPTO.clients import EnrichedCitationsClient from pyUSPTO.config import USPTOConfig from pyUSPTO.models.enriched_citations import ( + EnrichedCitation, EnrichedCitationFieldsResponse, EnrichedCitationResponse, ) @@ -26,15 +27,7 @@ @pytest.fixture(scope="module") def enriched_citations_client(config: USPTOConfig) -> EnrichedCitationsClient: - """ - Create an EnrichedCitationsClient instance for integration tests. 
- - Args: - config: The configuration instance - - Returns: - EnrichedCitationsClient: A client instance - """ + """Create an EnrichedCitationsClient instance for integration tests.""" return EnrichedCitationsClient(config=config) @@ -46,13 +39,27 @@ def test_search_by_application_number( ) -> None: """Test searching citations by application number.""" response = enriched_citations_client.search_citations( - patent_application_number_q="15739603", - rows=10, + patent_application_number_q="15061308", ) assert isinstance(response, EnrichedCitationResponse) assert response.num_found > 0 assert len(response.docs) > 0 - assert response.docs[0].patent_application_number == "15739603" + for doc in response.docs: + assert doc.patent_application_number == "15061308" + + def test_search_by_citation_category_code( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations by citation category code.""" + response = enriched_citations_client.search_citations( + citation_category_code_q="X", + rows=5, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + assert len(response.docs) <= 5 + for doc in response.docs: + assert doc.citation_category_code == "X" def test_search_by_tech_center( self, enriched_citations_client: EnrichedCitationsClient @@ -60,13 +67,141 @@ def test_search_by_tech_center( """Test searching citations by technology center.""" response = enriched_citations_client.search_citations( tech_center_q="2800", - rows=3, + rows=5, ) assert isinstance(response, EnrichedCitationResponse) assert response.num_found > 0 for doc in response.docs: assert doc.tech_center == "2800" + def test_search_by_group_art_unit( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations by group art unit number.""" + response = enriched_citations_client.search_citations( + group_art_unit_number_q="2837", + rows=5, + ) + assert isinstance(response, 
EnrichedCitationResponse) + assert response.num_found > 0 + for doc in response.docs: + assert doc.group_art_unit_number == "2837" + + def test_search_by_office_action_category( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations by office action category.""" + response = enriched_citations_client.search_citations( + office_action_category_q="CTNF", + rows=5, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + for doc in response.docs: + assert doc.office_action_category == "CTNF" + + def test_search_by_examiner_cited( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations by examiner-cited indicator.""" + response = enriched_citations_client.search_citations( + examiner_cited_q=True, + rows=5, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + for doc in response.docs: + assert doc.examiner_cited_reference_indicator is True + + def test_search_by_cited_document_identifier( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations by cited document identifier.""" + response = enriched_citations_client.search_citations( + cited_document_identifier_q="US 20190165601 A1", + rows=5, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + for doc in response.docs: + assert doc.cited_document_identifier == "US 20190165601 A1" + + def test_search_by_date_range( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations by office action date range.""" + response = enriched_citations_client.search_citations( + office_action_date_from_q="2019-01-01", + office_action_date_to_q="2019-12-31", + rows=5, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + for doc in response.docs: + assert doc.office_action_date is not None + assert 
doc.office_action_date.year == 2019 + + def test_search_combined_params( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations with multiple convenience params.""" + response = enriched_citations_client.search_citations( + tech_center_q="2800", + citation_category_code_q="Y", + examiner_cited_q=True, + rows=5, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + for doc in response.docs: + assert doc.tech_center == "2800" + assert doc.citation_category_code == "Y" + assert doc.examiner_cited_reference_indicator is True + + def test_search_direct_query( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations with a direct query string.""" + response = enriched_citations_client.search_citations( + query="patentApplicationNumber:15739603", + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + for doc in response.docs: + assert doc.patent_application_number == "15739603" + + def test_search_with_sort( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test searching citations with sort order.""" + response = enriched_citations_client.search_citations( + tech_center_q="2800", + sort="officeActionDate desc", + rows=5, + ) + assert isinstance(response, EnrichedCitationResponse) + assert response.num_found > 0 + assert len(response.docs) <= 5 + + def test_citation_fields_populated( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test that returned citations have expected fields populated.""" + response = enriched_citations_client.search_citations( + patent_application_number_q="15061308", + ) + assert response.num_found > 0 + citation = response.docs[0] + assert isinstance(citation, EnrichedCitation) + assert citation.id != "" + assert citation.patent_application_number == "15061308" + assert citation.cited_document_identifier is not None + assert 
citation.office_action_date is not None + assert citation.office_action_category is not None + assert citation.citation_category_code is not None + assert citation.tech_center is not None + assert citation.group_art_unit_number is not None + class TestEnrichedCitationsGetFields: """Integration tests for get_fields.""" @@ -82,3 +217,26 @@ def test_get_fields( assert len(response.fields) == 22 assert "patentApplicationNumber" in response.fields assert "citedDocumentIdentifier" in response.fields + assert "officeActionDate" in response.fields + assert "citationCategoryCode" in response.fields + + +class TestEnrichedCitationsPaginate: + """Integration tests for paginate_citations.""" + + def test_paginate_citations( + self, enriched_citations_client: EnrichedCitationsClient + ) -> None: + """Test paginating through citation results.""" + count = 0 + for citation in enriched_citations_client.paginate_citations( + tech_center_q="2800", + rows=10, + ): + assert isinstance(citation, EnrichedCitation) + assert citation.tech_center == "2800" + count += 1 + if count >= 25: + break + + assert count == 25