From 6332cf994ffc2ebe5a078c4b26aadd2ddc24f140 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 18 Dec 2025 14:28:47 -0600 Subject: [PATCH 01/61] Add Linearize PDF Assisted-by: Codex --- src/pdfrest/client.py | 53 +++++ src/pdfrest/models/_internal.py | 24 +++ tests/live/test_live_linearize_pdf.py | 72 +++++++ tests/test_linearize_pdf.py | 282 ++++++++++++++++++++++++++ 4 files changed, 431 insertions(+) create mode 100644 tests/live/test_live_linearize_pdf.py create mode 100644 tests/test_linearize_pdf.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 8818899f..0b8aeb1c 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -79,6 +79,7 @@ JpegPdfRestPayload, PdfCompressPayload, PdfFlattenFormsPayload, + PdfLinearizePayload, PdfInfoPayload, PdfMergePayload, PdfRedactionApplyPayload, @@ -2305,6 +2306,32 @@ def compress_pdf( extra_body=extra_body, timeout=timeout, ) + + def linearize_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Linearize a PDF for optimized fast web view.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/linearized-pdf", + payload=payload, + payload_model=PdfLinearizePayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) def convert_to_pdfx( self, @@ -2848,6 +2875,32 @@ async def compress_pdf( extra_body=extra_body, timeout=timeout, ) + + async def linearize_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously linearize a PDF for optimized fast web view.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/linearized-pdf", + payload=payload, + payload_model=PdfLinearizePayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) async def convert_to_pdfx( self, diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 33cb8747..126914e1 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -626,6 +626,30 @@ def _validate_profile_dependency(self) -> PdfCompressPayload: return self +class PdfLinearizePayload(BaseModel): + """Adapt caller options into a pdfRest-ready linearize PDF request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class BmpPdfRestPayload(BasePdfRestGraphicPayload[Literal["rgb", "gray"]]): """Adapt caller options into a pdfRest-ready BMP request payload.""" diff --git a/tests/live/test_live_linearize_pdf.py b/tests/live/test_live_linearize_pdf.py new file mode 100644 index 00000000..59612691 --- /dev/null +++ b/tests/live/test_live_linearize_pdf.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +import pytest + +from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_linearize( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("linearized-live", id="custom-output"), + ], +) +def test_live_linearize_pdf( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_linearize: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str] = {} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.linearize_pdf(uploaded_pdf_for_linearize, **kwargs) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_linearize.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +def test_live_linearize_pdf_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_linearize: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError), + ): + client.linearize_pdf( + uploaded_pdf_for_linearize, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) diff --git a/tests/test_linearize_pdf.py b/tests/test_linearize_pdf.py new file mode 100644 index 00000000..6b212437 --- /dev/null +++ b/tests/test_linearize_pdf.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfLinearizePayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_linearize_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfLinearizePayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/linearized-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "linearized.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.linearize_pdf(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "linearized.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + assert response.warning is None + + +def test_linearize_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/linearized-pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "linearized" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "linearized-out.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.linearize_pdf( + input_file, + output="linearized", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.61, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "linearized-out.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.61) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.61) + + +@pytest.mark.asyncio +async def test_async_linearize_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfLinearizePayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/linearized-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-linearized.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.linearize_pdf(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-linearized.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_linearize_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/linearized-pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["flags"] == ["a", "b"] + assert payload["id"] == str(input_file.id) + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-linearized-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.linearize_pdf( + input_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"flags": ["a", "b"]}, + timeout=0.83, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-linearized-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.83) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.83) + + +@pytest.mark.parametrize( + ("files", "match"), + [ + pytest.param( + "png", + "Must be a PDF file", + id="non-pdf-file", + ), + pytest.param( + "multiple", + "List should have at most 1 item after validation", + id="multiple-files", + ), + ], +) +def test_linearize_pdf_validation( + monkeypatch: pytest.MonkeyPatch, + files: str, + match: str, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + files_argument = ( + png_file + if files == "png" + else [pdf_file, make_pdf_file(PdfRestFileID.generate())] + ) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match=match), + ): + client.linearize_pdf(files_argument) From d7c815eb654efa1a662c63e54e684c56c7652cad Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 18 Dec 2025 15:08:31 -0600 Subject: [PATCH 02/61] Add Summarize PDF Assisted-by: Codex --- src/pdfrest/client.py | 93 +++++++++ src/pdfrest/models/__init__.py | 2 + src/pdfrest/models/_internal.py | 58 +++++- src/pdfrest/models/public.py | 38 ++++ src/pdfrest/types/__init__.py | 6 + src/pdfrest/types/public.py | 18 ++ tests/live/test_live_summarize_pdf_text.py | 47 +++++ tests/test_summarize_pdf_text.py | 207 +++++++++++++++++++++ 8 files changed, 468 insertions(+), 1 deletion(-) create mode 100644 tests/live/test_live_summarize_pdf_text.py create mode 100644 tests/test_summarize_pdf_text.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 0b8aeb1c..e46198ed 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -66,6 +66,7 @@ PdfRestFileBasedResponse, PdfRestFileID, PdfRestInfoResponse, + SummarizePdfTextResponse, UpResponse, ) @@ -89,6 +90,7 @@ PdfToPdfxPayload, PdfToWordPayload, PngPdfRestPayload, + SummarizePdfTextPayload, TiffPdfRestPayload, UploadURLs, ) @@ -100,6 +102,9 @@ PdfRedactionInstruction, PdfRGBColor, PdfXType, + SummaryFormat, + SummaryOutputFormat, + SummaryOutputType, ) DEFAULT_BASE_URL = "https://api.pdfrest.com" @@ -2106,6 +2111,50 @@ def query_pdf_info( raw_payload = self._send_request(request) return PdfRestInfoResponse.model_validate(raw_payload) + def summarize_pdf_text( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + target_word_count: int | None = 400, + summary_format: SummaryFormat = "overview", + pages: PdfPageSelection | None = None, + output_format: SummaryOutputFormat = "markdown", + output_type: SummaryOutputType = "json", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> SummarizePdfTextResponse: + """Summarize the textual content of a PDF, Markdown, or text document.""" + + payload: dict[str, Any] = { + "files": file, + "target_word_count": target_word_count, + "summary_format": summary_format, + "output_format": output_format, + "output_type": output_type, + } + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = SummarizePdfTextPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/summarized-pdf-text", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = self._send_request(request) + return SummarizePdfTextResponse.model_validate(raw_payload) + def preview_redactions( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2633,6 +2682,50 @@ async def query_pdf_info( raw_payload = await self._send_request(request) return PdfRestInfoResponse.model_validate(raw_payload) + async def summarize_pdf_text( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + target_word_count: int | None = 400, + summary_format: SummaryFormat = "overview", + pages: PdfPageSelection | None = None, + output_format: SummaryOutputFormat = "markdown", + output_type: SummaryOutputType = "json", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> SummarizePdfTextResponse: + """Summarize the textual content of a PDF, Markdown, or text document.""" + + payload: dict[str, Any] = { + "files": file, + "target_word_count": target_word_count, + "summary_format": summary_format, + "output_format": output_format, + "output_type": output_type, + } + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = SummarizePdfTextPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/summarized-pdf-text", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = await self._send_request(request) + return SummarizePdfTextResponse.model_validate(raw_payload) + async def preview_redactions( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index 54c9aeb4..f81577e1 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -5,6 +5,7 @@ PdfRestFileBasedResponse, PdfRestFileID, PdfRestInfoResponse, + SummarizePdfTextResponse, UpResponse, ) @@ -15,5 +16,6 @@ "PdfRestFileBasedResponse", "PdfRestFileID", "PdfRestInfoResponse", + "SummarizePdfTextResponse", "UpResponse", ] diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 126914e1..fb8349dc 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -21,7 +21,13 @@ from pdfrest.types.public import PdfRedactionPreset -from ..types import PdfInfoQuery, PdfXType +from ..types import ( + PdfInfoQuery, + PdfXType, + SummaryFormat, + SummaryOutputFormat, + SummaryOutputType, +) from . import PdfRestFile from .public import PdfRestFileID @@ -248,6 +254,56 @@ class PdfInfoPayload(BaseModel): ] +class SummarizePdfTextPayload(BaseModel): + """Adapt caller options into a pdfRest-ready summarize request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + "application/pdf", + "text/markdown", + "text/plain", + error_msg="Must be a PDF, Markdown, or plain text file", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] + target_word_count: Annotated[ + int | None, Field(serialization_alias="target_word_count", ge=1, default=400) + ] = 400 + summary_format: Annotated[ + SummaryFormat, Field(serialization_alias="summary_format", default="overview") + ] = "overview" + pages: Annotated[ + list[AscendingPageRange] | None, + Field(serialization_alias="pages", min_length=1, default=None), + BeforeValidator(_ensure_list), + BeforeValidator(_split_comma_list), + BeforeValidator(_int_to_string), + PlainSerializer(_serialize_page_ranges), + ] = None + output_format: Annotated[ + SummaryOutputFormat, + Field(serialization_alias="output_format", default="markdown"), + ] = "markdown" + output_type: Annotated[ + SummaryOutputType, Field(serialization_alias="output_type", default="json") + ] = "json" + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + RgbChannel = Annotated[int, Field(ge=0, le=255)] diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 3de11476..59a96ec8 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -26,6 +26,7 @@ "PdfRestFileBasedResponse", "PdfRestFileID", "PdfRestInfoResponse", + "SummarizePdfTextResponse", "UpResponse", ) @@ -310,6 +311,43 @@ class PdfRestDeletionResponse(BaseModel): min_length=1, ), ] +class SummarizePdfTextResponse(BaseModel): + """Response returned by the summarize-pdf-text tool.""" + + model_config = ConfigDict(extra="allow") + + summary: Annotated[ + str | None, + Field( + description="Inline summary content when output_type is json.", + default=None, + ), + ] = None + input_id: Annotated[ + PdfRestFileID, + Field( + validation_alias=AliasChoices("input_id", "inputId"), + description="The id of the input file.", + ), + ] + output_url: Annotated[ + HttpUrl | None, + Field( + alias="outputUrl", + validation_alias=AliasChoices("output_url", "outputUrl"), + description="Download URL for file output.", + default=None, + ), + ] = None + output_id: Annotated[ + PdfRestFileID | None, + Field( + alias="outputId", + validation_alias=AliasChoices("output_id", "outputId"), + description="The id of the generated output when output_type is file.", + default=None, + ), + ] = None class PdfRestInfoResponse(BaseModel): diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index 9bc36a87..d1c16809 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -11,6 +11,9 @@ PdfRedactionType, PdfRGBColor, PdfXType, + SummaryFormat, + SummaryOutputFormat, + SummaryOutputType, ) __all__ = [ @@ -24,4 +27,7 @@ "PdfRedactionPreset", "PdfRedactionType", "PdfXType", + "SummaryFormat", + "SummaryOutputFormat", + "SummaryOutputType", ] diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 1df53284..10fc2028 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -23,6 +23,9 @@ "PdfRedactionPreset", "PdfRedactionType", "PdfXType", + "SummaryFormat", + "SummaryOutputFormat", + "SummaryOutputType", ) PdfInfoQuery = Literal[ @@ -99,3 +102,18 @@ class PdfMergeSource(TypedDict, total=False): PdfMergeInput = PdfRestFile | PdfMergeSource | tuple[PdfRestFile, PdfPageSelection] PdfXType = Literal["PDF/X-1a", "PDF/X-3", "PDF/X-4", "PDF/X-6"] + +SummaryFormat = Literal[ + "overview", + "highlight", + "abstract", + "bullet_points", + "numbered_list", + "table_of_contents", + "outline", + "question_answer", + "action_items", +] + +SummaryOutputFormat = Literal["plaintext", "markdown"] +SummaryOutputType = Literal["json", "file"] diff --git a/tests/live/test_live_summarize_pdf_text.py b/tests/live/test_live_summarize_pdf_text.py new file mode 100644 index 00000000..25d287b0 --- /dev/null +++ b/tests/live/test_live_summarize_pdf_text.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import pytest + +from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest.models import SummarizePdfTextResponse + +from ..resources import get_test_resource_path + + +def test_live_summarize_pdf_text_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + response = client.summarize_pdf_text( + uploaded, + target_word_count=40, + output_type="json", + summary_format="overview", + ) + + assert isinstance(response, SummarizePdfTextResponse) + assert response.summary + assert response.input_id == uploaded.id + + +def test_live_summarize_pdf_text_invalid_format( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + with pytest.raises(PdfRestApiError, match="error"): + client.summarize_pdf_text( + uploaded, + extra_body={"summary_format": "invalid-style"}, + ) diff --git a/tests/test_summarize_pdf_text.py b/tests/test_summarize_pdf_text.py new file mode 100644 index 00000000..99f481f9 --- /dev/null +++ b/tests/test_summarize_pdf_text.py @@ -0,0 +1,207 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileID, SummarizePdfTextResponse +from pdfrest.models._internal import SummarizePdfTextPayload + +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file + + +def _make_text_file(file_id: str) -> PdfRestFile: + return PdfRestFile.model_validate( + { + "id": file_id, + "name": "notes.txt", + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "text/plain", + "size": 64, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + + +def test_summarize_payload_rejects_invalid_mime() -> None: + file_id = str(PdfRestFileID.generate()) + image_file = PdfRestFile.model_validate( + { + "id": file_id, + "name": "image.png", + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "image/png", + "size": 10, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + + with pytest.raises( + ValidationError, match="Must be a PDF, Markdown, or plain text file" + ): + SummarizePdfTextPayload.model_validate({"files": [image_file]}) + + +def test_summarize_payload_invalid_page_range() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + + with pytest.raises( + ValidationError, match="The start page must be less than or equal to the end" + ): + SummarizePdfTextPayload.model_validate({"files": [file_repr], "pages": ["5-2"]}) + + +def test_summarize_pdf_text_json_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = _make_text_file(str(PdfRestFileID.generate(1))) + payload_dump = SummarizePdfTextPayload.model_validate( + { + "files": [input_file], + "target_word_count": 120, + "summary_format": "bullet_points", + "pages": ["1-3"], + "output_format": "plaintext", + "output_type": "json", + "output": "summary", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/summarized-pdf-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "summary": "Key points...", + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.summarize_pdf_text( + input_file, + target_word_count=120, + summary_format="bullet_points", + pages=["1-3"], + output_format="plaintext", + output_type="json", + output="summary", + ) + + assert seen == {"post": 1} + assert isinstance(response, SummarizePdfTextResponse) + assert response.summary == "Key points..." + assert response.input_id == input_file.id + assert response.output_id is None + assert response.output_url is None + + +def test_summarize_pdf_text_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + payload_dump = SummarizePdfTextPayload.model_validate( + { + "files": [input_file], + "output_type": "file", + "output_format": "markdown", + "summary_format": "overview", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + output_id = str(PdfRestFileID.generate()) + + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/summarized-pdf-text": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + for key, value in payload_dump.items(): + assert payload[key] == value + assert payload["debug"] is True + return httpx.Response( + 200, + json={ + "outputUrl": f"https://api.pdfrest.com/resource/{output_id}?format=file", + "outputId": output_id, + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.summarize_pdf_text( + input_file, + output_type="file", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.25, + ) + + assert isinstance(response, SummarizePdfTextResponse) + assert response.output_id == output_id + assert response.output_url + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.25) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.25) + + +@pytest.mark.asyncio +async def test_async_summarize_pdf_text_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + payload_dump = SummarizePdfTextPayload.model_validate( + {"files": [input_file], "output_type": "json"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/summarized-pdf-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + for key, value in payload_dump.items(): + assert payload[key] == value + return httpx.Response( + 200, + json={ + "summary": "Async summary", + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.summarize_pdf_text(input_file, output_type="json") + + assert seen == {"post": 1} + assert isinstance(response, SummarizePdfTextResponse) + assert response.summary == "Async summary" + assert response.input_id == input_file.id From a7de4256f09a7ebc8306534f09bb40f0cae91fb9 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 18 Dec 2025 15:29:55 -0600 Subject: [PATCH 03/61] Add Translate PDF Assisted-by: Codex --- src/pdfrest/client.py | 94 ++++++++++ src/pdfrest/models/__init__.py | 2 + src/pdfrest/models/_internal.py | 53 ++++++ src/pdfrest/models/public.py | 40 ++++ src/pdfrest/types/__init__.py | 4 + src/pdfrest/types/public.py | 5 + tests/live/test_live_translate_pdf_text.py | 48 +++++ tests/test_translate_pdf_text.py | 208 +++++++++++++++++++++ 8 files changed, 454 insertions(+) create mode 100644 tests/live/test_live_translate_pdf_text.py create mode 100644 tests/test_translate_pdf_text.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index e46198ed..f046a4de 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -67,6 +67,7 @@ PdfRestFileID, PdfRestInfoResponse, SummarizePdfTextResponse, + TranslatePdfTextResponse, UpResponse, ) @@ -92,6 +93,7 @@ PngPdfRestPayload, SummarizePdfTextPayload, TiffPdfRestPayload, + TranslatePdfTextPayload, UploadURLs, ) from .types import ( @@ -105,6 +107,8 @@ SummaryFormat, SummaryOutputFormat, SummaryOutputType, + TranslateOutputFormat, + TranslateOutputType, ) DEFAULT_BASE_URL = "https://api.pdfrest.com" @@ -2155,6 +2159,51 @@ def summarize_pdf_text( raw_payload = self._send_request(request) return SummarizePdfTextResponse.model_validate(raw_payload) + def translate_pdf_text( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + target_language: str, + source_language: str | None = None, + pages: PdfPageSelection | None = None, + output_format: TranslateOutputFormat = "markdown", + output_type: TranslateOutputType = "json", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> TranslatePdfTextResponse: + """Translate the textual content of a PDF, Markdown, or text document.""" + + payload: dict[str, Any] = { + "files": file, + "target_language": target_language, + "output_format": output_format, + "output_type": output_type, + } + if source_language is not None: + payload["source_language"] = source_language + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = TranslatePdfTextPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/translated-pdf-text", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = self._send_request(request) + return TranslatePdfTextResponse.model_validate(raw_payload) + def preview_redactions( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2726,6 +2775,51 @@ async def summarize_pdf_text( raw_payload = await self._send_request(request) return SummarizePdfTextResponse.model_validate(raw_payload) + async def translate_pdf_text( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + target_language: str, + source_language: str | None = None, + pages: PdfPageSelection | None = None, + output_format: TranslateOutputFormat = "markdown", + output_type: TranslateOutputType = "json", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> TranslatePdfTextResponse: + """Translate the textual content of a PDF, Markdown, or text document.""" + + payload: dict[str, Any] = { + "files": file, + "target_language": target_language, + "output_format": output_format, + "output_type": output_type, + } + if source_language is not None: + payload["source_language"] = source_language + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = TranslatePdfTextPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/translated-pdf-text", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = await self._send_request(request) + return TranslatePdfTextResponse.model_validate(raw_payload) + async def preview_redactions( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index f81577e1..92907075 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -6,6 +6,7 @@ PdfRestFileID, PdfRestInfoResponse, SummarizePdfTextResponse, + TranslatePdfTextResponse, UpResponse, ) @@ -17,5 +18,6 @@ "PdfRestFileID", "PdfRestInfoResponse", "SummarizePdfTextResponse", + "TranslatePdfTextResponse", "UpResponse", ] diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index fb8349dc..8c79046a 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -27,6 +27,8 @@ SummaryFormat, SummaryOutputFormat, SummaryOutputType, + TranslateOutputFormat, + TranslateOutputType, ) from . import PdfRestFile from .public import PdfRestFileID @@ -304,6 +306,57 @@ class SummarizePdfTextPayload(BaseModel): ] = None +class TranslatePdfTextPayload(BaseModel): + """Adapt caller options into a pdfRest-ready translate request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + "application/pdf", + "text/markdown", + "text/plain", + error_msg="Must be a PDF, Markdown, or plain text file", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] + target_language: Annotated[ + str, Field(serialization_alias="target_language", min_length=1) + ] + source_language: Annotated[ + str | None, + Field(serialization_alias="source_language", min_length=1, default=None), + ] = None + pages: Annotated[ + list[AscendingPageRange] | None, + Field(serialization_alias="pages", min_length=1, default=None), + BeforeValidator(_ensure_list), + BeforeValidator(_split_comma_list), + BeforeValidator(_int_to_string), + PlainSerializer(_serialize_page_ranges), + ] = None + output_format: Annotated[ + TranslateOutputFormat, + Field(serialization_alias="output_format", default="markdown"), + ] = "markdown" + output_type: Annotated[ + TranslateOutputType, Field(serialization_alias="output_type", default="json") + ] = "json" + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + RgbChannel = Annotated[int, Field(ge=0, le=255)] diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 59a96ec8..01b168bc 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -27,6 +27,7 @@ "PdfRestFileID", "PdfRestInfoResponse", "SummarizePdfTextResponse", + "TranslatePdfTextResponse", "UpResponse", ) @@ -350,6 +351,45 @@ class SummarizePdfTextResponse(BaseModel): ] = None +class TranslatePdfTextResponse(BaseModel): + """Response returned by the translated-pdf-text tool.""" + + model_config = ConfigDict(extra="allow") + + translation: Annotated[ + str | None, + Field( + description="Inline translation content when output_type is json.", + default=None, + ), + ] = None + input_id: Annotated[ + PdfRestFileID, + Field( + validation_alias=AliasChoices("input_id", "inputId"), + description="The id of the input file.", + ), + ] + output_url: Annotated[ + HttpUrl | None, + Field( + alias="outputUrl", + validation_alias=AliasChoices("output_url", "outputUrl"), + description="Download URL for file output.", + default=None, + ), + ] = None + output_id: Annotated[ + PdfRestFileID | None, + Field( + alias="outputId", + validation_alias=AliasChoices("output_id", "outputId"), + description="The id of the generated output when output_type is file.", + default=None, + ), + ] = None + + class PdfRestInfoResponse(BaseModel): """A response containing the output from the /info route.""" diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index d1c16809..87cb53b8 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -14,6 +14,8 @@ SummaryFormat, SummaryOutputFormat, SummaryOutputType, + TranslateOutputFormat, + TranslateOutputType, ) __all__ = [ @@ -30,4 +32,6 @@ "SummaryFormat", "SummaryOutputFormat", "SummaryOutputType", + "TranslateOutputFormat", + "TranslateOutputType", ] diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 10fc2028..1c692c5e 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -26,6 +26,8 @@ "SummaryFormat", "SummaryOutputFormat", "SummaryOutputType", + "TranslateOutputFormat", + "TranslateOutputType", ) PdfInfoQuery = Literal[ @@ -117,3 +119,6 @@ class PdfMergeSource(TypedDict, total=False): SummaryOutputFormat = Literal["plaintext", "markdown"] SummaryOutputType = Literal["json", "file"] + +TranslateOutputFormat = Literal["plaintext", "markdown"] +TranslateOutputType = Literal["json", "file"] diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py new file mode 100644 index 00000000..da35d638 --- /dev/null +++ b/tests/live/test_live_translate_pdf_text.py @@ -0,0 +1,48 @@ +from __future__ import annotations + +import pytest + +from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest.models import TranslatePdfTextResponse + +from ..resources import get_test_resource_path + + +def test_live_translate_pdf_text_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + response = client.translate_pdf_text( + uploaded, + target_language="fr", + output_type="json", + output_format="plaintext", + ) + + assert isinstance(response, TranslatePdfTextResponse) + assert response.translation + assert response.input_id == uploaded.id + + +def test_live_translate_pdf_text_invalid_output_format( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + with pytest.raises(PdfRestApiError, match="error"): + client.translate_pdf_text( + uploaded, + target_language="es", + extra_body={"output_format": "invalid-format"}, + ) diff --git a/tests/test_translate_pdf_text.py b/tests/test_translate_pdf_text.py new file mode 100644 index 00000000..5f2fd1b3 --- /dev/null +++ b/tests/test_translate_pdf_text.py @@ -0,0 +1,208 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileID, TranslatePdfTextResponse +from pdfrest.models._internal import TranslatePdfTextPayload + +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file + + +def _make_markdown_file(file_id: str) -> PdfRestFile: + return PdfRestFile.model_validate( + { + "id": file_id, + "name": "notes.md", + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "text/markdown", + "size": 64, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + + +def test_translate_payload_rejects_invalid_mime() -> None: + file_id = str(PdfRestFileID.generate()) + image_file = PdfRestFile.model_validate( + { + "id": file_id, + "name": "image.png", + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "image/png", + "size": 10, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + + with pytest.raises( + ValidationError, match="Must be a PDF, Markdown, or plain text file" + ): + TranslatePdfTextPayload.model_validate( + {"files": [image_file], "target_language": "fr"} + ) + + +def test_translate_payload_requires_target_language() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + with pytest.raises(ValidationError): + TranslatePdfTextPayload.model_validate({"files": [file_repr]}) + + +def test_translate_pdf_text_json_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = _make_markdown_file(str(PdfRestFileID.generate(1))) + payload_dump = TranslatePdfTextPayload.model_validate( + { + "files": [input_file], + "target_language": "fr", + "source_language": "en", + "pages": ["1-2"], + "output_format": "plaintext", + "output_type": "json", + "output": "translation", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/translated-pdf-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "translation": "Bonjour", + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.translate_pdf_text( + input_file, + target_language="fr", + source_language="en", + pages=["1-2"], + output_format="plaintext", + output_type="json", + output="translation", + ) + + assert seen == {"post": 1} + assert isinstance(response, TranslatePdfTextResponse) + assert response.translation == "Bonjour" + assert response.input_id == input_file.id + assert response.output_id is None + assert response.output_url is None + + +def test_translate_pdf_text_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + payload_dump = TranslatePdfTextPayload.model_validate( + { + "files": [input_file], + "target_language": "es", + "output_type": "file", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + output_id = str(PdfRestFileID.generate()) + + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/translated-pdf-text": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + for key, value in payload_dump.items(): + assert payload[key] == value + assert payload["debug"] is True + return httpx.Response( + 200, + json={ + "outputUrl": f"https://api.pdfrest.com/resource/{output_id}?format=file", + "outputId": output_id, + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.translate_pdf_text( + input_file, + target_language="es", + output_type="file", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.3, + ) + + assert isinstance(response, TranslatePdfTextResponse) + assert response.output_id == output_id + assert response.output_url + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.3) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.3) + + +@pytest.mark.asyncio +async def test_async_translate_pdf_text_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + payload_dump = TranslatePdfTextPayload.model_validate( + {"files": [input_file], "target_language": "de", "output_type": "json"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/translated-pdf-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + for key, value in payload_dump.items(): + assert payload[key] == value + return httpx.Response( + 200, + json={ + "translation": "Hallo", + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.translate_pdf_text( + input_file, target_language="de", output_type="json" + ) + + assert seen == {"post": 1} + assert isinstance(response, TranslatePdfTextResponse) + assert response.translation == "Hallo" + assert response.input_id == input_file.id From 935fda2d79b15b4d36fbf9f4b6db6666e0f86e0c Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 18 Dec 2025 15:34:23 -0600 Subject: [PATCH 04/61] Add Extract Images Assisted-by: Codex --- src/pdfrest/client.py | 117 ++++++++++++++ src/pdfrest/models/__init__.py | 2 + src/pdfrest/models/_internal.py | 32 ++++ src/pdfrest/models/public.py | 26 +++ tests/live/test_live_extract_images.py | 42 +++++ tests/test_extract_images.py | 211 +++++++++++++++++++++++++ 6 files changed, 430 insertions(+) create mode 100644 tests/live/test_live_extract_images.py create mode 100644 tests/test_extract_images.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index f046a4de..6efbfe8d 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -61,6 +61,7 @@ ) from .models import ( PdfRestDeletionResponse, + ExtractImagesResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -77,6 +78,7 @@ BasePdfRestGraphicPayload, BmpPdfRestPayload, DeletePayload, + ExtractImagesPayload, GifPdfRestPayload, JpegPdfRestPayload, PdfCompressPayload, @@ -2204,6 +2206,60 @@ def translate_pdf_text( raw_payload = self._send_request(request) return TranslatePdfTextResponse.model_validate(raw_payload) + def extract_images( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + pages: PdfPageSelection | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> ExtractImagesResponse: + """Extract embedded images from a PDF.""" + + payload: dict[str, Any] = {"files": file} + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = ExtractImagesPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/extracted-images", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = self._send_request(request) + raw_response = PdfRestRawFileResponse.model_validate(raw_payload) + output_ids = raw_response.ids or [] + output_files = [ + self.fetch_file_info( + str(file_id), + extra_query=extra_query, + extra_headers=extra_headers, + timeout=timeout, + ) + for file_id in output_ids + ] + input_id = raw_response.input_id[0] if raw_response.input_id else "" + return ExtractImagesResponse.model_validate( + { + "input_id": input_id, + "output_files": [ + file.model_dump(mode="json", by_alias=True) for file in output_files + ], + "warning": raw_response.warning, + } + ) + def preview_redactions( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2820,6 +2876,67 @@ async def translate_pdf_text( raw_payload = await self._send_request(request) return TranslatePdfTextResponse.model_validate(raw_payload) + async def extract_images( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + pages: PdfPageSelection | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> ExtractImagesResponse: + """Extract embedded images from a PDF.""" + + payload: dict[str, Any] = {"files": file} + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = ExtractImagesPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/extracted-images", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = await self._send_request(request) + raw_response = PdfRestRawFileResponse.model_validate(raw_payload) + output_ids = raw_response.ids or [] + semaphore = asyncio.Semaphore(DEFAULT_FILE_INFO_CONCURRENCY) + + async def fetch(file_id: str) -> PdfRestFile: + async with semaphore: + return await self.fetch_file_info( + file_id, + extra_query=extra_query, + extra_headers=extra_headers, + timeout=timeout, + ) + + output_files: list[PdfRestFile] = [] + if output_ids: + output_files = list( + await asyncio.gather(*(fetch(fid) for fid in output_ids)) + ) + input_id = raw_response.input_id[0] if raw_response.input_id else "" + return ExtractImagesResponse.model_validate( + { + "input_id": input_id, + "output_files": [ + file.model_dump(mode="json", by_alias=True) for file in output_files + ], + "warning": raw_response.warning, + } + ) + async def preview_redactions( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index 92907075..6d4f8ad8 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -1,5 +1,6 @@ from .public import ( PdfRestDeletionResponse, + ExtractImagesResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -12,6 +13,7 @@ __all__ = [ "PdfRestDeletionResponse", + "ExtractImagesResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 8c79046a..70aecba1 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -357,6 +357,38 @@ class TranslatePdfTextPayload(BaseModel): ] = None +class ExtractImagesPayload(BaseModel): + """Adapt caller options into a pdfRest-ready extract images request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + pages: Annotated[ + list[AscendingPageRange] | None, + Field(serialization_alias="pages", min_length=1, default=None), + BeforeValidator(_ensure_list), + BeforeValidator(_split_comma_list), + BeforeValidator(_int_to_string), + PlainSerializer(_serialize_page_ranges), + ] = None + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + RgbChannel = Annotated[int, Field(ge=0, le=255)] diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 01b168bc..82c362db 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -21,6 +21,7 @@ __all__ = ( "PdfRestDeletionResponse", + "ExtractImagesResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", @@ -390,6 +391,31 @@ class TranslatePdfTextResponse(BaseModel): ] = None +class ExtractImagesResponse(BaseModel): + """Response returned by the extracted-images tool.""" + + model_config = ConfigDict(extra="allow") + + input_id: Annotated[ + PdfRestFileID, + Field( + validation_alias=AliasChoices("input_id", "inputId"), + description="The id of the input file.", + ), + ] + output_files: Annotated[ + list[PdfRestFile], + Field( + description="The list of extracted image files.", + validation_alias=AliasChoices("output_files", "outputFiles"), + ), + ] + warning: Annotated[ + str | None, + Field(description="A warning that was generated during extraction."), + ] = None + + class PdfRestInfoResponse(BaseModel): """A response containing the output from the /info route.""" diff --git a/tests/live/test_live_extract_images.py b/tests/live/test_live_extract_images.py new file mode 100644 index 00000000..b89df400 --- /dev/null +++ b/tests/live/test_live_extract_images.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import pytest + +from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest.models import ExtractImagesResponse + +from ..resources import get_test_resource_path + + +def test_live_extract_images_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + response = client.extract_images(uploaded) + + assert isinstance(response, ExtractImagesResponse) + assert response.output_files + assert response.input_id == uploaded.id + + +def test_live_extract_images_invalid_pages( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + with pytest.raises(PdfRestApiError): + client.extract_images( + uploaded, + extra_body={"pages": "last-1"}, + ) diff --git a/tests/test_extract_images.py b/tests/test_extract_images.py new file mode 100644 index 00000000..2ef96842 --- /dev/null +++ b/tests/test_extract_images.py @@ -0,0 +1,211 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import ExtractImagesResponse, PdfRestFile, PdfRestFileID +from pdfrest.models._internal import ExtractImagesPayload + +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file + + +def _make_png_file(file_id: str, name: str) -> PdfRestFile: + return PdfRestFile.model_validate( + { + "id": file_id, + "name": name, + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "image/png", + "size": 10, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + + +def test_extract_images_payload_rejects_non_pdf() -> None: + file_id = str(PdfRestFileID.generate()) + text_file = PdfRestFile.model_validate( + { + "id": file_id, + "name": "notes.txt", + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "text/plain", + "size": 64, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + with pytest.raises(ValidationError, match="Must be a PDF file"): + ExtractImagesPayload.model_validate({"files": [text_file]}) + + +def test_extract_images_payload_invalid_page_range() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + with pytest.raises( + ValidationError, match="The start page must be less than or equal to the end" + ): + ExtractImagesPayload.model_validate({"files": [file_repr], "pages": ["5-2"]}) + + +def test_extract_images_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id_1 = str(PdfRestFileID.generate()) + output_id_2 = str(PdfRestFileID.generate()) + + payload_dump = ExtractImagesPayload.model_validate( + {"files": [input_file], "pages": ["1-3"], "output": "images"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/extracted-images": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [str(input_file.id)], + "outputId": [output_id_1, output_id_2], + }, + ) + if request.method == "GET" and request.url.path in { + f"/resource/{output_id_1}", + f"/resource/{output_id_2}", + }: + seen["get"] += 1 + return httpx.Response( + 200, + json=_make_png_file( + output_id_1 + if request.url.path.endswith(output_id_1) + else output_id_2, + "image.png", + ).model_dump(mode="json", by_alias=True), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.extract_images(input_file, pages=["1-3"], output="images") + + assert seen == {"post": 1, "get": 2} + assert isinstance(response, ExtractImagesResponse) + assert len(response.output_files) == 2 + assert response.input_id == input_file.id + + +def test_extract_images_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + payload_dump = ExtractImagesPayload.model_validate( + {"files": [input_file], "pages": ["1-last"]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/extracted-images": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump | {"debug": True} + return httpx.Response( + 200, + json={ + "inputId": str(input_file.id), + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=_make_png_file(output_id, "debug.png").model_dump( + mode="json", by_alias=True + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.extract_images( + input_file, + pages=["1-last"], + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.3, + ) + + assert isinstance(response, ExtractImagesResponse) + assert len(response.output_files) == 1 + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.3) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.3) + + +@pytest.mark.asyncio +async def test_async_extract_images_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = ExtractImagesPayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/extracted-images": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [str(input_file.id)], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=_make_png_file(output_id, "async.png").model_dump( + mode="json", by_alias=True + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.extract_images(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, ExtractImagesResponse) + assert len(response.output_files) == 1 + assert response.input_id == input_file.id From 760c98fd0d0b974f1013abcbb7ec0372bfbd02ec Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 18 Dec 2025 15:55:36 -0600 Subject: [PATCH 05/61] Add Extract Text Assisted-by: Codex --- src/pdfrest/client.py | 70 +++++++++++ src/pdfrest/models/__init__.py | 2 + src/pdfrest/models/_internal.py | 32 +++++ src/pdfrest/models/public.py | 44 +++++++ tests/live/test_live_extract_text.py | 42 +++++++ tests/test_extract_text.py | 168 +++++++++++++++++++++++++++ 6 files changed, 358 insertions(+) create mode 100644 tests/live/test_live_extract_text.py create mode 100644 tests/test_extract_text.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 6efbfe8d..6918bef4 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -62,6 +62,7 @@ from .models import ( PdfRestDeletionResponse, ExtractImagesResponse, + ExtractTextResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -79,6 +80,7 @@ BmpPdfRestPayload, DeletePayload, ExtractImagesPayload, + ExtractTextPayload, GifPdfRestPayload, JpegPdfRestPayload, PdfCompressPayload, @@ -2260,6 +2262,40 @@ def extract_images( } ) + def extract_text( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + pages: PdfPageSelection | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> ExtractTextResponse: + """Extract text content from a PDF.""" + + payload: dict[str, Any] = {"files": file} + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = ExtractTextPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/extracted-text", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = self._send_request(request) + return ExtractTextResponse.model_validate(raw_payload) + def preview_redactions( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2937,6 +2973,40 @@ async def fetch(file_id: str) -> PdfRestFile: } ) + async def extract_text( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + pages: PdfPageSelection | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> ExtractTextResponse: + """Extract text content from a PDF.""" + + payload: dict[str, Any] = {"files": file} + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = ExtractTextPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/extracted-text", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = await self._send_request(request) + return ExtractTextResponse.model_validate(raw_payload) + async def preview_redactions( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index 6d4f8ad8..6cb78e06 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -1,6 +1,7 @@ from .public import ( PdfRestDeletionResponse, ExtractImagesResponse, + ExtractTextResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -14,6 +15,7 @@ __all__ = [ "PdfRestDeletionResponse", "ExtractImagesResponse", + "ExtractTextResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 70aecba1..a5060f42 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -306,6 +306,38 @@ class SummarizePdfTextPayload(BaseModel): ] = None +class ExtractTextPayload(BaseModel): + """Adapt caller options into a pdfRest-ready extract text request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + pages: Annotated[ + list[AscendingPageRange] | None, + Field(serialization_alias="pages", min_length=1, default=None), + BeforeValidator(_ensure_list), + BeforeValidator(_split_comma_list), + BeforeValidator(_int_to_string), + PlainSerializer(_serialize_page_ranges), + ] = None + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class TranslatePdfTextPayload(BaseModel): """Adapt caller options into a pdfRest-ready translate request payload.""" diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 82c362db..ecd97892 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -22,6 +22,7 @@ __all__ = ( "PdfRestDeletionResponse", "ExtractImagesResponse", + "ExtractTextResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", @@ -416,6 +417,49 @@ class ExtractImagesResponse(BaseModel): ] = None +class ExtractTextResponse(BaseModel): + """Response returned by the extracted-text tool.""" + + model_config = ConfigDict(extra="allow") + + text: Annotated[ + str | None, + Field( + description="Inline extracted text when output_type is json.", + default=None, + ), + ] = None + input_id: Annotated[ + PdfRestFileID, + Field( + validation_alias=AliasChoices("input_id", "inputId"), + description="The id of the input file.", + ), + ] + output_url: Annotated[ + HttpUrl | None, + Field( + alias="outputUrl", + validation_alias=AliasChoices("output_url", "outputUrl"), + description="Download URL for file output.", + default=None, + ), + ] = None + output_id: Annotated[ + PdfRestFileID | None, + Field( + alias="outputId", + validation_alias=AliasChoices("output_id", "outputId"), + description="The id of the generated output when output_type is file.", + default=None, + ), + ] = None + warning: Annotated[ + str | None, + Field(description="A warning that was generated during text extraction."), + ] = None + + class PdfRestInfoResponse(BaseModel): """A response containing the output from the /info route.""" diff --git a/tests/live/test_live_extract_text.py b/tests/live/test_live_extract_text.py new file mode 100644 index 00000000..18d98f71 --- /dev/null +++ b/tests/live/test_live_extract_text.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import pytest + +from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest.models import ExtractTextResponse + +from ..resources import get_test_resource_path + + +def test_live_extract_text_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + response = client.extract_text(uploaded, output=None) + + assert isinstance(response, ExtractTextResponse) + assert response.text + assert response.input_id == uploaded.id + + +def test_live_extract_text_invalid_pages( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + with pytest.raises(PdfRestApiError): + client.extract_text( + uploaded, + extra_body={"pages": "last-1"}, + ) diff --git a/tests/test_extract_text.py b/tests/test_extract_text.py new file mode 100644 index 00000000..048a636a --- /dev/null +++ b/tests/test_extract_text.py @@ -0,0 +1,168 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import ExtractTextResponse, PdfRestFile, PdfRestFileID +from pdfrest.models._internal import ExtractTextPayload + +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file + + +def test_extract_text_payload_rejects_non_pdf() -> None: + file_id = str(PdfRestFileID.generate()) + text_file = PdfRestFile.model_validate( + { + "id": file_id, + "name": "notes.txt", + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "text/plain", + "size": 64, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + with pytest.raises(ValidationError, match="Must be a PDF file"): + ExtractTextPayload.model_validate({"files": [text_file]}) + + +def test_extract_text_payload_invalid_page_range() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + with pytest.raises( + ValidationError, match="The start page must be less than or equal to the end" + ): + ExtractTextPayload.model_validate({"files": [file_repr], "pages": ["5-2"]}) + + +def test_extract_text_json_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + payload_dump = ExtractTextPayload.model_validate( + {"files": [input_file], "pages": ["1-3"], "output": "text"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/extracted-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "text": "Example extracted text", + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.extract_text( + input_file, + pages=["1-3"], + output="text", + ) + + assert seen == {"post": 1} + assert isinstance(response, ExtractTextResponse) + assert response.text == "Example extracted text" + assert response.input_id == input_file.id + assert response.output_id is None + assert response.output_url is None + + +def test_extract_text_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + payload_dump = ExtractTextPayload.model_validate( + {"files": [input_file], "output": "file-output"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/extracted-text": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump | {"debug": True} + return httpx.Response( + 200, + json={ + "outputUrl": f"https://api.pdfrest.com/resource/{output_id}?format=file", + "outputId": output_id, + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.extract_text( + input_file, + output="file-output", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.35, + ) + + assert isinstance(response, ExtractTextResponse) + assert response.output_id == output_id + assert response.output_url + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.35) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.35) + + +@pytest.mark.asyncio +async def test_async_extract_text_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + payload_dump = ExtractTextPayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/extracted-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "text": "Async text", + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.extract_text(input_file) + + assert seen == {"post": 1} + assert isinstance(response, ExtractTextResponse) + assert response.text == "Async text" + assert response.input_id == input_file.id From 1ccf51fe74ffeb44fd069d1422014ac9ab5867aa Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 18 Dec 2025 15:59:20 -0600 Subject: [PATCH 06/61] Add Convert to Markdown Assisted-by: Codex --- src/pdfrest/client.py | 82 +++++++++ src/pdfrest/models/__init__.py | 2 + src/pdfrest/models/_internal.py | 39 +++++ src/pdfrest/models/public.py | 44 +++++ tests/live/test_live_convert_to_markdown.py | 46 +++++ tests/test_convert_to_markdown.py | 182 ++++++++++++++++++++ 6 files changed, 395 insertions(+) create mode 100644 tests/live/test_live_convert_to_markdown.py create mode 100644 tests/test_convert_to_markdown.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 6918bef4..04bf7aa3 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -61,6 +61,7 @@ ) from .models import ( PdfRestDeletionResponse, + ConvertToMarkdownResponse, ExtractImagesResponse, ExtractTextResponse, PdfRestErrorResponse, @@ -79,6 +80,7 @@ BasePdfRestGraphicPayload, BmpPdfRestPayload, DeletePayload, + ConvertToMarkdownPayload, ExtractImagesPayload, ExtractTextPayload, GifPdfRestPayload, @@ -2163,6 +2165,46 @@ def summarize_pdf_text( raw_payload = self._send_request(request) return SummarizePdfTextResponse.model_validate(raw_payload) + def convert_to_markdown( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + pages: PdfPageSelection | None = None, + output_type: SummaryOutputType = "json", + output_format: SummaryOutputFormat = "markdown", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> ConvertToMarkdownResponse: + """Convert a PDF to Markdown.""" + + payload: dict[str, Any] = { + "files": file, + "output_type": output_type, + "output_format": output_format, + } + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = ConvertToMarkdownPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/markdown", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = self._send_request(request) + return ConvertToMarkdownResponse.model_validate(raw_payload) + def translate_pdf_text( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2867,6 +2909,46 @@ async def summarize_pdf_text( raw_payload = await self._send_request(request) return SummarizePdfTextResponse.model_validate(raw_payload) + async def convert_to_markdown( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + pages: PdfPageSelection | None = None, + output_type: SummaryOutputType = "json", + output_format: SummaryOutputFormat = "markdown", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> ConvertToMarkdownResponse: + """Convert a PDF to Markdown.""" + + payload: dict[str, Any] = { + "files": file, + "output_type": output_type, + "output_format": output_format, + } + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = ConvertToMarkdownPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/markdown", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = await self._send_request(request) + return ConvertToMarkdownResponse.model_validate(raw_payload) + async def translate_pdf_text( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index 6cb78e06..ce017acf 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -1,5 +1,6 @@ from .public import ( PdfRestDeletionResponse, + ConvertToMarkdownResponse, ExtractImagesResponse, ExtractTextResponse, PdfRestErrorResponse, @@ -14,6 +15,7 @@ __all__ = [ "PdfRestDeletionResponse", + "ConvertToMarkdownResponse", "ExtractImagesResponse", "ExtractTextResponse", "PdfRestErrorResponse", diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index a5060f42..1066dc37 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -338,6 +338,45 @@ class ExtractTextPayload(BaseModel): ] = None +class ConvertToMarkdownPayload(BaseModel): + """Adapt caller options into a pdfRest-ready markdown conversion payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + pages: Annotated[ + list[AscendingPageRange] | None, + Field(serialization_alias="pages", min_length=1, default=None), + BeforeValidator(_ensure_list), + BeforeValidator(_split_comma_list), + BeforeValidator(_int_to_string), + PlainSerializer(_serialize_page_ranges), + ] = None + output_type: Annotated[ + SummaryOutputType, Field(serialization_alias="output_type", default="json") + ] = "json" + output_format: Annotated[ + SummaryOutputFormat, + Field(serialization_alias="output_format", default="markdown"), + ] = "markdown" + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class TranslatePdfTextPayload(BaseModel): """Adapt caller options into a pdfRest-ready translate request payload.""" diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index ecd97892..46d343b1 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -21,6 +21,7 @@ __all__ = ( "PdfRestDeletionResponse", + "ConvertToMarkdownResponse", "ExtractImagesResponse", "ExtractTextResponse", "PdfRestErrorResponse", @@ -460,6 +461,49 @@ class ExtractTextResponse(BaseModel): ] = None +class ConvertToMarkdownResponse(BaseModel): + """Response returned by the markdown conversion tool.""" + + model_config = ConfigDict(extra="allow") + + markdown: Annotated[ + str | None, + Field( + description="Inline markdown content when output_type is json.", + default=None, + ), + ] = None + input_id: Annotated[ + PdfRestFileID, + Field( + validation_alias=AliasChoices("input_id", "inputId"), + description="The id of the input file.", + ), + ] + output_url: Annotated[ + HttpUrl | None, + Field( + alias="outputUrl", + validation_alias=AliasChoices("output_url", "outputUrl"), + description="Download URL for file output.", + default=None, + ), + ] = None + output_id: Annotated[ + PdfRestFileID | None, + Field( + alias="outputId", + validation_alias=AliasChoices("output_id", "outputId"), + description="The id of the generated output when output_type is file.", + default=None, + ), + ] = None + warning: Annotated[ + str | None, + Field(description="A warning that was generated during markdown conversion."), + ] = None + + class PdfRestInfoResponse(BaseModel): """A response containing the output from the /info route.""" diff --git a/tests/live/test_live_convert_to_markdown.py b/tests/live/test_live_convert_to_markdown.py new file mode 100644 index 00000000..be0c1aef --- /dev/null +++ b/tests/live/test_live_convert_to_markdown.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +import pytest + +from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest.models import ConvertToMarkdownResponse + +from ..resources import get_test_resource_path + + +def test_live_convert_to_markdown_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + response = client.convert_to_markdown( + uploaded, + output_type="json", + output_format="markdown", + ) + + assert isinstance(response, ConvertToMarkdownResponse) + assert response.markdown + assert response.input_id == uploaded.id + + +def test_live_convert_to_markdown_invalid_pages( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + with pytest.raises(PdfRestApiError): + client.convert_to_markdown( + uploaded, + extra_body={"pages": "last-1"}, + ) diff --git a/tests/test_convert_to_markdown.py b/tests/test_convert_to_markdown.py new file mode 100644 index 00000000..fd7c3958 --- /dev/null +++ b/tests/test_convert_to_markdown.py @@ -0,0 +1,182 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import ConvertToMarkdownResponse, PdfRestFile, PdfRestFileID +from pdfrest.models._internal import ConvertToMarkdownPayload + +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file + + +def test_convert_to_markdown_payload_rejects_non_pdf() -> None: + file_id = str(PdfRestFileID.generate()) + text_file = PdfRestFile.model_validate( + { + "id": file_id, + "name": "notes.txt", + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "text/plain", + "size": 64, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + with pytest.raises(ValidationError, match="Must be a PDF file"): + ConvertToMarkdownPayload.model_validate({"files": [text_file]}) + + +def test_convert_to_markdown_payload_invalid_page_range() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + with pytest.raises( + ValidationError, match="The start page must be less than or equal to the end" + ): + ConvertToMarkdownPayload.model_validate( + {"files": [file_repr], "pages": ["5-2"]} + ) + + +def test_convert_to_markdown_json_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + payload_dump = ConvertToMarkdownPayload.model_validate( + { + "files": [input_file], + "pages": ["1-3"], + "output": "md", + "output_type": "json", + "output_format": "markdown", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/markdown": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + for key, value in payload_dump.items(): + assert payload[key] == value + return httpx.Response( + 200, + json={ + "markdown": "# Title", + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_to_markdown( + input_file, + pages=["1-3"], + output="md", + output_type="json", + output_format="markdown", + ) + + assert seen == {"post": 1} + assert isinstance(response, ConvertToMarkdownResponse) + assert response.markdown == "# Title" + assert response.input_id == input_file.id + assert response.output_id is None + assert response.output_url is None + + +def test_convert_to_markdown_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + payload_dump = ConvertToMarkdownPayload.model_validate( + {"files": [input_file], "output_type": "file", "output_format": "markdown"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/markdown": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + for key, value in payload_dump.items(): + assert payload[key] == value + assert payload["debug"] is True + return httpx.Response( + 200, + json={ + "outputUrl": f"https://api.pdfrest.com/resource/{output_id}?format=file", + "outputId": output_id, + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_to_markdown( + input_file, + output_type="file", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.4, + ) + + assert isinstance(response, ConvertToMarkdownResponse) + assert response.output_id == output_id + assert response.output_url + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.4) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.4) + + +@pytest.mark.asyncio +async def test_async_convert_to_markdown_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + payload_dump = ConvertToMarkdownPayload.model_validate( + {"files": [input_file], "output_type": "json"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/markdown": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + for key, value in payload_dump.items(): + assert payload[key] == value + return httpx.Response( + 200, + json={ + "markdown": "Async md", + "inputId": str(input_file.id), + }, + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_to_markdown(input_file, output_type="json") + + assert seen == {"post": 1} + assert isinstance(response, ConvertToMarkdownResponse) + assert response.markdown == "Async md" + assert response.input_id == input_file.id From 2ac7e0002b924765b68801dab3f0b48843d18a33 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 18 Dec 2025 16:11:00 -0600 Subject: [PATCH 07/61] Add OCR PDF Assisted-by: Codex --- src/pdfrest/client.py | 94 ++++++++++++++++ src/pdfrest/models/__init__.py | 2 + src/pdfrest/models/_internal.py | 32 ++++++ src/pdfrest/models/public.py | 37 ++++++ tests/live/test_live_ocr_pdf.py | 42 +++++++ tests/test_ocr_pdf.py | 192 ++++++++++++++++++++++++++++++++ 6 files changed, 399 insertions(+) create mode 100644 tests/live/test_live_ocr_pdf.py create mode 100644 tests/test_ocr_pdf.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 04bf7aa3..81af585d 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -64,6 +64,7 @@ ConvertToMarkdownResponse, ExtractImagesResponse, ExtractTextResponse, + OcrPdfResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -86,6 +87,7 @@ GifPdfRestPayload, JpegPdfRestPayload, PdfCompressPayload, + OcrPdfPayload, PdfFlattenFormsPayload, PdfLinearizePayload, PdfInfoPayload, @@ -2205,6 +2207,52 @@ def convert_to_markdown( raw_payload = self._send_request(request) return ConvertToMarkdownResponse.model_validate(raw_payload) + def ocr_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + pages: PdfPageSelection | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> OcrPdfResponse: + """Perform OCR on a PDF to extract searchable text.""" + + payload: dict[str, Any] = {"files": file} + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = OcrPdfPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/pdf-with-ocr-text", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = self._send_request(request) + raw_response = PdfRestRawFileResponse.model_validate(raw_payload) + output_ids = raw_response.ids or [] + input_id = raw_response.input_id[0] if raw_response.input_id else "" + return OcrPdfResponse.model_validate( + { + "input_id": input_id, + "output_id": output_ids[0] if output_ids else None, + "output_url": raw_response.output_urls[0] + if raw_response.output_urls + else None, + "warning": raw_response.warning, + } + ) + def translate_pdf_text( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2949,6 +2997,52 @@ async def convert_to_markdown( raw_payload = await self._send_request(request) return ConvertToMarkdownResponse.model_validate(raw_payload) + async def ocr_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + pages: PdfPageSelection | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> OcrPdfResponse: + """Perform OCR on a PDF to extract searchable text.""" + + payload: dict[str, Any] = {"files": file} + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + validated_payload = OcrPdfPayload.model_validate(payload) + request = self.prepare_request( + "POST", + "/pdf-with-ocr-text", + json_body=validated_payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ), + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + raw_payload = await self._send_request(request) + raw_response = PdfRestRawFileResponse.model_validate(raw_payload) + output_ids = raw_response.ids or [] + input_id = raw_response.input_id[0] if raw_response.input_id else "" + return OcrPdfResponse.model_validate( + { + "input_id": input_id, + "output_id": output_ids[0] if output_ids else None, + "output_url": raw_response.output_urls[0] + if raw_response.output_urls + else None, + "warning": raw_response.warning, + } + ) + async def translate_pdf_text( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index ce017acf..a3a9fdb2 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -3,6 +3,7 @@ ConvertToMarkdownResponse, ExtractImagesResponse, ExtractTextResponse, + OcrPdfResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -18,6 +19,7 @@ "ConvertToMarkdownResponse", "ExtractImagesResponse", "ExtractTextResponse", + "OcrPdfResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 1066dc37..47ebde0d 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -306,6 +306,38 @@ class SummarizePdfTextPayload(BaseModel): ] = None +class OcrPdfPayload(BaseModel): + """Adapt caller options into a pdfRest-ready OCR request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + pages: Annotated[ + list[AscendingPageRange] | None, + Field(serialization_alias="pages", min_length=1, default=None), + BeforeValidator(_ensure_list), + BeforeValidator(_split_comma_list), + BeforeValidator(_int_to_string), + PlainSerializer(_serialize_page_ranges), + ] = None + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class ExtractTextPayload(BaseModel): """Adapt caller options into a pdfRest-ready extract text request payload.""" diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 46d343b1..15daf64e 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -24,6 +24,7 @@ "ConvertToMarkdownResponse", "ExtractImagesResponse", "ExtractTextResponse", + "OcrPdfResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", @@ -504,6 +505,42 @@ class ConvertToMarkdownResponse(BaseModel): ] = None +class OcrPdfResponse(BaseModel): + """Response returned by the pdf-with-ocr-text tool.""" + + model_config = ConfigDict(extra="allow") + + input_id: Annotated[ + PdfRestFileID, + Field( + validation_alias=AliasChoices("input_id", "inputId"), + description="The id of the input file.", + ), + ] + output_url: Annotated[ + HttpUrl | None, + Field( + alias="outputUrl", + validation_alias=AliasChoices("output_url", "outputUrl"), + description="Download URL for file output.", + default=None, + ), + ] = None + output_id: Annotated[ + PdfRestFileID | None, + Field( + alias="outputId", + validation_alias=AliasChoices("output_id", "outputId"), + description="The id of the generated output file.", + default=None, + ), + ] = None + warning: Annotated[ + str | None, + Field(description="A warning that was generated during OCR."), + ] = None + + class PdfRestInfoResponse(BaseModel): """A response containing the output from the /info route.""" diff --git a/tests/live/test_live_ocr_pdf.py b/tests/live/test_live_ocr_pdf.py new file mode 100644 index 00000000..43eeb2be --- /dev/null +++ b/tests/live/test_live_ocr_pdf.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import pytest + +from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest.models import OcrPdfResponse + +from ..resources import get_test_resource_path + + +def test_live_ocr_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + response = client.ocr_pdf(uploaded) + + assert isinstance(response, OcrPdfResponse) + assert response.output_id + assert response.input_id == uploaded.id + + +def test_live_ocr_pdf_invalid_pages( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + with pytest.raises(PdfRestApiError): + client.ocr_pdf( + uploaded, + extra_body={"pages": "last-1"}, + ) diff --git a/tests/test_ocr_pdf.py b/tests/test_ocr_pdf.py new file mode 100644 index 00000000..56b45bf0 --- /dev/null +++ b/tests/test_ocr_pdf.py @@ -0,0 +1,192 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import OcrPdfResponse, PdfRestFile, PdfRestFileID +from pdfrest.models._internal import OcrPdfPayload + +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file + + +def test_ocr_payload_rejects_non_pdf() -> None: + file_id = str(PdfRestFileID.generate()) + text_file = PdfRestFile.model_validate( + { + "id": file_id, + "name": "notes.txt", + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "text/plain", + "size": 64, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + with pytest.raises(ValidationError, match="Must be a PDF file"): + OcrPdfPayload.model_validate({"files": [text_file]}) + + +def test_ocr_payload_invalid_page_range() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + with pytest.raises( + ValidationError, match="The start page must be less than or equal to the end" + ): + OcrPdfPayload.model_validate({"files": [file_repr], "pages": ["5-2"]}) + + +def test_ocr_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + payload_dump = OcrPdfPayload.model_validate( + {"files": [input_file], "pages": ["1-3"], "output": "ocr"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + output_id = str(PdfRestFileID.generate()) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf-with-ocr-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": str(input_file.id), + "outputId": output_id, + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=make_pdf_file(output_id, "ocr.pdf").model_dump( + mode="json", by_alias=True + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.ocr_pdf( + input_file, + pages=["1-3"], + output="ocr", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, OcrPdfResponse) + assert response.output_id == output_id + assert response.output_url is None # not provided in mocked response + assert response.input_id == input_file.id + + +def test_ocr_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + payload_dump = OcrPdfPayload.model_validate({"files": [input_file]}).model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf-with-ocr-text": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump | {"debug": True} + return httpx.Response( + 200, + json={ + "outputId": output_id, + "inputId": str(input_file.id), + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=make_pdf_file(output_id, "custom-ocr.pdf").model_dump( + mode="json", by_alias=True + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.ocr_pdf( + input_file, + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.4, + ) + + assert isinstance(response, OcrPdfResponse) + assert response.output_id == output_id + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.4) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.4) + + +@pytest.mark.asyncio +async def test_async_ocr_pdf_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + payload_dump = OcrPdfPayload.model_validate({"files": [input_file]}).model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ) + output_id = str(PdfRestFileID.generate()) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf-with-ocr-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "outputId": output_id, + "inputId": str(input_file.id), + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=make_pdf_file(output_id, "async-ocr.pdf").model_dump( + mode="json", by_alias=True + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.ocr_pdf(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, OcrPdfResponse) + assert response.output_id == output_id + assert response.input_id == input_file.id From f78d6f9ef17075e11b65b415913ef2d78b2a72b5 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 11:13:42 -0600 Subject: [PATCH 08/61] Refactor OCR PDF to utilize PdfRestFileBasedResponse Assisted-by: Codex --- src/pdfrest/client.py | 55 ++++++--------------------------- src/pdfrest/models/__init__.py | 2 -- src/pdfrest/models/public.py | 37 ---------------------- tests/live/test_live_ocr_pdf.py | 7 +++-- tests/test_ocr_pdf.py | 16 +++++----- 5 files changed, 22 insertions(+), 95 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 81af585d..8e554990 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -64,7 +64,6 @@ ConvertToMarkdownResponse, ExtractImagesResponse, ExtractTextResponse, - OcrPdfResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -2217,7 +2216,7 @@ def ocr_pdf( extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> OcrPdfResponse: + ) -> PdfRestFileBasedResponse: """Perform OCR on a PDF to extract searchable text.""" payload: dict[str, Any] = {"files": file} @@ -2226,32 +2225,15 @@ def ocr_pdf( if output is not None: payload["output"] = output - validated_payload = OcrPdfPayload.model_validate(payload) - request = self.prepare_request( - "POST", - "/pdf-with-ocr-text", - json_body=validated_payload.model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ), + return self._post_file_operation( + endpoint="/pdf-with-ocr-text", + payload=payload, + payload_model=OcrPdfPayload, extra_query=extra_query, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, ) - raw_payload = self._send_request(request) - raw_response = PdfRestRawFileResponse.model_validate(raw_payload) - output_ids = raw_response.ids or [] - input_id = raw_response.input_id[0] if raw_response.input_id else "" - return OcrPdfResponse.model_validate( - { - "input_id": input_id, - "output_id": output_ids[0] if output_ids else None, - "output_url": raw_response.output_urls[0] - if raw_response.output_urls - else None, - "warning": raw_response.warning, - } - ) def translate_pdf_text( self, @@ -3007,7 +2989,7 @@ async def ocr_pdf( extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> OcrPdfResponse: + ) -> PdfRestFileBasedResponse: """Perform OCR on a PDF to extract searchable text.""" payload: dict[str, Any] = {"files": file} @@ -3016,32 +2998,15 @@ async def ocr_pdf( if output is not None: payload["output"] = output - validated_payload = OcrPdfPayload.model_validate(payload) - request = self.prepare_request( - "POST", - "/pdf-with-ocr-text", - json_body=validated_payload.model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ), + return await self._post_file_operation( + endpoint="/pdf-with-ocr-text", + payload=payload, + payload_model=OcrPdfPayload, extra_query=extra_query, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, ) - raw_payload = await self._send_request(request) - raw_response = PdfRestRawFileResponse.model_validate(raw_payload) - output_ids = raw_response.ids or [] - input_id = raw_response.input_id[0] if raw_response.input_id else "" - return OcrPdfResponse.model_validate( - { - "input_id": input_id, - "output_id": output_ids[0] if output_ids else None, - "output_url": raw_response.output_urls[0] - if raw_response.output_urls - else None, - "warning": raw_response.warning, - } - ) async def translate_pdf_text( self, diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index a3a9fdb2..ce017acf 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -3,7 +3,6 @@ ConvertToMarkdownResponse, ExtractImagesResponse, ExtractTextResponse, - OcrPdfResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -19,7 +18,6 @@ "ConvertToMarkdownResponse", "ExtractImagesResponse", "ExtractTextResponse", - "OcrPdfResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 15daf64e..46d343b1 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -24,7 +24,6 @@ "ConvertToMarkdownResponse", "ExtractImagesResponse", "ExtractTextResponse", - "OcrPdfResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", @@ -505,42 +504,6 @@ class ConvertToMarkdownResponse(BaseModel): ] = None -class OcrPdfResponse(BaseModel): - """Response returned by the pdf-with-ocr-text tool.""" - - model_config = ConfigDict(extra="allow") - - input_id: Annotated[ - PdfRestFileID, - Field( - validation_alias=AliasChoices("input_id", "inputId"), - description="The id of the input file.", - ), - ] - output_url: Annotated[ - HttpUrl | None, - Field( - alias="outputUrl", - validation_alias=AliasChoices("output_url", "outputUrl"), - description="Download URL for file output.", - default=None, - ), - ] = None - output_id: Annotated[ - PdfRestFileID | None, - Field( - alias="outputId", - validation_alias=AliasChoices("output_id", "outputId"), - description="The id of the generated output file.", - default=None, - ), - ] = None - warning: Annotated[ - str | None, - Field(description="A warning that was generated during OCR."), - ] = None - - class PdfRestInfoResponse(BaseModel): """A response containing the output from the /info route.""" diff --git a/tests/live/test_live_ocr_pdf.py b/tests/live/test_live_ocr_pdf.py index 43eeb2be..065a7022 100644 --- a/tests/live/test_live_ocr_pdf.py +++ b/tests/live/test_live_ocr_pdf.py @@ -3,7 +3,7 @@ import pytest from pdfrest import PdfRestApiError, PdfRestClient -from pdfrest.models import OcrPdfResponse +from pdfrest.models import PdfRestFileBasedResponse from ..resources import get_test_resource_path @@ -20,8 +20,9 @@ def test_live_ocr_pdf_success( uploaded = client.files.create_from_paths([resource])[0] response = client.ocr_pdf(uploaded) - assert isinstance(response, OcrPdfResponse) - assert response.output_id + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files + assert response.output_file.id assert response.input_id == uploaded.id diff --git a/tests/test_ocr_pdf.py b/tests/test_ocr_pdf.py index 56b45bf0..b5059e30 100644 --- a/tests/test_ocr_pdf.py +++ b/tests/test_ocr_pdf.py @@ -7,7 +7,7 @@ from pydantic import ValidationError from pdfrest import AsyncPdfRestClient, PdfRestClient -from pdfrest.models import OcrPdfResponse, PdfRestFile, PdfRestFileID +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID from pdfrest.models._internal import OcrPdfPayload from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file @@ -80,9 +80,9 @@ def handler(request: httpx.Request) -> httpx.Response: ) assert seen == {"post": 1, "get": 1} - assert isinstance(response, OcrPdfResponse) - assert response.output_id == output_id - assert response.output_url is None # not provided in mocked response + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.id == output_id + assert response.output_file.name == "ocr.pdf" assert response.input_id == input_file.id @@ -134,8 +134,8 @@ def handler(request: httpx.Request) -> httpx.Response: timeout=0.4, ) - assert isinstance(response, OcrPdfResponse) - assert response.output_id == output_id + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.id == output_id timeout_value = captured_timeout["value"] assert timeout_value is not None if isinstance(timeout_value, dict): @@ -187,6 +187,6 @@ def handler(request: httpx.Request) -> httpx.Response: response = await client.ocr_pdf(input_file) assert seen == {"post": 1, "get": 1} - assert isinstance(response, OcrPdfResponse) - assert response.output_id == output_id + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.id == output_id assert response.input_id == input_file.id From 52728ac99c35918c1d3e6e980486c9c59cf7aca1 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 12:08:16 -0600 Subject: [PATCH 09/61] Remove and replace ExtractImagesResponse Assisted-by: Codex --- src/pdfrest/client.py | 78 ++++---------------------- src/pdfrest/models/__init__.py | 2 - src/pdfrest/models/public.py | 26 --------- tests/live/test_live_extract_images.py | 4 +- tests/test_extract_images.py | 8 +-- 5 files changed, 16 insertions(+), 102 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 8e554990..e00be917 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -62,7 +62,6 @@ from .models import ( PdfRestDeletionResponse, ConvertToMarkdownResponse, - ExtractImagesResponse, ExtractTextResponse, PdfRestErrorResponse, PdfRestFile, @@ -2290,7 +2289,7 @@ def extract_images( extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> ExtractImagesResponse: + ) -> PdfRestFileBasedResponse: """Extract embedded images from a PDF.""" payload: dict[str, Any] = {"files": file} @@ -2299,40 +2298,15 @@ def extract_images( if output is not None: payload["output"] = output - validated_payload = ExtractImagesPayload.model_validate(payload) - request = self.prepare_request( - "POST", - "/extracted-images", - json_body=validated_payload.model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ), + return self._post_file_operation( + endpoint="/extracted-images", + payload=payload, + payload_model=ExtractImagesPayload, extra_query=extra_query, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, ) - raw_payload = self._send_request(request) - raw_response = PdfRestRawFileResponse.model_validate(raw_payload) - output_ids = raw_response.ids or [] - output_files = [ - self.fetch_file_info( - str(file_id), - extra_query=extra_query, - extra_headers=extra_headers, - timeout=timeout, - ) - for file_id in output_ids - ] - input_id = raw_response.input_id[0] if raw_response.input_id else "" - return ExtractImagesResponse.model_validate( - { - "input_id": input_id, - "output_files": [ - file.model_dump(mode="json", by_alias=True) for file in output_files - ], - "warning": raw_response.warning, - } - ) def extract_text( self, @@ -3063,7 +3037,7 @@ async def extract_images( extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> ExtractImagesResponse: + ) -> PdfRestFileBasedResponse: """Extract embedded images from a PDF.""" payload: dict[str, Any] = {"files": file} @@ -3072,47 +3046,15 @@ async def extract_images( if output is not None: payload["output"] = output - validated_payload = ExtractImagesPayload.model_validate(payload) - request = self.prepare_request( - "POST", - "/extracted-images", - json_body=validated_payload.model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ), + return await self._post_file_operation( + endpoint="/extracted-images", + payload=payload, + payload_model=ExtractImagesPayload, extra_query=extra_query, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, ) - raw_payload = await self._send_request(request) - raw_response = PdfRestRawFileResponse.model_validate(raw_payload) - output_ids = raw_response.ids or [] - semaphore = asyncio.Semaphore(DEFAULT_FILE_INFO_CONCURRENCY) - - async def fetch(file_id: str) -> PdfRestFile: - async with semaphore: - return await self.fetch_file_info( - file_id, - extra_query=extra_query, - extra_headers=extra_headers, - timeout=timeout, - ) - - output_files: list[PdfRestFile] = [] - if output_ids: - output_files = list( - await asyncio.gather(*(fetch(fid) for fid in output_ids)) - ) - input_id = raw_response.input_id[0] if raw_response.input_id else "" - return ExtractImagesResponse.model_validate( - { - "input_id": input_id, - "output_files": [ - file.model_dump(mode="json", by_alias=True) for file in output_files - ], - "warning": raw_response.warning, - } - ) async def extract_text( self, diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index ce017acf..6ab74f89 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -1,7 +1,6 @@ from .public import ( PdfRestDeletionResponse, ConvertToMarkdownResponse, - ExtractImagesResponse, ExtractTextResponse, PdfRestErrorResponse, PdfRestFile, @@ -16,7 +15,6 @@ __all__ = [ "PdfRestDeletionResponse", "ConvertToMarkdownResponse", - "ExtractImagesResponse", "ExtractTextResponse", "PdfRestErrorResponse", "PdfRestFile", diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 46d343b1..8e25bfb0 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -22,7 +22,6 @@ __all__ = ( "PdfRestDeletionResponse", "ConvertToMarkdownResponse", - "ExtractImagesResponse", "ExtractTextResponse", "PdfRestErrorResponse", "PdfRestFile", @@ -393,31 +392,6 @@ class TranslatePdfTextResponse(BaseModel): ] = None -class ExtractImagesResponse(BaseModel): - """Response returned by the extracted-images tool.""" - - model_config = ConfigDict(extra="allow") - - input_id: Annotated[ - PdfRestFileID, - Field( - validation_alias=AliasChoices("input_id", "inputId"), - description="The id of the input file.", - ), - ] - output_files: Annotated[ - list[PdfRestFile], - Field( - description="The list of extracted image files.", - validation_alias=AliasChoices("output_files", "outputFiles"), - ), - ] - warning: Annotated[ - str | None, - Field(description="A warning that was generated during extraction."), - ] = None - - class ExtractTextResponse(BaseModel): """Response returned by the extracted-text tool.""" diff --git a/tests/live/test_live_extract_images.py b/tests/live/test_live_extract_images.py index b89df400..7d8abd39 100644 --- a/tests/live/test_live_extract_images.py +++ b/tests/live/test_live_extract_images.py @@ -3,7 +3,7 @@ import pytest from pdfrest import PdfRestApiError, PdfRestClient -from pdfrest.models import ExtractImagesResponse +from pdfrest.models import PdfRestFileBasedResponse from ..resources import get_test_resource_path @@ -20,7 +20,7 @@ def test_live_extract_images_success( uploaded = client.files.create_from_paths([resource])[0] response = client.extract_images(uploaded) - assert isinstance(response, ExtractImagesResponse) + assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files assert response.input_id == uploaded.id diff --git a/tests/test_extract_images.py b/tests/test_extract_images.py index 2ef96842..5dea441b 100644 --- a/tests/test_extract_images.py +++ b/tests/test_extract_images.py @@ -7,7 +7,7 @@ from pydantic import ValidationError from pdfrest import AsyncPdfRestClient, PdfRestClient -from pdfrest.models import ExtractImagesResponse, PdfRestFile, PdfRestFileID +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID from pdfrest.models._internal import ExtractImagesPayload from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file @@ -98,7 +98,7 @@ def handler(request: httpx.Request) -> httpx.Response: response = client.extract_images(input_file, pages=["1-3"], output="images") assert seen == {"post": 1, "get": 2} - assert isinstance(response, ExtractImagesResponse) + assert isinstance(response, PdfRestFileBasedResponse) assert len(response.output_files) == 2 assert response.input_id == input_file.id @@ -152,7 +152,7 @@ def handler(request: httpx.Request) -> httpx.Response: timeout=0.3, ) - assert isinstance(response, ExtractImagesResponse) + assert isinstance(response, PdfRestFileBasedResponse) assert len(response.output_files) == 1 timeout_value = captured_timeout["value"] assert timeout_value is not None @@ -206,6 +206,6 @@ def handler(request: httpx.Request) -> httpx.Response: response = await client.extract_images(input_file) assert seen == {"post": 1, "get": 1} - assert isinstance(response, ExtractImagesResponse) + assert isinstance(response, PdfRestFileBasedResponse) assert len(response.output_files) == 1 assert response.input_id == input_file.id From 4a351939aeb5e09a36c97762d3f135a91f5a4b9e Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 13:23:47 -0600 Subject: [PATCH 10/61] Split Translate PDF methods by output type Assisted-by: Codex --- src/pdfrest/client.py | 89 ++++++++++++++++++++-- src/pdfrest/models/_internal.py | 4 +- src/pdfrest/types/__init__.py | 2 - src/pdfrest/types/public.py | 2 - tests/live/test_live_translate_pdf_text.py | 24 +++++- tests/test_translate_pdf_text.py | 19 +++-- 6 files changed, 117 insertions(+), 23 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index e00be917..d0decbfd 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -114,7 +114,6 @@ SummaryOutputFormat, SummaryOutputType, TranslateOutputFormat, - TranslateOutputType, ) DEFAULT_BASE_URL = "https://api.pdfrest.com" @@ -2242,20 +2241,19 @@ def translate_pdf_text( source_language: str | None = None, pages: PdfPageSelection | None = None, output_format: TranslateOutputFormat = "markdown", - output_type: TranslateOutputType = "json", output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, ) -> TranslatePdfTextResponse: - """Translate the textual content of a PDF, Markdown, or text document.""" + """Translate the textual content of a PDF, Markdown, or text document (JSON).""" payload: dict[str, Any] = { "files": file, "target_language": target_language, "output_format": output_format, - "output_type": output_type, + "output_type": "json", } if source_language is not None: payload["source_language"] = source_language @@ -2279,6 +2277,45 @@ def translate_pdf_text( raw_payload = self._send_request(request) return TranslatePdfTextResponse.model_validate(raw_payload) + def translate_pdf_text_to_file( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + target_language: str, + source_language: str | None = None, + pages: PdfPageSelection | None = None, + output_format: TranslateOutputFormat = "markdown", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Translate textual content and receive a file-based response.""" + + payload: dict[str, Any] = { + "files": file, + "target_language": target_language, + "output_format": output_format, + "output_type": "file", + } + if source_language is not None: + payload["source_language"] = source_language + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/translated-pdf-text", + payload=payload, + payload_model=TranslatePdfTextPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def extract_images( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2990,20 +3027,19 @@ async def translate_pdf_text( source_language: str | None = None, pages: PdfPageSelection | None = None, output_format: TranslateOutputFormat = "markdown", - output_type: TranslateOutputType = "json", output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, ) -> TranslatePdfTextResponse: - """Translate the textual content of a PDF, Markdown, or text document.""" + """Translate the textual content of a PDF, Markdown, or text document (JSON).""" payload: dict[str, Any] = { "files": file, "target_language": target_language, "output_format": output_format, - "output_type": output_type, + "output_type": "json", } if source_language is not None: payload["source_language"] = source_language @@ -3027,6 +3063,45 @@ async def translate_pdf_text( raw_payload = await self._send_request(request) return TranslatePdfTextResponse.model_validate(raw_payload) + async def translate_pdf_text_to_file( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + target_language: str, + source_language: str | None = None, + pages: PdfPageSelection | None = None, + output_format: TranslateOutputFormat = "markdown", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Translate textual content and receive a file-based response.""" + + payload: dict[str, Any] = { + "files": file, + "target_language": target_language, + "output_format": output_format, + "output_type": "file", + } + if source_language is not None: + payload["source_language"] = source_language + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/translated-pdf-text", + payload=payload, + payload_model=TranslatePdfTextPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def extract_images( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 47ebde0d..41a0b8a1 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -28,7 +28,6 @@ SummaryOutputFormat, SummaryOutputType, TranslateOutputFormat, - TranslateOutputType, ) from . import PdfRestFile from .public import PdfRestFileID @@ -451,7 +450,8 @@ class TranslatePdfTextPayload(BaseModel): Field(serialization_alias="output_format", default="markdown"), ] = "markdown" output_type: Annotated[ - TranslateOutputType, Field(serialization_alias="output_type", default="json") + Literal["json", "file"], + Field(serialization_alias="output_type", default="json"), ] = "json" output: Annotated[ str | None, diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index 87cb53b8..adf09638 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -15,7 +15,6 @@ SummaryOutputFormat, SummaryOutputType, TranslateOutputFormat, - TranslateOutputType, ) __all__ = [ @@ -33,5 +32,4 @@ "SummaryOutputFormat", "SummaryOutputType", "TranslateOutputFormat", - "TranslateOutputType", ] diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 1c692c5e..915968cf 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -27,7 +27,6 @@ "SummaryOutputFormat", "SummaryOutputType", "TranslateOutputFormat", - "TranslateOutputType", ) PdfInfoQuery = Literal[ @@ -121,4 +120,3 @@ class PdfMergeSource(TypedDict, total=False): SummaryOutputType = Literal["json", "file"] TranslateOutputFormat = Literal["plaintext", "markdown"] -TranslateOutputType = Literal["json", "file"] diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py index da35d638..fdb1e2ae 100644 --- a/tests/live/test_live_translate_pdf_text.py +++ b/tests/live/test_live_translate_pdf_text.py @@ -3,7 +3,7 @@ import pytest from pdfrest import PdfRestApiError, PdfRestClient -from pdfrest.models import TranslatePdfTextResponse +from pdfrest.models import PdfRestFileBasedResponse, TranslatePdfTextResponse from ..resources import get_test_resource_path @@ -21,7 +21,6 @@ def test_live_translate_pdf_text_success( response = client.translate_pdf_text( uploaded, target_language="fr", - output_type="json", output_format="plaintext", ) @@ -46,3 +45,24 @@ def test_live_translate_pdf_text_invalid_output_format( target_language="es", extra_body={"output_format": "invalid-format"}, ) + + +def test_live_translate_pdf_text_file_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + response = client.translate_pdf_text_to_file( + uploaded, + target_language="fr", + output_format="plaintext", + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files + assert response.input_id == uploaded.id diff --git a/tests/test_translate_pdf_text.py b/tests/test_translate_pdf_text.py index 5f2fd1b3..8b45c88c 100644 --- a/tests/test_translate_pdf_text.py +++ b/tests/test_translate_pdf_text.py @@ -7,7 +7,12 @@ from pydantic import ValidationError from pdfrest import AsyncPdfRestClient, PdfRestClient -from pdfrest.models import PdfRestFile, PdfRestFileID, TranslatePdfTextResponse +from pdfrest.models import ( + PdfRestFile, + PdfRestFileBasedResponse, + PdfRestFileID, + TranslatePdfTextResponse, +) from pdfrest.models._internal import TranslatePdfTextPayload from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file @@ -95,7 +100,6 @@ def handler(request: httpx.Request) -> httpx.Response: source_language="en", pages=["1-2"], output_format="plaintext", - output_type="json", output="translation", ) @@ -145,19 +149,17 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: - response = client.translate_pdf_text( + response = client.translate_pdf_text_to_file( input_file, target_language="es", - output_type="file", extra_query={"trace": "true"}, extra_headers={"X-Debug": "sync"}, extra_body={"debug": True}, timeout=0.3, ) - assert isinstance(response, TranslatePdfTextResponse) - assert response.output_id == output_id - assert response.output_url + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.id == output_id timeout_value = captured_timeout["value"] assert timeout_value is not None if isinstance(timeout_value, dict): @@ -199,7 +201,8 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.translate_pdf_text( - input_file, target_language="de", output_type="json" + input_file, + target_language="de", ) assert seen == {"post": 1} From 3104a63bd79eec3cbac14add2623fc31d4125716 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 15:06:00 -0600 Subject: [PATCH 11/61] client.py: Ruff format imports --- src/pdfrest/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index d0decbfd..90666b86 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -87,8 +87,8 @@ PdfCompressPayload, OcrPdfPayload, PdfFlattenFormsPayload, - PdfLinearizePayload, PdfInfoPayload, + PdfLinearizePayload, PdfMergePayload, PdfRedactionApplyPayload, PdfRedactionPreviewPayload, From 9c3f650a3bf6bcd03eef8b29e349f9cf7a8342cb Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 16:13:10 -0600 Subject: [PATCH 12/61] Add Convert to Excel Assisted-by: Codex --- src/pdfrest/client.py | 53 ++++++ src/pdfrest/models/_internal.py | 24 +++ tests/test_convert_to_excel.py | 275 ++++++++++++++++++++++++++++++++ 3 files changed, 352 insertions(+) create mode 100644 tests/test_convert_to_excel.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 90666b86..6c3c8eac 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -94,6 +94,7 @@ PdfRedactionPreviewPayload, PdfRestRawFileResponse, PdfSplitPayload, + PdfToExcelPayload, PdfToPdfxPayload, PdfToWordPayload, PngPdfRestPayload, @@ -2495,6 +2496,32 @@ def merge_pdfs( timeout=timeout, ) + def convert_to_excel( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert a PDF to an Excel spreadsheet.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/excel", + payload=payload, + payload_model=PdfToExcelPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def convert_to_word( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3323,6 +3350,32 @@ async def merge_pdfs( timeout=timeout, ) + async def convert_to_excel( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert a PDF to an Excel spreadsheet.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/excel", + payload=payload, + payload_model=PdfToExcelPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def convert_to_word( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 41a0b8a1..3a18662e 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -763,6 +763,30 @@ class PdfToWordPayload(BaseModel): ] = None +class PdfToExcelPayload(BaseModel): + """Adapt caller options into a pdfRest-ready Excel request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class PdfToPdfxPayload(BaseModel): """Adapt caller options into a pdfRest-ready PDF/X request payload.""" diff --git a/tests/test_convert_to_excel.py b/tests/test_convert_to_excel.py new file mode 100644 index 00000000..42346aac --- /dev/null +++ b/tests/test_convert_to_excel.py @@ -0,0 +1,275 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfToExcelPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_convert_to_excel_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfToExcelPayload.model_validate( + {"files": [input_file], "output": "report"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/excel": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "report.xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_to_excel(input_file, output="report") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "report.xlsx" + assert ( + output_file.type + == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ) + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_convert_to_excel_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/excel": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_to_excel( + input_file, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.4, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.xlsx" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.4) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.4) + + +@pytest.mark.asyncio +async def test_async_convert_to_excel_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfToExcelPayload.model_validate({"files": [input_file]}).model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + ) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/excel": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_to_excel(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.xlsx" + assert ( + response.output_file.type + == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ) + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_to_excel_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/excel": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_to_excel( + input_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.55, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.xlsx" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.55) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.55) + + +def test_convert_to_excel_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.convert_to_excel(png_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.convert_to_excel([pdf_file, make_pdf_file(PdfRestFileID.generate())]) From e615b1cf81d1fe7bcae12ce32b0695837ce2bc88 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 16:18:59 -0600 Subject: [PATCH 13/61] Add conversion to PowerPoint Assisted-by: Codex --- src/pdfrest/client.py | 53 ++++++ src/pdfrest/models/_internal.py | 24 +++ tests/test_convert_to_powerpoint.py | 277 ++++++++++++++++++++++++++++ 3 files changed, 354 insertions(+) create mode 100644 tests/test_convert_to_powerpoint.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 6c3c8eac..24a1a232 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -96,6 +96,7 @@ PdfSplitPayload, PdfToExcelPayload, PdfToPdfxPayload, + PdfToPowerpointPayload, PdfToWordPayload, PngPdfRestPayload, SummarizePdfTextPayload, @@ -2522,6 +2523,32 @@ def convert_to_excel( timeout=timeout, ) + def convert_to_powerpoint( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert a PDF to a PowerPoint presentation.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/powerpoint", + payload=payload, + payload_model=PdfToPowerpointPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def convert_to_word( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3376,6 +3403,32 @@ async def convert_to_excel( timeout=timeout, ) + async def convert_to_powerpoint( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert a PDF to a PowerPoint presentation.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/powerpoint", + payload=payload, + payload_model=PdfToPowerpointPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def convert_to_word( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 3a18662e..89b95fc9 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -787,6 +787,30 @@ class PdfToExcelPayload(BaseModel): ] = None +class PdfToPowerpointPayload(BaseModel): + """Adapt caller options into a pdfRest-ready PowerPoint request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class PdfToPdfxPayload(BaseModel): """Adapt caller options into a pdfRest-ready PDF/X request payload.""" diff --git a/tests/test_convert_to_powerpoint.py b/tests/test_convert_to_powerpoint.py new file mode 100644 index 00000000..a8c1daa0 --- /dev/null +++ b/tests/test_convert_to_powerpoint.py @@ -0,0 +1,277 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfToPowerpointPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_convert_to_powerpoint_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfToPowerpointPayload.model_validate( + {"files": [input_file], "output": "slides"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/powerpoint": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "slides.pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_to_powerpoint(input_file, output="slides") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "slides.pptx" + assert ( + output_file.type + == "application/vnd.openxmlformats-officedocument.presentationml.presentation" + ) + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_convert_to_powerpoint_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/powerpoint": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_to_powerpoint( + input_file, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.4, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pptx" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.4) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.4) + + +@pytest.mark.asyncio +async def test_async_convert_to_powerpoint_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfToPowerpointPayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/powerpoint": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_to_powerpoint(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pptx" + assert ( + response.output_file.type + == "application/vnd.openxmlformats-officedocument.presentationml.presentation" + ) + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_to_powerpoint_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/powerpoint": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_to_powerpoint( + input_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.55, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pptx" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.55) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.55) + + +def test_convert_to_powerpoint_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.convert_to_powerpoint(png_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.convert_to_powerpoint( + [pdf_file, make_pdf_file(PdfRestFileID.generate())] + ) From e9c51298cb5b75602e1fd5e932f2c5a5831dc206 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 16:24:18 -0600 Subject: [PATCH 14/61] Add missing live Excel test Assisted-by: Codex --- tests/live/test_live_convert_to_excel.py | 114 +++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 tests/live/test_live_convert_to_excel.py diff --git a/tests/live/test_live_convert_to_excel.py b/tests/live/test_live_convert_to_excel.py new file mode 100644 index 00000000..26068b28 --- /dev/null +++ b/tests/live/test_live_convert_to_excel.py @@ -0,0 +1,114 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_excel( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("live-excel", id="custom-output"), + ], +) +def test_live_convert_to_excel_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_excel: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str] = {} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_to_excel(uploaded_pdf_for_excel, **kwargs) + + assert response.output_files + output_file = response.output_file + assert ( + output_file.type + == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ) + assert str(response.input_id) == str(uploaded_pdf_for_excel.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".xlsx") + + +@pytest.mark.asyncio +async def test_live_async_convert_to_excel_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_excel: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_to_excel(uploaded_pdf_for_excel, output="async") + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async") + assert ( + output_file.type + == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ) + assert str(response.input_id) == str(uploaded_pdf_for_excel.id) + + +def test_live_convert_to_excel_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_excel: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError), + ): + client.convert_to_excel( + uploaded_pdf_for_excel, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_convert_to_excel_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_excel: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.convert_to_excel( + uploaded_pdf_for_excel, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) From 33b8ee2e0f466d807fef687763d3636af9403dfd Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 16:24:27 -0600 Subject: [PATCH 15/61] Add missing live PowerPoint test Assisted-by: Codex --- tests/live/test_live_convert_to_powerpoint.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 tests/live/test_live_convert_to_powerpoint.py diff --git a/tests/live/test_live_convert_to_powerpoint.py b/tests/live/test_live_convert_to_powerpoint.py new file mode 100644 index 00000000..f46da580 --- /dev/null +++ b/tests/live/test_live_convert_to_powerpoint.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_powerpoint( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("live-powerpoint", id="custom-output"), + ], +) +def test_live_convert_to_powerpoint_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_powerpoint: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str] = {} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_to_powerpoint(uploaded_pdf_for_powerpoint, **kwargs) + + assert response.output_files + output_file = response.output_file + assert ( + output_file.type + == "application/vnd.openxmlformats-officedocument.presentationml.presentation" + ) + assert str(response.input_id) == str(uploaded_pdf_for_powerpoint.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pptx") + + +@pytest.mark.asyncio +async def test_live_async_convert_to_powerpoint_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_powerpoint: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_to_powerpoint( + uploaded_pdf_for_powerpoint, output="async" + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async") + assert ( + output_file.type + == "application/vnd.openxmlformats-officedocument.presentationml.presentation" + ) + assert str(response.input_id) == str(uploaded_pdf_for_powerpoint.id) + + +def test_live_convert_to_powerpoint_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_powerpoint: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError), + ): + client.convert_to_powerpoint( + uploaded_pdf_for_powerpoint, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_convert_to_powerpoint_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_powerpoint: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.convert_to_powerpoint( + uploaded_pdf_for_powerpoint, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) From e814bf4c8f20f7da74d8fecfa0ebb2d7f79bdfa3 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 16:43:13 -0600 Subject: [PATCH 16/61] Add XFA to AcroForms Assisted-by: Codex --- src/pdfrest/client.py | 53 ++++ src/pdfrest/models/_internal.py | 24 ++ .../test_live_convert_xfa_to_acroforms.py | 110 +++++++ tests/test_convert_xfa_to_acroforms.py | 271 ++++++++++++++++++ 4 files changed, 458 insertions(+) create mode 100644 tests/live/test_live_convert_xfa_to_acroforms.py create mode 100644 tests/test_convert_xfa_to_acroforms.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 24a1a232..74484a2e 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -98,6 +98,7 @@ PdfToPdfxPayload, PdfToPowerpointPayload, PdfToWordPayload, + PdfXfaToAcroformsPayload, PngPdfRestPayload, SummarizePdfTextPayload, TiffPdfRestPayload, @@ -2549,6 +2550,32 @@ def convert_to_powerpoint( timeout=timeout, ) + def convert_xfa_to_acroforms( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert an XFA PDF to an AcroForm-enabled PDF.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/pdf-with-acroforms", + payload=payload, + payload_model=PdfXfaToAcroformsPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def convert_to_word( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3429,6 +3456,32 @@ async def convert_to_powerpoint( timeout=timeout, ) + async def convert_xfa_to_acroforms( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert an XFA PDF to an AcroForm-enabled PDF.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/pdf-with-acroforms", + payload=payload, + payload_model=PdfXfaToAcroformsPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def convert_to_word( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 89b95fc9..792b3639 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -918,6 +918,30 @@ def _validate_profile_dependency(self) -> PdfCompressPayload: return self +class PdfXfaToAcroformsPayload(BaseModel): + """Adapt caller options into a pdfRest-ready XFA-to-AcroForms request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class PdfLinearizePayload(BaseModel): """Adapt caller options into a pdfRest-ready linearize PDF request payload.""" diff --git a/tests/live/test_live_convert_xfa_to_acroforms.py b/tests/live/test_live_convert_xfa_to_acroforms.py new file mode 100644 index 00000000..428fccb2 --- /dev/null +++ b/tests/live/test_live_convert_xfa_to_acroforms.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_acroforms( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("live-acroforms", id="custom-output"), + ], +) +def test_live_convert_xfa_to_acroforms_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_acroforms: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str] = {} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_xfa_to_acroforms(uploaded_pdf_for_acroforms, **kwargs) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_acroforms.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +def test_live_convert_xfa_to_acroforms_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_acroforms: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError), + ): + client.convert_xfa_to_acroforms( + uploaded_pdf_for_acroforms, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_convert_xfa_to_acroforms_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_acroforms: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_xfa_to_acroforms( + uploaded_pdf_for_acroforms, output="async" + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_acroforms.id) + + +@pytest.mark.asyncio +async def test_live_async_convert_xfa_to_acroforms_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_acroforms: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.convert_xfa_to_acroforms( + uploaded_pdf_for_acroforms, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) diff --git a/tests/test_convert_xfa_to_acroforms.py b/tests/test_convert_xfa_to_acroforms.py new file mode 100644 index 00000000..6080d22f --- /dev/null +++ b/tests/test_convert_xfa_to_acroforms.py @@ -0,0 +1,271 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfXfaToAcroformsPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_convert_xfa_to_acroforms_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfXfaToAcroformsPayload.model_validate( + {"files": [input_file], "output": "acro"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf-with-acroforms": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "acro.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_xfa_to_acroforms(input_file, output="acro") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "acro.pdf" + assert output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_convert_xfa_to_acroforms_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf-with-acroforms": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_xfa_to_acroforms( + input_file, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.31, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.31) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.31) + + +@pytest.mark.asyncio +async def test_async_convert_xfa_to_acroforms_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfXfaToAcroformsPayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf-with-acroforms": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_xfa_to_acroforms(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_xfa_to_acroforms_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf-with-acroforms": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_xfa_to_acroforms( + input_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.52, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.52) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.52) + + +def test_convert_xfa_to_acroforms_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.convert_xfa_to_acroforms(png_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.convert_xfa_to_acroforms( + [pdf_file, make_pdf_file(PdfRestFileID.generate())] + ) From b98145609939a8e27f20bc6a28ddb9f488becf4e Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 16:53:48 -0600 Subject: [PATCH 17/61] Add Flatten Transparencies Assisted-by: Codex --- src/pdfrest/client.py | 57 ++++ src/pdfrest/models/_internal.py | 25 ++ .../live/test_live_flatten_transparencies.py | 113 +++++++ tests/test_flatten_transparencies.py | 297 ++++++++++++++++++ 4 files changed, 492 insertions(+) create mode 100644 tests/live/test_live_flatten_transparencies.py create mode 100644 tests/test_flatten_transparencies.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 74484a2e..6a598188 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -93,6 +93,7 @@ PdfRedactionApplyPayload, PdfRedactionPreviewPayload, PdfRestRawFileResponse, + PdfFlattenTransparenciesPayload, PdfSplitPayload, PdfToExcelPayload, PdfToPdfxPayload, @@ -2661,6 +2662,34 @@ def compress_pdf( timeout=timeout, ) + + def flatten_transparencies( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + quality: Literal["low", "medium", "high"] = "medium", + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Flatten transparent objects in a PDF.""" + + payload: dict[str, Any] = {"files": file, "quality": quality} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/flattened-transparencies-pdf", + payload=payload, + payload_model=PdfFlattenTransparenciesPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def linearize_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3567,6 +3596,34 @@ async def compress_pdf( timeout=timeout, ) + + async def flatten_transparencies( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + quality: Literal["low", "medium", "high"] = "medium", + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously flatten transparent objects in a PDF.""" + + payload: dict[str, Any] = {"files": file, "quality": quality} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/flattened-transparencies-pdf", + payload=payload, + payload_model=PdfFlattenTransparenciesPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def linearize_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 792b3639..da78e122 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -966,6 +966,31 @@ class PdfLinearizePayload(BaseModel): ] = None +class PdfFlattenTransparenciesPayload(BaseModel): + """Adapt caller options into a pdfRest-ready flatten-transparencies request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + quality: Literal["low", "medium", "high"] = "medium" + + class BmpPdfRestPayload(BasePdfRestGraphicPayload[Literal["rgb", "gray"]]): """Adapt caller options into a pdfRest-ready BMP request payload.""" diff --git a/tests/live/test_live_flatten_transparencies.py b/tests/live/test_live_flatten_transparencies.py new file mode 100644 index 00000000..f936fe6b --- /dev/null +++ b/tests/live/test_live_flatten_transparencies.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_transparencies( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name,quality", + [ + pytest.param(None, "medium", id="default-output"), + pytest.param("flatten-transparency", "high", id="custom-output-high"), + ], +) +def test_live_flatten_transparencies_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_transparencies: PdfRestFile, + output_name: str | None, + quality: str, +) -> None: + kwargs: dict[str, str] = {"quality": quality} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.flatten_transparencies( + uploaded_pdf_for_transparencies, **kwargs + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_transparencies.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_flatten_transparencies_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_transparencies: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.flatten_transparencies( + uploaded_pdf_for_transparencies, output="async", quality="low" + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_transparencies.id) + + +def test_live_flatten_transparencies_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_transparencies: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError), + ): + client.flatten_transparencies( + uploaded_pdf_for_transparencies, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_flatten_transparencies_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_transparencies: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.flatten_transparencies( + uploaded_pdf_for_transparencies, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) diff --git a/tests/test_flatten_transparencies.py b/tests/test_flatten_transparencies.py new file mode 100644 index 00000000..0035fd70 --- /dev/null +++ b/tests/test_flatten_transparencies.py @@ -0,0 +1,297 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfFlattenTransparenciesPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_flatten_transparencies_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfFlattenTransparenciesPayload.model_validate( + {"files": [input_file], "output": "flattened", "quality": "high"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/flattened-transparencies-pdf" + ): + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "flattened.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.flatten_transparencies( + input_file, output="flattened", quality="high" + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "flattened.pdf" + assert output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_flatten_transparencies_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/flattened-transparencies-pdf" + ): + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + assert payload["quality"] == "low" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.flatten_transparencies( + input_file, + output="custom", + quality="low", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.29, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.29) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.29) + + +@pytest.mark.asyncio +async def test_async_flatten_transparencies_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfFlattenTransparenciesPayload.model_validate( + {"files": [input_file], "quality": "medium"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/flattened-transparencies-pdf" + ): + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.flatten_transparencies(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_flatten_transparencies_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/flattened-transparencies-pdf" + ): + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + assert payload["quality"] == "high" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.flatten_transparencies( + input_file, + quality="high", + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.52, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.52) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.52) + + +def test_flatten_transparencies_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.flatten_transparencies(png_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.flatten_transparencies( + [pdf_file, make_pdf_file(PdfRestFileID.generate())] + ) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="Input should be 'low', 'medium' or 'high'" + ), + ): + client.flatten_transparencies(pdf_file, quality="ultra") # type: ignore[arg-type] From 112003945ef40e8be963a162c3efbb59d01ba5c5 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 16:55:52 -0600 Subject: [PATCH 18/61] Add Rasterize PDF Assisted-by: Codex --- src/pdfrest/client.py | 53 ++++++ src/pdfrest/models/_internal.py | 24 +++ tests/live/test_live_rasterize_pdf.py | 110 +++++++++++ tests/test_rasterize_pdf.py | 265 ++++++++++++++++++++++++++ 4 files changed, 452 insertions(+) create mode 100644 tests/live/test_live_rasterize_pdf.py create mode 100644 tests/test_rasterize_pdf.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 6a598188..b213eb77 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -94,6 +94,7 @@ PdfRedactionPreviewPayload, PdfRestRawFileResponse, PdfFlattenTransparenciesPayload, + PdfRasterizePayload, PdfSplitPayload, PdfToExcelPayload, PdfToPdfxPayload, @@ -2716,6 +2717,32 @@ def linearize_pdf( timeout=timeout, ) + def rasterize_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Rasterize a PDF into a flattened bitmap-based PDF.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/rasterized-pdf", + payload=payload, + payload_model=PdfRasterizePayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def convert_to_pdfx( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3650,6 +3677,32 @@ async def linearize_pdf( timeout=timeout, ) + async def rasterize_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously rasterize a PDF into a flattened bitmap-based PDF.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/rasterized-pdf", + payload=payload, + payload_model=PdfRasterizePayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def convert_to_pdfx( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index da78e122..22a4b8fc 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -966,6 +966,30 @@ class PdfLinearizePayload(BaseModel): ] = None +class PdfRasterizePayload(BaseModel): + """Adapt caller options into a pdfRest-ready rasterize PDF request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class PdfFlattenTransparenciesPayload(BaseModel): """Adapt caller options into a pdfRest-ready flatten-transparencies request payload.""" diff --git a/tests/live/test_live_rasterize_pdf.py b/tests/live/test_live_rasterize_pdf.py new file mode 100644 index 00000000..70f41c20 --- /dev/null +++ b/tests/live/test_live_rasterize_pdf.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_rasterize( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("rasterized-live", id="custom-output"), + ], +) +def test_live_rasterize_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_rasterize: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str] = {} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.rasterize_pdf(uploaded_pdf_for_rasterize, **kwargs) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_rasterize.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_rasterize_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_rasterize: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.rasterize_pdf( + uploaded_pdf_for_rasterize, output="async" + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_rasterize.id) + + +def test_live_rasterize_pdf_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_rasterize: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError), + ): + client.rasterize_pdf( + uploaded_pdf_for_rasterize, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_rasterize_pdf_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_rasterize: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.rasterize_pdf( + uploaded_pdf_for_rasterize, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) diff --git a/tests/test_rasterize_pdf.py b/tests/test_rasterize_pdf.py new file mode 100644 index 00000000..707ab223 --- /dev/null +++ b/tests/test_rasterize_pdf.py @@ -0,0 +1,265 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfRasterizePayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_rasterize_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfRasterizePayload.model_validate( + {"files": [input_file], "output": "rasterized"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/rasterized-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "rasterized.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.rasterize_pdf(input_file, output="rasterized") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "rasterized.pdf" + assert output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_rasterize_pdf_request_customization(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/rasterized-pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.rasterize_pdf( + input_file, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.31, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.31) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.31) + + +@pytest.mark.asyncio +async def test_async_rasterize_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfRasterizePayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/rasterized-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.rasterize_pdf(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_rasterize_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/rasterized-pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.rasterize_pdf( + input_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.52, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.52) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.52) + + +def test_rasterize_pdf_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.rasterize_pdf(png_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.rasterize_pdf([pdf_file, make_pdf_file(PdfRestFileID.generate())]) From 971ec822b398d62a2d183f37a38b5582a0137e78 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 16:56:39 -0600 Subject: [PATCH 19/61] Fix problems reported by ruff Assisted-by: Codex --- src/pdfrest/client.py | 4 ++-- tests/live/test_live_flatten_transparencies.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index b213eb77..fc37a504 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -87,14 +87,14 @@ PdfCompressPayload, OcrPdfPayload, PdfFlattenFormsPayload, + PdfFlattenTransparenciesPayload, PdfInfoPayload, PdfLinearizePayload, PdfMergePayload, + PdfRasterizePayload, PdfRedactionApplyPayload, PdfRedactionPreviewPayload, PdfRestRawFileResponse, - PdfFlattenTransparenciesPayload, - PdfRasterizePayload, PdfSplitPayload, PdfToExcelPayload, PdfToPdfxPayload, diff --git a/tests/live/test_live_flatten_transparencies.py b/tests/live/test_live_flatten_transparencies.py index f936fe6b..f7a8bb49 100644 --- a/tests/live/test_live_flatten_transparencies.py +++ b/tests/live/test_live_flatten_transparencies.py @@ -22,7 +22,7 @@ def uploaded_pdf_for_transparencies( @pytest.mark.parametrize( - "output_name,quality", + ("output_name", "quality"), [ pytest.param(None, "medium", id="default-output"), pytest.param("flatten-transparency", "high", id="custom-output-high"), From bde50f10fffdaced6a25c26bf8bf0b2b92692d90 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 17:08:02 -0600 Subject: [PATCH 20/61] Add Flatten Annotations Assisted-by: Codex --- src/pdfrest/client.py | 53 ++++ src/pdfrest/models/_internal.py | 24 ++ tests/live/test_live_flatten_annotations.py | 110 ++++++++ tests/test_flatten_annotations.py | 281 ++++++++++++++++++++ 4 files changed, 468 insertions(+) create mode 100644 tests/live/test_live_flatten_annotations.py create mode 100644 tests/test_flatten_annotations.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index fc37a504..647e38d5 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -86,6 +86,7 @@ JpegPdfRestPayload, PdfCompressPayload, OcrPdfPayload, + PdfFlattenAnnotationsPayload, PdfFlattenFormsPayload, PdfFlattenTransparenciesPayload, PdfInfoPayload, @@ -2717,6 +2718,32 @@ def linearize_pdf( timeout=timeout, ) + def flatten_annotations( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Flatten annotations into the PDF content.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/flattened-annotations-pdf", + payload=payload, + payload_model=PdfFlattenAnnotationsPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def rasterize_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3677,6 +3704,32 @@ async def linearize_pdf( timeout=timeout, ) + async def flatten_annotations( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously flatten annotations into the PDF content.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/flattened-annotations-pdf", + payload=payload, + payload_model=PdfFlattenAnnotationsPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def rasterize_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 22a4b8fc..91dacfc3 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -1015,6 +1015,30 @@ class PdfFlattenTransparenciesPayload(BaseModel): quality: Literal["low", "medium", "high"] = "medium" +class PdfFlattenAnnotationsPayload(BaseModel): + """Adapt caller options into a pdfRest-ready flatten-annotations request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class BmpPdfRestPayload(BasePdfRestGraphicPayload[Literal["rgb", "gray"]]): """Adapt caller options into a pdfRest-ready BMP request payload.""" diff --git a/tests/live/test_live_flatten_annotations.py b/tests/live/test_live_flatten_annotations.py new file mode 100644 index 00000000..9a669fe2 --- /dev/null +++ b/tests/live/test_live_flatten_annotations.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_annotations( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("flatten-annotations", id="custom-output"), + ], +) +def test_live_flatten_annotations_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_annotations: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str] = {} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.flatten_annotations(uploaded_pdf_for_annotations, **kwargs) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_annotations.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_flatten_annotations_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_annotations: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.flatten_annotations( + uploaded_pdf_for_annotations, output="async" + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_annotations.id) + + +def test_live_flatten_annotations_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_annotations: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError), + ): + client.flatten_annotations( + uploaded_pdf_for_annotations, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_flatten_annotations_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_annotations: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.flatten_annotations( + uploaded_pdf_for_annotations, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) diff --git a/tests/test_flatten_annotations.py b/tests/test_flatten_annotations.py new file mode 100644 index 00000000..d5407a3d --- /dev/null +++ b/tests/test_flatten_annotations.py @@ -0,0 +1,281 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfFlattenAnnotationsPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_flatten_annotations_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfFlattenAnnotationsPayload.model_validate( + {"files": [input_file], "output": "flattened"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/flattened-annotations-pdf" + ): + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "flattened.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.flatten_annotations(input_file, output="flattened") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "flattened.pdf" + assert output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_flatten_annotations_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/flattened-annotations-pdf" + ): + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.flatten_annotations( + input_file, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.29, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.29) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.29) + + +@pytest.mark.asyncio +async def test_async_flatten_annotations_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfFlattenAnnotationsPayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/flattened-annotations-pdf" + ): + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.flatten_annotations(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_flatten_annotations_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/flattened-annotations-pdf" + ): + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.flatten_annotations( + input_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.52, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.52) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.52) + + +def test_flatten_annotations_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.flatten_annotations(png_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.flatten_annotations([pdf_file, make_pdf_file(PdfRestFileID.generate())]) From 8c116dcbe4d14ecdadf3f4cf874ba40be9f35e3f Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 17:17:24 -0600 Subject: [PATCH 21/61] Remove erroneous `output_format` parameter from Markdown live test --- tests/live/test_live_convert_to_markdown.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/live/test_live_convert_to_markdown.py b/tests/live/test_live_convert_to_markdown.py index be0c1aef..f86215af 100644 --- a/tests/live/test_live_convert_to_markdown.py +++ b/tests/live/test_live_convert_to_markdown.py @@ -21,7 +21,6 @@ def test_live_convert_to_markdown_success( response = client.convert_to_markdown( uploaded, output_type="json", - output_format="markdown", ) assert isinstance(response, ConvertToMarkdownResponse) From e6de765a3c9992061c696d4f6c83a21583daf7d6 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 17:26:26 -0600 Subject: [PATCH 22/61] Add missing `page_break_comments` parameter to Markdown conversion Assisted-by: Codex --- src/pdfrest/client.py | 6 ++++++ src/pdfrest/models/_internal.py | 4 ++++ tests/test_convert_to_markdown.py | 24 +++++++++++++++++++++--- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 647e38d5..627e72e7 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2177,6 +2177,7 @@ def convert_to_markdown( pages: PdfPageSelection | None = None, output_type: SummaryOutputType = "json", output_format: SummaryOutputFormat = "markdown", + page_break_comments: Literal["on", "off"] | None = None, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -2192,6 +2193,8 @@ def convert_to_markdown( } if pages is not None: payload["pages"] = pages + if page_break_comments is not None: + payload["page_break_comments"] = page_break_comments if output is not None: payload["output"] = output @@ -3121,6 +3124,7 @@ async def convert_to_markdown( pages: PdfPageSelection | None = None, output_type: SummaryOutputType = "json", output_format: SummaryOutputFormat = "markdown", + page_break_comments: Literal["on", "off"] | None = None, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -3136,6 +3140,8 @@ async def convert_to_markdown( } if pages is not None: payload["pages"] = pages + if page_break_comments is not None: + payload["page_break_comments"] = page_break_comments if output is not None: payload["output"] = output diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 91dacfc3..4e799c6e 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -401,6 +401,10 @@ class ConvertToMarkdownPayload(BaseModel): SummaryOutputFormat, Field(serialization_alias="output_format", default="markdown"), ] = "markdown" + page_break_comments: Annotated[ + Literal["on", "off"] | None, + Field(serialization_alias="page_break_comments", default=None), + ] = None output: Annotated[ str | None, Field(serialization_alias="output", min_length=1, default=None), diff --git a/tests/test_convert_to_markdown.py b/tests/test_convert_to_markdown.py index fd7c3958..88c9135a 100644 --- a/tests/test_convert_to_markdown.py +++ b/tests/test_convert_to_markdown.py @@ -40,6 +40,14 @@ def test_convert_to_markdown_payload_invalid_page_range() -> None: ) +def test_convert_to_markdown_payload_invalid_page_break_comments() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + with pytest.raises(ValidationError, match="Input should be 'on' or 'off'"): + ConvertToMarkdownPayload.model_validate( + {"files": [file_repr], "page_break_comments": "maybe"} + ) + + def test_convert_to_markdown_json_success(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) @@ -50,6 +58,7 @@ def test_convert_to_markdown_json_success(monkeypatch: pytest.MonkeyPatch) -> No "output": "md", "output_type": "json", "output_format": "markdown", + "page_break_comments": "on", } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) @@ -79,6 +88,7 @@ def handler(request: httpx.Request) -> httpx.Response: output="md", output_type="json", output_format="markdown", + page_break_comments="on", ) assert seen == {"post": 1} @@ -95,7 +105,12 @@ def test_convert_to_markdown_request_customization( monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) payload_dump = ConvertToMarkdownPayload.model_validate( - {"files": [input_file], "output_type": "file", "output_format": "markdown"} + { + "files": [input_file], + "output_type": "file", + "output_format": "markdown", + "page_break_comments": "off", + } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) output_id = str(PdfRestFileID.generate()) captured_timeout: dict[str, float | dict[str, float] | None] = {} @@ -129,6 +144,7 @@ def handler(request: httpx.Request) -> httpx.Response: extra_headers={"X-Debug": "sync"}, extra_body={"debug": True}, timeout=0.4, + page_break_comments="off", ) assert isinstance(response, ConvertToMarkdownResponse) @@ -151,7 +167,7 @@ async def test_async_convert_to_markdown_success( monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) payload_dump = ConvertToMarkdownPayload.model_validate( - {"files": [input_file], "output_type": "json"} + {"files": [input_file], "output_type": "json", "page_break_comments": "off"} ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) seen: dict[str, int] = {"post": 0} @@ -174,7 +190,9 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: - response = await client.convert_to_markdown(input_file, output_type="json") + response = await client.convert_to_markdown( + input_file, output_type="json", page_break_comments="off" + ) assert seen == {"post": 1} assert isinstance(response, ConvertToMarkdownResponse) From 2d7d556d42ae7b1a24abaf2795f3c2eb6c0ff7d6 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 17:27:04 -0600 Subject: [PATCH 23/61] Test Extract Images live with PDF with images --- tests/live/test_live_extract_images.py | 4 ++-- tests/resources/duckhat.pdf | Bin 0 -> 88669 bytes 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 tests/resources/duckhat.pdf diff --git a/tests/live/test_live_extract_images.py b/tests/live/test_live_extract_images.py index 7d8abd39..faaff70d 100644 --- a/tests/live/test_live_extract_images.py +++ b/tests/live/test_live_extract_images.py @@ -12,7 +12,7 @@ def test_live_extract_images_success( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: - resource = get_test_resource_path("report.pdf") + resource = get_test_resource_path("duckhat.pdf") with PdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, @@ -29,7 +29,7 @@ def test_live_extract_images_invalid_pages( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: - resource = get_test_resource_path("report.pdf") + resource = get_test_resource_path("duckhat.pdf") with PdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, diff --git a/tests/resources/duckhat.pdf b/tests/resources/duckhat.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8dbaff230a95fb7edd930cd827ed4c3e5772613e GIT binary patch literal 88669 zcmeFYcU)6Tw=f(Kjub^eiXbQ;2ni5+NhpVoNu(r%(D5Kd3>a!6AmULJm8z70(xinD z>XF`TC?Fs;0i-G*QWR7Ku^b=2oq**j_xN0`H{dOA~s#y_Ek?(Mu)WYC}Osq1|>!6*P}hx0AZ&pw@_%rn1uNE{RTEUa)@6Yze~=0!yLko+1Me>aEM=FvvuET5B5K> z1^aKXrLFrzyf7_*F}?pvyqA)C_}bJ3E&s%)HzN-dZDKI?@c|a`r}M?cUAVxPvE$hF zH#X&#+y~>Y_Z;nOGUXEylr=H=?TER_p6j6|2XAC?-8XC5f8N(lBpESwa)ikil;N_GFEHcK3vS#vRv{*VzK`!kKm#=riW4(Ghk5^4R~)gk3x2NU3XrF`^| z4o#&_UJHhPrlSV?o%>{5^lgcObeXHBGK9(BIJowK#7ETX|9u+2%wl_2)7AdDi3Rg% zYidI^VY>QyI>2<*Wqv8bbfKCW`nnpLdNNuXI#4Ywn5LGNwv0ECPYpp-m$3-sL-YOn zc-lXu_I*69|K4~S8tPD8U_f0xT^Svyx;`+Nmd^j(aJv6CoR)^3x|##|IEl4sJKG%H zdLuS?l9!Jc$?GR8hQEggd3pQeNixUrzJvfHxi>BM&1VLo!d8}k{4!^4R|M4&gBaN?H4Uh`+JAb;$NR@KLFABAY}S0d)$phAM0@}#6nIYKKRyS@)W+t=qyKR@BJn>y z8T5-h{^>cO0T@n8?S}w>`56M^z-$reEmUuL^Dv=uKw5QiO#unN0A3t9$$CXzJ-7SJhM3*HHC3p{=J1^U?5D z_0fQ7XsN?sI@%g|HXAlJzdrkK{UQQFNnQco`0xFCt7~Zc=;`~Y>cG5Vs@guB;^EoNvs*|&KuBCvOjJlj z0swJIG4TTjq$FkbOG_(C@0XENWPzND8UQMK1_lPK|83yd&Cf5uFCesMkMKSrAt7P$ z{rf~j#1Dw?li4SJK;Zxz6b>n|;E;+s0RP$n=l|OdzD_{7+pLGg#P>-^DjZf+0zNon>{fRQ8t%{^rNNf8#ay2eTJ`aGU&t-TWW?-u^!_JoAI&w?A3_HV)baNS{N9Qwa1ssPFvuw*=ge z{DOl-whKpp;|2+GgLZ-V1Wmrz{YuAf{j~zX0eax?C$P%w()T+E#3ceU*&^VT*}8wf znt(vpzvIB2C0C#d1p58kz<=cSpP91fk>O)e{{m3ZFO0J}+v4x%^*gcs_Df`y)%UTk zTWqs1E)EOU4aCd7feqRC{7T?9n?mzFJ zAJ}H|*GkUwzm`}yz%;vRn|T%v%7w}ik0P;IfsCU4vY`U60Kmuk;0voHVj27XrmJ7ywcvR(PhOJ&yptI0&16Lenb z{5F9w`_5j`EEax+Kff^nZuvkQGFubj5>)3re`K5D{1&qSp8ytpK2IiZyMgu2f1v+@ z@O@u+B|rmAa#%+mxviKyYoLd0g&&q`LX%1 z8`VMV6$k?4&6)-P95UN-*)9WBfDlFiAshjw&nmG&rT_$J<@S4F&-teaSOmX_0mPvp z4%(yt96Nq7edO_-oDPvLj#iaTVOIx=C*$IS2}qiDf7s!)(aOzaGSe@DLefIdD$B30 zf643@EFK#Ud|9AXPDPH%f8~^%_N!d*U|g2%&$GfTGP_PLI%%bkmG5<5QXj(l;!x?v z4Y`FcmdkQh=D;E5RjwY-R#M&4g(OhkzsdUIjo^hgUB)muX8B4QBYNh`AsC)iTiW73 z6dov{ok?l0m`}kj%fG!nd-Z16U-Zz9p^+hEp-X4*6v`z(SgbWIe0|sJd}U;CYE~{n z+3Vq{15$;|_~rz-S0ruHo`RcAKP$v2@rcliOLlXKtG`R9HXNwwDykk)@>7p%O1bFK zpRacmjjGORrz(}VhXjq((bXLFF28?Z)V?3`l(bxVFt~pqz9A$(@N(K2V{S$=a#b8V zf2vU81j4JJ`pj1pXt$cyktG_8c8>c%RTBT;xX}o7pTvt?c5F&XlU;5+I z)?+&r?6lHK-y;joJ$jkwno;MWdZW1ckJP@^p(2`uj!3f)9#ZDew*Q*e23RpE__@vAi$TKYb-%94oXN!7b%nUPBfVaTyNFmv&tl)8M^N%^)xqxgzGK|@VJgZhZA3Fn zcdDv)EJM?Doq()OiId5}|JL+o(Jt&ySey1@ zr+?Jq%Qv_K0xk8rBGq&7QZ;Fw0P->pJ0-gB?(4bK47;I)VDu+K-D|7iy1MGdpo*K; z?a68uXhFkI`e+QzjoFgqcbXEYP}6-P1isuh%t$L?5I04VX3@8VgOdejPYo@>ZKB|Q z4t}1a(b8@v7eksHQ~Jx33FA})mjF5pmE?)+J!bEu<5o2NY}fT={4ieJylCP>apw4E zzv1$is7HANIuMbfhWodN$@HfY!=T{VF z!M)nT`GUMp;XSKrs)v+a{j&X__Q-sHzHDzZDl}0g6KqCta}=5DUT!W_Ey0jhvmvdP zn;GYP*W*YhmirhAYbOJ^HBpoy|m4ES$LFPf3wBt#8<@U+Hzg zM9b$eb-3o$PEV|M4F>uL+d^S;_bZqR3uj_&_YB-e`laP~{B=pP(*>$&X=H{~cv1Y> zSQ1XGyE#OQyZ(Apy=icUd{9k1;r-)ARCDDeSw|E&bhe8Yy4oZP37$+@Xcrl-^1{nE zH-3?u&N&&u9qj+;79BbplI;{@V?Wb6v~Y^yPFoQGTl;IT`}JUp=3-i;(l+6w*%#BH zdrRogJMLGvn)bdRc(-F+O+fkVMfu$uVi$nRmU;QU<+5wp?#4gSZMjY|d>mZgy}2VS zyTc*NTG=cs%MIeN2Xuu6z{dOCYHykBZGRFdaJj7<%BE~mt~7;9d&V#Q2;N;QmZRg!Co*$O*gxL? zBnBO~vUztd)w(j@$v<80nM&=MqoBqB1WgGezkZ`iX_LVq!QfvZeO$wtO||@_C%HTtXkUUM+W<260H%zP)|_x zUB;Viw@Y)K#<7@<1y8DhTO0zVKiRwMO{wG4rOuF(J)g=O5~Td~>Lc7aXgRL%DD6f& zX`x%UGVKgTQp;>wKgS>XYmGv;s)#t8^2TSVD)7&-Y$?oKzCiwPK&hXgBOQmj)2WDG zk-#;)rh<#`t0gF_3W=dC(mb=SmWhwRK(Fm{`6HB(A53=$3cs|}x)L>?P6;!pT`6l< zE#r4j3G8NPjWo#eH0q3M zp_MwWg}FE+G)8HkPpa6__ye*q-cX%e$0iWl!Vw$m_uH2RH0bYr0eQa=r+iP3zTZd>xO_JW1T z$^tv8*iwuwwb@$ntg8vo#`0IUT+Z+6#KFzhrJrOVkg3U5d;usB7tfZWu>5VlEf*5d zFFlnA%Oy6WI5JzAv(9tWwPG~E;NnHak})8U*c5y z{>}pPK`UQ!UBXH0(6f|HQ7K4(#io;1>RM2QY)(t`ldEo;nc`$fIX>BgoO#0nK@3C##O<%Fo>oCn~o=-3o3=&pn?r zGlbIQ=MKM5D!+1a0o>KtuFxMG9QdMqWvFU+-NU8d%POa64z~6;|4rM(5x+Kmjn$|f zrN_H@j}6ykQKZ^6-#qzHp1Tk<`~Fprg3$OAf z-c^LWX5Y(J$(Tgl=}7mtoyhK~@g~#5?;fD|%PMsJ>;nwHsMmSFyj-+IozJg#o#_1EMHx>+&ZR!_etAre6mYtSBcw>8QbJ|nwVH*h7TDIsA0tcT!NE1@Q; zzIBE|64FNV!|JpE8T>M3V~_AiLfNs-Zf~Ec60*dkBr)>NZhfbVL93mrqSFK&WmQL` z8bAMn2PIQH=qD|xL9Ox5o1dLD-uD`sTI>~2-pw&NVOCqFPn<7!*-;l+PbYfX1Wh=1 zYw^_$pFEzOm2eCkmZd6=rG=Vl)1*^Sv{;^|Q86aucUkQ;jfZ%nid4qxa#t|IawelD z4jFLvwM%R^oxj(y&vEZ1On4`6(O&Yon?7}r4czOH@y-Z(Az=Zo5gb29kE>};zvaTb zLO)C}=BxsrdO42iEV&^4b)IkIol@gH&qecnW@w6a`e?*p@p!*m(Y!u2Rx#xR4!gu~ zzQ*fSPUelR&N`Yod?@xII4H~9k=mXU##5j7w?L_0KtJiU7dWU{dayBZLl1I}COQDW z8RBm0r=8}N)W56j9;0>M2C=ygUwnw?i|skJHa6%Qbe9oECJpt#i#=`7S5AjE3Uc^p zDaRiHak`&pEpL|bV5tmi+5doTi?mhV(x-o+5M=4ij_uf2Mg+uZ0(f0A`9BN~%lAJI z$~$7u?pG=LJWvK85709n2#8BKWW3tJ(=%X3Onjr2^83+(te&p!@jpu4L-cZIT2sN+ zCtZXuQxKSR+fmC=w*{3$DLw-eI#(2z1eb-nDkx)Bs{2A5W1)eJo{L4^LaJe;b6wcGq{U&f z6_v%GF_M6`nIEQaLi*hDX(=E6(AD_s>k|L~>p1&`q4>zk4_0hmCc z^&KUt8ugtWNSA zSsV5=xXG2y<8kzat{o4VXCYm0cltP&F$A#`S+M47gCg90B*YJnp%^%iF&t=DC|!0D2xhOvEr%-96d3LZAx`uw zBqBa7Ln+#s@Ay^29h|Xg1cd2K4zqOHOj+y6vGAVAlZ1^lX3#^w3=7K~=p5<0lfZk^0KG)JNG_iyh1L zscAUR(1Cy)tLZkk!_&u(W=m2YExUhkudnhkrz5QHc<7XTPR+RB?H^gfctUUY+BkXz zk*}i&r|V3*F`8mr^oRVFO9kjSS7iBeGbMO~AZn9Ae&@In@w(_3mS!gr9yp*8=2T6K zROEhTsyXa|FpDchBGg=A#M&MMh4~YcS&P?Sx$Xxmj$oEuoy*Qd#lDO=Z>+BBqN{CqFO9D+JX-f`iewtT3ObjX7y^yFZ-(oZui&hwgyxcVL}(xX!J# z39#qpIu#GNRPijE6HbB*;()zW8fjI@K+xw~Sfa)|Kir$sXglZ5pzYo@apNr;D=T0v)E;35H*C;SV&{5>!tcO|0QqsHX z>Yf*6PY!iOl{t_nrn+MhE**9?lP)L&Y)6{@F20?166*UzKqvXn-{;|c&hquxz;gJ0 zWS7}BENixvsoTY}K-&?~HaY8dkd;*gf+h|bmhSL?&TnT5;p_wz4$=*hP|(5>bf;v)9^W2r|J6Lv>VQc&R)wJzf621I|Yq{I(pa1 z@uj`(;V0G}Z*`tB8`P(XGvT+>$-1@qQXi|QJREEkmMx95em6h~zfq){6;*0+r-)%P z*K**ZhK2Kwm#gc8l3vdkA2~UM3DDY*x8tYJ&^Jl^VC)owyUOwp}cBa-eP?7 z*g{-p6miY{NPW!T@@b5a@I~`dQo(TTXZsZg;#{MfMH~Iw)63eoRmxG)JnN;? z7|e9bN7?D-9tTR@5qj5bu=^PWo%(#qnv|02nn~Buq1N|kY(`1tZ1V?(sFsA))izbN5E@S&L#UgSk@C6agSQf2sU@qzF?PqrID&; zIlMM~z0olxyK3lg4h!&c< zv*F(Trh?5*L+$3BL9{!Aezr&wY>L^yP*L=^o9c?>NZ+UofY|PfbDl|zk<9cDr5ml> z`-2x1sx(Vipus`zRe66-q%u;5_Q%-7&;A_kAjUF)MyL;=AGoO!8@^$=R}ZJL zQ$t{kA(jHl6Ep#VgyKNjJ9)nA3=eDdf6th+aCX`kFhnvyN{*GJ`%cJe1Ayb(a6j-N zjKf5RoyOiuxv>*)oZk~~fCHG%{oQhLgG2yL1tqnHG18(q(egDCQLYxn5{OQT_0E9+ z!x(a`&f=9L&tpA~ly9o!hz%sV8pp|!#$6N}%IWRpg4Mek(#KwU2;n`ckn2-Z(e**$ z{lN;UZjtUEWvW*ZHgCEY-Jg0!=-fOMS4T^+#eCqwOK-R)!IQ(#0ymGnZX!xU%d)Er znRCqys%2omd40#-Lz1!hyj8(xgE`o=4F9G*)wS;nYzrSsOKw1(K`4!Ov-20~_^JSTL*`j%)tJ~xh0ukVhRwhD5 zs=zQ*U3$_u*%t?XCz=!`?Ce7S9V~Jkp@iGCvvu%jVC>{;Zl{w9(S%WY3lx1#rXq&e6D=Ap}mASp6aJurvExXnNgvHSWQTT-VCQu)VG$^lq6(6UOrq~(b5k^$~CefBErgYz7^C>oWFOQF40|{1z^z8CEMV0jyJ?Rvr z%c6$sU|mYW0p3vm3ViUsa@79FZd8_s9)|v6?tqh_>?R{8?is})31^P1BA(7ck|q^% z9eC?v!wQt+JWY+#hF(2;;6|P5L{%(A94|IHIWX<i9;;5(n7vncPFQW%lev0Z<6Q(DykXDTpf&U8 z$M`u1zpI=VAc-6%?BIxHvGUjo1bm$=01(zkzwhNdAayOn!+8}b%di#i{B{fLw)5Kv zpm4zFCK;f>RyS6xUx*_6-l>cZ*CLj$g)SN277 zQc|A-Ro_pOrtoXGY@gpyy(Im@^OGU%zEc_80TGEIYF1yw1mE`)fqb$IPUw_Zxe`@p zy)OV&x@bx7?Y^uR^0X()aX6D^t9;93l~SRg054O!e)hjFc#HbrwmDLK-i`475JiHh0Vq?(mm0lQ;~3Gyz`g6xufQk8ppH62QQ=G z>DQYOBf%~LGmd7<38fJbkFOG=ws#`MdoW2+u^v&F%Q-<5?TL4*ssut`xu_%Bs3*^o z5%HnpAA2t?BULLSV+KyzV@qvwTo9J6hjpJ>Hqq1A?BXy@0m43J5YVOZeIGOvXh|wY7q2~~@o&#U9?8!SSZ5t-GAqu(u3~^>+foHJ_ zPk7kC1XHK^@IAt~L z^eA>1FX7!OmCT?^jwQ@IJR5}TaVn#_cZ#h)8VIe1n#+)s#B>I=8(5@ogKktmHn2*dX>v}kK&*H2y zlKN4*4*o>w+f~_Mn!8n^_}G(Mkq4DZ(#&c5k`V1zVTAq z>~zQ81h>V3iHWcmlirF6dj~2O$osm=>jvDiii_&oz0~h&D{D_UV)Tp0OKwgncPYLo z^)^1_XRBUP=J4n&X8*z-u%5?D7ncXTmbhG}YXu0eD7fmwFHVhx>iIR ztaHCfd_~0^ruH_vRjB0U9MPptVewsjNMlv{=+XHXOVvXZr8lLyZm+fOJ~OFHOQl$Y$sgp!b>1kg z(lGmH4x(3|rxjnOy88FMN2j%9EuuZK6uwWDDdV3l--bE06yhd~O&Jj#p+h=+%iho{}GvB+*|wHI#9pyaDN9^c!hWGRGj!81ZoG@#ga@ZuO40VU_{c z{^COJR_+zQ9-oi9l@-UR9-^!ZIgTl4Ra5pD-2npLynfXaNU_>8mO#h4eri!`fSQTluc3%%$Et z(v$=9m9UVL?;iR1WNyr^(bC2AA^$#n`LbCEvFQC|5H^3lQm(kZ{H=Lo*kD7>$hTOMdiFey2^xIqSUR`crsu35x zFx~H9&{reVv^wou7eWKkBbUMuP`I4571>93+A5@SJbXsfJ~KKzB0{y!rD%LHNfjy> z5`cPo1-?2=9V)XDl^Rcd3!hF^JA50avvhs<8P6N^>nfj6Rm^JhBYjV%LD*B*?y>nV zaz?m%+@!bsb($wqGh=tm3e|(Y@y88E7gyAoh)#tH|FSscQ=YbG$aHFb%cDD-ZM|;H zi__z8PYLBaMEJUrhQrYFVbW(r@jP%!@zcx|7^d*Xy@1d~aZ<^H)fJSH#(lZaDr%^$ z23g5Ye?fX)bTbpu5E6VA8k5+*E_^PmA;Lc+i7!453&Es)wJJUnw$mX;_g45TMLX}z zb%dzPam2t`((_wSo}BLO2?%OZ3()a%cR|BU7wB8pGt*P~{m=YO($8Y+od?{!5P=Y;rIhtp_2n|{PxVA9i3`}--E|?HhsCZ8;W>M^PhS<|KT0P=( zawm)#2B98lvNW!0jPA4iGnn8YkjwlUODtX%YD`G;6e#%+FZ`)+cXnR(ce$rhpQ&5<48uvYjRG?E+&_4P7X|H&7kxtB0}ZYiX5P zuw!OCLo1CRQydnT?g_0Bn|j^}*QS=fn>JfKZ3UgZm=Kg^nfThzt=TSN7u_4~jLz6C zT6dspW!=0b2Xl$gZv3HZf>M0wKuD|xU+udCB`V%`>WuR)5?{`!4*7Zzj4V4e=D5`G z+)8(k1x17&nqDnd^?Z0wjrqBw8w{JMoftoc0E zwII$e{Uo6+>1Ms=3i3!^zS2;IbbpyzDWR@-*jYzDJxW+?sEnyvP!?GF?f_lL#ffHS zt9kkOvt2qlR$*@5aJSROQG4$uD0!M?X9pDs!EsrHXZB+v8Na!UE}lQjoc*GZ=mDp?f`?N23;K)>k7VRb7COZ+tJl^kzb)Co zpsz1I(^CA?7t>eOa;>(-R8OWM*3o6M6i z4-mvc*J&FpRSz0*bd|BR1f&Boj{BVn)!`L-b`FV>S#GP`liDY-HO<+C+QMm5&tAVU zi2B)uY^jV zX508znS|ZhyuUT2#g-k3SSpgB+kz@n(f%?{s*omx482s8{*b|k4injg`7)gs{nnQ{ z_Lmer+x>^^vDFq^$NhHk%Q)n@u*keGx3HpttqFu-Y6iNejxrfGGMI1cS9C!0Q>RJeko#>2#^voh0rvsA zGQ7Jb$qT(UdKy87LsK0tK5^0cTIY%z?9_iH2Vw40po1q z$$AMgBRiUpqZbHbWq}iuCL)|$Mptr66}PT{GYxpjYycuyP&CVQgYE;`4j5)O?D;m7 zw~Yekmiy;%L3!{_Zt2%|P&Nw4WWORH0&bcfmK?XJ>pC%PL_?)Kph-I#Y_L?j&AR6Ds z5*U64S@*4S7n;^rrE!IE9s2KNnFj+ULrT)d20h2DJXhAPmN4>nAp?^s(4vLOOn0_p+9h`7E>(O^T8dq>C@=Kngm5F zt;AC*&|Oo*5xB_WHF(Ddp_By11A#g1%D^`(E1%o3@03fHYgZMk1;?vB2LGyUqTbbb zxPJ5{t(eK6wIt}}?z31riOE-z&y_eZSrAaJ? z4VB@E)_(CpftrC$*E5L2tvZqdNaw&}zLnpPCfzhdtcHjsl6TI7ZQxNohS#0mTOx`0 z<1Y8bLKut9@kB%2 B2W=Dhrg2yws@ljT*eSUUzyb1|t6czdk$(~gy*~d~-KFG9` zCf8f?Sjb>=l5-LiKd*R^jb zo#L%@f+?1i20S{^v>b#Tltn^lxgn$H>l3 z3{(R2-_bw6G4j}CZ0!!&M%j_?PIiHB+nBQw#Cagr6^NUaK!Q^<=+qM@^b@i?52t+k zQgcX~8gq!ktkl~yK@Yn3STo-C{@jur187Fp9(UoB$eZojrApvYs;@R5-mlan~i(z zj#BKYgV|Uf%Ib2UIuP|*9xYZ`;r>xhZ!vldZr`8=8h(7PsGw66pgGGJ|NO_oox#fD z&C^A@kRAYU2}uHUG|HgRpxi0=tc`S7p#S!wHTqtSG|fI0mqbkj=rvqjH874YP@-|xQS}m*f%^OvlI%R7>x+(EU*r(7^-tS zBw)}TBYNOr)in#`^}K@FSBJUh`LiVL*CueTtNL}y<6*TmJUSOI+6C7Tq2uQidUJXu z5gp++n_(Q5cG$){?V=RXSvPBIs;9C}X861P!L<5GI#OAIv1=9%0hcg*=E{fF8Q=vNPAsN zUm!z5v(k@ADAG#X%GUf0J6n^e@Q{RK=LF!qt|RG@kDJmO>qcMUGE%Zr;-05d{dCu+ z=Bu0U9^E)udNVUT0`AR6)~##}f!?5xk?DhUSAmQEMlONA<%133X>0aq%6@RGrPl3H z>ONSbY#3TYxuvCXSuj9Zf3dsyU5=|u@S-)%y|MoNm@gh|)9tb$KCldN@)U6t*aNdI z7fCx<4aSdj;7l%ua%_uq4aFE7?0x4$IB%5XB?R7teHKRAl5fp1?z8wInWpUY=zxx zGE;0`+VNgX%t0lE>5|ON8f{d99(#xsC%WQn^gl3GO(lh!%Bej13&plsQHp(`$QHC` zY?vFm&fcENeYjtA9{46=2;iT ziBgJaq8+x|4KKelSvo!1$B|Z_*sG(`9&m1ev>}0XPHEGpdoqe!b5ztUV)yLUIt@?&;wT^zF}J9JTM zRF4uMpC#ZcCa#CX1$o-mq^x&z+9$63E2lJTLQA_~hu^q?lS8a~BT2&G=8;(2N94BIqGLu9 z07v-hnjXX>XLnoo9miGVYq}5eXK-H*ia=QW<;)p5v(%B>j)HP6ODm_M)WXB^VzR{&R8V^)qC9Sbnu(MFv zKPA!jaf>87kIZQFWu&+ZBFKi|@?d67j1fz&pfD@_nry*Zm3REsznbqm7wfnX!~5cL zm=2_W|1{5?oOT=TI|wV){xXA=H-E>)O|2aLdUx}RVjH})ExotTJyldubFGs4!ZVz3 zt$Jj3zFwoV0AI4qYh&iV<~+5$JkIM7*Tuxf7T@$+cD4G4K3$;;5%JOt9e|}$o?{2z zJA?)){*yK<%c9$fljm zYQ+Ktv@CIlUZ{nVjzUkoSqM2SlaOf=eH959dZDYV54!-!u&>{pc;LI#c~%9>mESh+ zJHHL*v#J0PK?CkP030UVCV&>Rfvw703Z2JJx3LU6@H?&Rzj1n@Dw^8L6%JexS-1l; zZZxPH&%%oYV`!LVq+op4lb*pVs1y-A&XXsTDk_#98r@D{TuPI_lS~fu-DjqecsEKc z>4X#Ga#x2=DN3k#te9cgJ#sg}51V=E?7nXxS&UY=-@bKT`c%njf2Y6!xmWajpB3$| znyJ^);?s@j=<3zap6}+{qMUo~!Fz6exLW(AER(DuTL1fBvZpM}+*ZW=MWOzBi$0ls zpG8KK^HE_5H+xI{*6~KwPwsFkVVluYW_&{DWSYAx^Mh*kwHHQ94)^lb>84wCFRyv7 zyaeAZe^KFu%$jnoTo~op>rlU7k@F!y+7?bp&e3O%>9qvfaU%R2eMQYu{SIL(DjY&r zeixRq@z?jiO_PpG4}N}?@T@r{Bxt;quc3UVZlCfD| zuC9u?`rBRFMa4$MszDe*L!JL&zO!%9- zr*x_{h35hAOOw;-8nvfR?RU8kEg?KQp4vrl?WGR9AW6zMb33Kjfukj_2YWSLnpbob z(uSriCW))WOFWM^C~hFT4qdlIJ&G0`jW|@5bm^!{f@O?ylbL}}2eS&282aE*@iN(` zwKiESOtc&-C4wy+mV@ZD7TO*?@=oOH!tU(6Rfrq5{iIj&7!qBYFZ#fPKej#{mG!%# z+5W7BolY*hRrw`#mWb&B29Li`9XjcX3!jMmb(6ynKnRLW0{z*SWSd3M9z>!Qs|{cJ>jc9p6T_RZs-9rGkJ4n(>_n z5@9T5v8dzThyuE$hM17SD*AM)~j+>Gh=4v zXialscI7Mc2m9O-Oi!{ZU>K2I)^Z2-X>lS1yz6yZQ+c3Vu%V!xXK(c=2`bHPy|_ zybk(9xfSD6X;-PiOb~)j!PC6EoJ2rc zdl;O``S3wvm9w6bl{^(#+%W0pw$!+)Qt?{+JTg5gxep~NsTtKi#V^v5^u^8bz>JR# z&S79s?sjT*ZX$Od|5ew`)CkdpKW`b+IQ3%Gp_I_^l`glux7)mZeSo0`V6(!xN;317Nl+?2@j6NjajmJf|3@k4C z8|}8f?#b~&{DK6y2k%>`FsZSGfL^Y|btT*Odgpa%-uRq}NyqVh&V+_OXrx5Hm2(Yw zW7gFjG29V1`#!pIwu4mjVA-*jVmw>@#xx=I`C)~ZL&bypbzHC9Tw+9^qN7&_2h-K1 zEt(t-U!1f|5V-hIuCKtuxFD9m42QWt{fKRo>yh)gUKUNTJEK2?7A`GPDJ~i;ZuuxJ z+0p`&n|>)}8YE_k@Rl;7r_zt3<1+q~*Dkek_f9Rv-7XYWng6RA;>IjfvJx!pmx>Q` zeVP4UX$CQ;$bISWwCZUT{}?1~c231BRd{(b`ql9963flBzn0R&oSG8JFJ4&Vk}znO zHiKeX{6T?L-}@KR-zNt-za17!wFD^$ALY^K+`XkWtRsP-?ce6Id{R>rR*w1pAHX*x z)AIwsq4m)%r2&4OyR9goox8V=&vI@Z0%RRm-?CPK<=P>)T5$9F{X^nl`PgB7*IxKF z*s2_!xKB@)vJ?IO{_F9`Vix*vhy8I6z@yr3a)BH#A^qU$}Nn#$I&;n1WSAvD3@KuG9P z0w_UnC;Ps(8YPEEMmA&Kr-X_j(USoj9vbnO$WH?DLUm{y_t+5 z4}psfR?^2+JpSv!7|>ef2w&J_BL#+GIx`*o@c5vQ}ztKWU7-GsCm0i&eZ8@)8Hqwiq!3q9J~I_v!SKE z*NcgFJVG{cQmwWqNaK$4QiWttd8YK;fr0#j#(xPr{wVKyY-ayGtom2&&*4*gSBYD_ z9{carRz0Mh2I==~Xlf<D_eEyqnwPchIpJL0z3 z&?*hJt+tczD}Sf4mKi(TaJV`xswtx4oDm$OF7t=j8G-aagfm=*xj9;OA94qR$nXGj zLwEs3T&#GqoRB^*2I|}4{#qCHQt&YlFh~U{Jied)Lh?teYZA%kwPiL@ex;xAX9Ie3nAgQ~L(AJ2 zl3io`%5^^ZEB5~moqk$XWGyYd&7xq#S~S3Z)g~dw_tgsAQ|wL(vWwsArZ{Ap-adZL zR*8G{@@s(?$ZJDejM%wfm=5KhH_pa9v)^EiMDu*Z6|2;>oErh2zH8!r=9Q!NFQWbG zvr51C)on7Jm3+PZuefnMXgi!}92W=O=<;Q^! z03Y2NE%cvxK9UZICxDe8XYCF!!z8j~4m@uV=m1F)e6w8;wyAg`g_Nk%N=owZt7pnq zlza+&A+qW1oSP+?O7tZC({12dq}M8*P_Pij>PXm5ObL}W@Gz1a9ZiyJ*-Q#}z1O$) z!r|52W*y3J$?2#1XCW7=Pybcv2e%yzQLORDguo2W&N^!)Xs}%DRAz?)zDQR#&HAS5 z-$2^7p8ue9VV%S1*CCaddeS-`%Z&ImeTaAjKU$5}YHdq56sqsx#Hc~5<<`?O4PE5m zx#FN-#cUkYr(unldLcPSb!i+NHv+=J3k0=FjRfYJ65X4r^vABxQv(v+o2bF|l_L+G zsL!2yN3`5T`XP4f1-#5zah0W3STeRV%TLy3P^=__3iqCn*sd`pAZF@}p1}%wzE=~M zg;pro;b9x3DGfU%Ft)1Hsr)E5EaS`F47ufN~f*?Q-uY}1WOpG)*@qF;NX=Tv`%{%3F7N3Ena(5X-k zu+tz=*ungj(5rfIb0~ri)?)lF4hsNk4%te4)O?dZ2fo}PW^_OTb_H=&*G4U@Lq6&L zm?#Q=?f50sSU0S09;NecO7rN8@HtE^u^so@XBm`V6!O2Jou z!`}|C3vU^9oT0r`X$fi9lP;fq-TRaEM`$&@tmFaL?iY)IGksm?!Mq+vIK8Kc^8n7i z$nLHDAdZ6Whh{j=h;6d;dRww&H8|YJBRUS3u`)W5I%*QW-4}A*PcTX#4IXrfImf!Tq zk?>nVkNKHP$BG|?N9?zJRQlzj_tTWNxCm3nc$3?GmzMrHJK!;m{QRWvZO0O`_{V|gt)8f~C6tN{Q2gWcl~AUYrh4FlUqAk# z12k!iKU=C~tb{1et6tO-t-7L^Wl~_T#QS%6`(O8l1}u8|R8RxI$dXgFi$#>R!L#g7 z^T)Y10sCqxMN8)l1eRnOrIn-Spc(eN# z@MF{;Qh_q4TV+2Joy$RzCt(M0nL7l{SINV_IrL1dzAl0KrT+rxfc180WC4Pv7I*^) z_@!ZR-&xw3B}y(^2PUE_ymJ&0+&8}6C$n6<{yef#p_h23xMpv|G&K>G8U$lAO~LBR zw1xVX&zm!(%4^qIE7V|{Nz@C&D&1g}kE@r7om{&6Z9?|t5gCG4D6(2SSfwSZ5kfr? zI8MgoMR{Yg<29fjTD%A8Llj1jsfulS`aA`#*&v8ss>`(86&FGkmCJ)zpzXR3wn1cw zzz6iaH;k-@IhF^=yx@>!cA{#o` z@p-i|w2hefogVN0%{x7}`mr%t#kH~fZ4x7Tx-(0tFy*#io=y3e(#y}iQp#H)Y`HCJjAS>=uE3z*l7CM zTjLLoczJ4d)|4deeW45I_`A~|XKax6rIYu!vl4Bl2JF;YuO6KhW3ZhEu9D(^`%WCX znlNd*l&h}BbBNWV-%(VTg;MJM7+Omca4jG#1CWG;toV6cD`|VP$N;1*{2V%-*TA>$Sl*eOTA#P4Q2P_6rZdW19fj5+BtLbxqPhftsP*b@u$DV zmjEubz<-v$K}h7Vm(GC&^fg8-2neO`Y-E)k@ewl7e<(d4r1ZT?UuM1P`wZ)yS<70Q zD6GOk3cuG@{po#g(@H*e%X6V;kzVyl#_~nT@%2O$B83~+qk;(@^yBQI+wTP3otOpL zK(X0-M&09afe$!6G!)NSm=|3QC_#wW8&xXLrI>1&mBM&Fhr^0 zz051HzO`*>FXcwHPzC~i+S0_Q{#Ag zr5NHKlBzx8Y6pT_xV-FrR;k z+7oqmpV+&!r?3hmwV!F0Y^b1n&9AL?=)}(NCh%~C@de#8arclj|DZE-)?#(TXYYR0 zPUYRycx4+I>Oaz$Qkw2PcW3mP%O-PDc}DGF%%aV0C7H)j+y-y6{7pQp*>HzIT55{b zVbMnG{1-!iS`XHD4SP_j$E##KFb~?ZM2cZbknA#T>2+#w@mXs&4lt&^_6V{!r34?B zqCyA0gXu8p5r#7Zumee&nfizD4HgVrjD_smj{*>=I8HX8wc zpjtJ!qL65(SfWUVx4y|H#p!M8S@If>P$oQjFQ+St;<=0rYJ zKu0Ki!oeY8fSBaLGu3)S;3|F4#))tF@I#b<>H@eyhk+2HRxaT430U!6KLIOgP@U%3 zr?fm?({xnkBGWpFOa$oggDKzPy{m#kMUkK8cDj4^D#|j=!?tUgwkw4NJDs`9&NJJwKBn5sM16{_)^lsEzY8(#qSX9 zzur%L>>HBY4*&E0ckipWB^5)5-am;+ITBV{RkQ9{J|x0a488iB#>u3z^hTdl|IxGZ zYkK~btV;{J?{`i}+>#t-F6&(msaF~A=@0B@^f%8SZz`<=IY>~L}D#h?PR(k|76m($$$+;!i zV)f}@-|x)1kAIBs|HEYNRkVR)jn`}P_t@4849g>pLYZpeAnCRiRX})OFSE|ED}cX( zpUr&ya4I`#RD3G-sQH!65sSN$;V%rEmG4^DOQ=Oz#Gg?Gsi>xbtbqS7Y`qqkzhNou zaLu0&Su6ZHi$fgoLBoIk6-IUk6}bTQ%;99?2glxS=$Fhhc#?Uuv)SVm75TejUUiQ@ zJha|&Fa}yq` zKt_d~p(<*+y5@0A+*e{X3T%!wLz0J`Vl)7+NY3#%$!olZffOV(xw&Sqe_~bBS!+k; z`)9!ez5z6PjGM5Nh>oVextDqqd2LsZOrL9_>NMrPM%uyr9+X&1a*MWUa3}o?V$maw zA#iDCtoYqLrpfg4w=N&9D&iZTi9~q2$QV|Jy!n!Kn^vq7dbF_W)P=Fj7o;{Le|lZv zFPmt2+A3d&(6h4 z=1cMEp14Z`St~!Xo;bbH_~Fsr9|-qf{vJ2$Sb)(yT3?9fG$2c}w!B}tzDCfWKw4W> zo*f;RxGC70RTfKnemz#Uyox;-M?eKQxWB3PlXhL(h=s{g^=}i@ifb6ZtJjOY*LZ(X zPg!|h{rEE^Pwsv=sUh@x%wG21tESZNtIqzce(7~hGef_<^+JIq2%EnI@3NUIVoALi z-Zm_EvzKry9xP~3%(%~h1F^1Dm!2T~ytdq=3457owc^#0TLaQP<-Jqn3&b>O@NybR zfDbg#0l%5Q_&*Bn0Acr4?g;_mta|?Ed}&t8UmQ@G`RY={Mh%d4(sFsfExRos)b5q8 zxGg|<)p}pN*xkR*Xd!0Ue>muET&OgG;G;9R=b>!OKI(F6xWu1mJhU904Ty8Em^>^c ziiS2ekJoy-ojZZ0Yc?e;q>BtC8Q*8bNPCT!8nLP9IkG^YP*aIo-|ON|hE<}LO2T@E zPONz#l$J?fK=?`B5TM0)7<4mW*^VM@dN!RF6ks>h6jGDN}aIU*dJ<1O0 znf<3>$+hLymY#m3>s`16g)r)3tyf1i+TON}g$qW@}-f8@aO==hf;Pl32S72)s? zYTkC1`vu`~6vMgo&5!;2s7qRbA+Rmx$vBE{p0}5V-=_9j@6uZ){13?GKZ0x|^g5ey z`U6&_bcVOQLV_N$kD1Tzfyls#doHVsc#C-ha>h)NJ*_7oL68Yiu{XZoh|Ky2uzOKB3 zf<7>)Z#&MU`m%M99LuxeXXf(H&e$C%ZP(exKCypf>Dq!TknYixKdK}CS~I9XwHsm~ zcs7^di5Qc)F6dDN0v7?n2R_I9ufV|+#x^@r6mXigjxZj z4-kFe&uP-2M#b(Ij>Uy-D!XCiL}&BCD>CsJPDSoj=r})y^Rj)~ogjyn{V^rY?6_?s z_qkOl=QUj4i({A>4?=HPHzOfr2h@+!+>c z8Q#u}In8h_w-RZVt=A&K+{8$#851|2Q9WSpUhGO^^}^H6$4#6d_ULZ)0`d9`uBSuqRoYFeID+E-W~0 z=s%?s4c*tiq!)4wqC?NP#A>rGruADS=tnG0>$W*aZL}mo-Y_*!Hiw`{n_)HCaUO;{ zubfbDTv&*Dy;hGp(sb!dwx^SFP2)_eWQt+Xb-Fz!ArFJ{g7(-Q&_^AVBD;3_Cx^_9@PN$+LW)! z_-!?<3Kl_oBPby_!PXMcQssjI^A#-uwE#Bw<6A!YU=2)%gfS_B^n+LxV6lDEv@!fQ zBo0HKIv^{->)uwvPC-$P_bAiN@aF_hC~IyLbF-DU9c$nsuh>G%Mk%NW#*L!9E(#q7 zU8RxLgS~{nq-QVp^u7%@IR}x9h%nJfP5La zCgDC-L%s9~vEorB1O$+A7UEt00|?PNc2x4oa_bLpGL(SGLx{t2I*(|k?WKzoyDYKS zlmF<_#Gh=mRit6kb%F|Piv@-=ZHpwbq(Oe-=~e(47WBBsXbfI2jfx;Rh%C#^c(*h9 z!%5^s3CkY5W|N!ejasB!rCE7Wl8cgxPNe85vv;Hg3eKoDj^`$Q(RC|1YA!;3@^YBNSwAKp)B_&EK^f#F?zJ0Ufk0qF^b}xvX${`)q zW62ylrFWqr9b>cLt`8dWPwZB-mZo8CxCWXj{0`#PrlMwj?2i)2Q~FoP?;XHf6)!wB z*OYz~;?E1g*}y|0SNQS_|x{FkpHe1`L3k zZw4436TVRgfhF+4H)M><=Zby{NF74sFcuC35)OFvPG-teZ?uA~As3HuAPGt^ zKc~pB(BeTxO#Jh)>phiVyba1+dziwAOI*ygadJ1`LFdt3TVQB9fE+tH7tog54fQ4I zr{l#(AsH>IvBPRH&$Ps7C>yOb8*E{m42IO424`<$(%sZwtD4tyWKpkSUTpE6DQ%f1 zeX9}-kv+#9rWz$lVcowgVlR!UiAtCn#Q}v*R#8-`*R;zHa2vl8?V?}9tK-oI8IVhY zrlx1WWmaU=E#d^^Ig+Y>4 z=Aj+1T}Y@nhW&V6a_6#riut?Lk}m4A=rgqPDyN97Rn1A!g)+@k+7@w6jj7rtvfyV6 zc)M%M3Y0I#t9Kw9G#V+0kgVOhP#5?9R9_`Cal*1kqr0t}5s|`SnRO?JH(0VuPaUM%+6y1?Ym!pbcdYM!X39Qq*Vt2tQ;ZNte zk1S(uRk&b&yA^@BrmaF`a?@#We=AR1Gt4P949aZ=e z!ehj0a^5dJ9ekvJPI*J>GNzVeUi0PRZ}17mU!~@~2kgga z{0-_&#CC79z9C2;4$vC>9p{hvF{^{CFh8lmhsMDWmiqpqn$ywd;?o$c^O>hO0M; zWmK96k!>)tBUEcyE2@c-4vY-9LFnP&m<>+QDA-C2?{0m3U0E!C85N`Oa-K2_UagOcA%4povBl!O^*eV7wTnoXcKRp$UztCrQ%a`gY3VX=}2of_3x3 zBG}AEfjdy?NLxQgAz@1D{FP*_A8!veg>4P!^ouP<)-=m+`)5zSnu-jGnkjVEjUU0` zPC71pUQ{aTiA-6i26*kVtHrbKDP2MraQiEsEPZ?;8Kx6zr?eLrD!dfJ!HzEPQ=>6W zJs} zEeMs4Cb?GgA9V+K_v`vMp^#~Z%F3p^(uYPwlho)>m+BaMUK8a&mVak|_f<%5NVvae zUgYr7@c590+0zvL=DCmm0h!8OEqUUqd}hnxVTY8HtV5H=Z+bn$e{ZgTXl?$@!sPEa zV0<17RsIaz0DZXY4g!V%8USMgC?exep!x%rxllHR-iViWX{0q%N$WcyW-eST-vl-u63Ta9*g3F^xf9 z>cYImL*s$CY`Kkx-Z3}0BL$nTdPiyC3@IH}M7j}kNllv|7Zh9!2Lh}(0d|~nj_UO^ zw6@v_%*~-JooXM>D+C??is0mDocBq8EYvjb3TYSrP}v1f0U~pz*^g8R)$f{ZM5S?v z=S3^*ibUyaL$`VdA)GPC!0Ntvmq=~BIiBozdV9UxXLe`MTbR!YpK{FGq!T#r(DMn7 zZwBkfoi;vbQ7`;iq{|_7_sxf#uc&wWf@~Vl8yZIqDPjjae6V+!LYDUVx5`A8t7rc$ zJ~lD5;;})vDQ3^%(68S<2OZ{K0nGm-UBY7eL;Hram+{6>av5kSlq} zIC9}*fI?(cR?xa2?+Zgs0%8e>AG^fk4jx4=zBUhU`2({{s*h zz&~0CN|J1g#!;Y+&VMe=*!28)nO)|dp=oDa3IFtLrseX2d2K8{)c>?o;G&$PpuFBO zyigOI8ftOT`J@d3S@cH}JIHZ}u3=3(_25hb9EAe_S_C~XXicICOO~M2oMzDF$mc1lJ)dsIx{ba{}ko!#BMd=0}wFu7_ z^ibO=YC;)ia`GyagSd1A7H=|jNgTD-d7r7sc{wZjr=1;l>z-uHvkv?vO9|y0dUFxJ zI)vzR*tf}1yHA+!8m#C20NLonr7Q;?GF9FUq*0r%t~A=wL?diAaL>vB7=FPSYogC*>^13l@}HKNpwU z-GmlRybH|;g>~Nl%v-_LVjx>qW5NK--LPk>L|a{@2%Fly_V!RL1ZE}3Z^RF`Jdso#zfHL zr8)`rrAJIRPD(|cb$*Zt^iWHgnpZtFw20qJihiA)m;K0j)*aY+fwWo^J_LZlR~v^I zIk^3bjjvCRi-80T4tPO;v;cy|2Yrb^F=AQ3yL@u1Fi0>DFgf`0gP*<c{q;}E;zCxe$pRU~XT z*s=(U4J0E-%|`N0E_XaCs*xi(U`)+*r(yEYG!i!L@nm}wYS%bM#gNn;3WvKWNt*}5 zFpLjWSdm@@SUr<0E5JUUW+lp!l1fLWK5xvt(`L_-e zm5TmYqu}KG4(DD_yc?1@8!`}EU4FNHXNgW&dp+yr(XrV&eL?SHiiO9{l`xl+;yXp? z$|`P?=V*enD+-4GRN;3QH6_OF+`UYs;bwGoJyvH~%m-}zDU-pwq&vT9Z|o)cxJ4Di zTDp~{oJZq^Uy7+(Ni;Gqw#(p5qCKL{ zO<`@1!c)AYD@9>;K6cQ#gfL1qK3hfXO7tMc{NUY69um z$)m1?ZT9`9;`^A5o7OJ!FI_!9K5#bt&jAhk6=?qP@mzgp$ zp7vg^Og8)AXWVVrRAB}>hSu$pc1Hk!_?u+-3W~pB#ESpu;0?hazJ@UPYU9AZke7h* z)ryETk)MkKn#~^)$M`onN3k(LLjp<7%=3P6Ngl}#B0&hltYhg)aIpVay;S8fVQF|l zhQAKFI}}Dm7O9BS>=^R^Ofb9xPTlD^8}x1z2CEVCx7UEi6ax{fkH#$u*zd_+znxRu|>b7++dKibEzp|Ex~h^d3o?@l}0{$ z|9m7WgfN`Jd(D}ehiMHJd({5R=0Bi_m5K?cyJdvDL>2qz8nvmxU|5Nf$L1?>l|GFa zfBgsJ*VI^;zA_ybh7-NP}3nAbphM==1q(1YFu@zLhz@Mt5Qx~TMIW8$~z6D zXem&)D-rLu{sJfLMFwny*p881fv&mxsvsLHVDI@u^y@xWS&jC`wg5<0uFt6Y&jud0lkO**=h7MJtLeD3_sho6a{S0V}-8gm2 zwD$T5_G4XTF2vp_@9Ct~iQ%g8tJl%+eRrBEIKk3Fx{Rnpq9IZ07P9tDj<%h$a6C%g zz0_E!UN5y&OfE+oLQc>wj914)uypvUI!*26+$)`oAPBL>T>kd?_@)}yy!~5pWnYHW zN|v@lUKnl--gmPo5g+VjuPpyH^xVOt16mX1)c$E+JpB2tqr_S7=T5W{gp&%Rfn#>V z&IEOXb?PzO|wuE(cDweE%X zOP<>Kz#bj1)L@S{=*WfV$>v`l6LQMiXi08s)&NQ}ytsg(`mghIh%u|f%i&Y2uV*S9 z(sd4#oQJ*;K-d8yXR5V;$^sZ8UyU0es&b&nK1rV#_oB}D)Q!G^?>F|$hAwR`LCLkY zkTB^TJ%^*YYl><`!ZvRCq6%L}E8L6?PxG3-r^>@%fI(;+vM-b-%$^8|hnQ@^yY0u=es$4|7p1uNx+AW~;LNs~*(ZF=23-VVn4 z8xS9dD+V3fjZ{J)cBsDVhJvNfg?6n(Lf%@1Ej(eiGVD?3UFT<5SfNNwdh8`es)}QP z6Lnhin4-B3^TscI&owh+%m$RNJdb{4rqky0dSGsBa>!!eeVwV1xX3NUjUk$$D$5@0 z4{VZc;U6es0d7Z9+NwI4&m%r$d>Qq^JaVUD=ffl89BP=Av(xi0AT{qojL|zRSksqR zEWfR?O!?e%Ta3yX`zg$c+0ZH5{d>aK0pwfL+x2QWz|XSS~Bgr@?QQ>4V?824{Pi z3{F3oi17LGD3jv?U)z1S)e%;^S4Pln^)~t4PB9>e`;h695Tc6|RC7x-_1|jMvjUla zE7s+Mjc+?0j<5F80qO+#0*=q|`M*#H6!Y<8a%q4z1QtSRe9CJznf|2<(z(L6?g-gG00N zQdF^7qafKp`J^(t-cX&U9#%nR``JtRiDy8p!Y&I5i|uNN8=eu+Nc&YlKtxJH@La*0 ztc}&hz@$WFnUsqW@Aq2^Cuk-;LvL8n-+M0RhMc_&D(^=9Qz zcYwLS*xi;K<1{BI#FXtAgWh(jR*rU`tPoXO8Ag@LiqO5_HQNW=u3{BbTAzhCW|0=_J?Xs1)FT zJtp2b4mO)d;ND-rPXRefaG>+?aX9gflH~UWw_D z7;?AnhqFh*m~QF_Yve#@;8l;yuD8nW?VVrUei$nDeEK1V)VDn|uph`H7beO3T!{5` zD>*ZL)}$q<-hT9rO`!)+8W^UZgG{WGy)i>G5R_^>AK?+sy5 z1-q3Rzv?$MO~3f_eyb(kDTipm>9jEq$O5k^?3qk0ADmx z1%cB9a{*VJZ;gE&hjQ&8>htx?6QHpuKg#nBW_;}lyc;tg2UuNvYdnpQGrk4Jx6UO% zuSBjr=$V~f7&VJP9LfTGZ@ z1_6!ljCuMVVDw31{9)!DN{8yvXPj3++zkJk6#k6RNk<`rTwd}j}Nxf zbjMp2@{TXGO<5Xbm-g1vhMlrDH96 zFb92n&kVAb6Dzi&DEN-+Cpd{OD+Y?&EI(-R+EswE<+Z)n^A(rnJ^g`pGRE1NJ(sO3 z28ur8^mDU?^Vu7PNnvlGjFQ?U;zO;dU%~>Vp z!*H4nnhFx?0AGpt!mzh@AU15UA+?hGfKZixcKf_m$CBNXe|sWj`aBogpCMbGp{JwX z;xSZH^+Muz%H`%w&BowHoeUDLjpj9j==LRNv@|-4Qpuz^FN3k0=B_3RGSHrCi zw+ig!@!PAWGw|5*XOFaQ>*(Z|OfYT}iG%XX9n0v~aNm~Sc*qXnN2d}V$|n0)2Ief8 z?5uDGIyqBI&;6d;_xbrC2Pr?F)xJ%c(HmY?sB(UkpVj#qgvTD0h{&tkEbm5x%mI4Z zf2}NFpZNpu+EjrHe*m9;|Fs}dz{(1U|9#CMiAalrau57Dz>Nb5^AQ8M_5z@zj9c}h zmJe-9BjpO{O7PL8+S8k z(q*Tfs|vo>V{r3q$iHVj7gPMCN2*lLyPPLJccznc=Szkd-r(Yeu{~tzIuTQGE_{lOBxZvnicqck=x6aEam_Ei7pbMUI(LwtA8#2u7mwV? zV)Dj1B3-=^W;;vf!Fn|kJAL=$9uJ8Cef=yz$HrdGF{W8kTJILc}(zG%DI>us|yA}$}@Rb^+2=e4>6V%Lw>88BJQNd8u0C@rJ1i zae3?1rum0gemr{vmu%kJ(kdf*J{#=Rr+z^n>8;t|RPR+aE9zQv%+$Nt_^v;7*`zG; z)b6$?>&^98pUM`752t47)3pl5F?%rURyZ#IdS|OFEUxBwQ=$BX`<@4O7$kIC6$EG| zP#PZ0ZN1Pyvfv zBQqCrvPRI~D0nvPK?znIFCdr2mdH_SNfp4Wq)XT8$N_(ff@;G+`NLBw$;ON|2(-u= zq9{aD1Q`pOIQF^JY3&||ubjZR%n$r7|HQ{sH;186Wg9Mj*C25#c0b+wtk2_{M~xG^ zLY$ovuQ_R%=T)5brR=B_YIjOHRU))4m>H<;&dOiA7)(o_&{Ix!{gI9%dlV-+C-cFJ z^Y|K3_rVg41P|tQT8AF=Jeo)#j(%DA7eF$dE%YlN^w_l9_4DGjdf`L!PY~XYTbdqO9JCz2h}i=2YxN3Pn2by{WIG>` zKO(GT;T-7V_!;6m3Rlu3~5=>T$m6XKyxsHuIzZ0<3q9_D@$;W2fW84LaA*LA0PG0xZJR^A1pN8| zU>(qJaPArDN5^-&kQtBEbuAZElmey8o$)E_QtnZSW_Nl*jy5_spUoCtNsL-W5-IrW zf{m7Apn4&q9S#bdgCwpYf{C+fjMo+iW4!8iq}7L6eU^2G5V>7UtIV2*g79?hrt#0U z8lw2}P2&{1~GeCpZ(94FTx5*Ygj1wU)}eU89%%0 zbyJCPvpzp{iFVJ6`no`#x9<1rDmA7>J zbbR9qZDaz!y66`n_uC(Id`RLIy?7auyLr!ekN*fYsb8Pg*Tr*Y@HwKg;Ee{oeeb#O zbGU1vtv0t)%BuD47g;)P**J2#OJvJr`K1P+;+_K>(RaFXXy*fL<&{U4$Bf1C!P3B; zX63gfZ0OP@xj|#51(1{~|G@>Prbq6h!QR0>CoyHS?akJ901?d7v181_CiM1~)Y|Z36a!lIuI$49gh2uUnW1s8_UB}aR>}Nal5>S!2LOU{SRVs=+ z`x=rzCZ`?9+M&F-GZG^IjMHonKYvVx=mipZo(2+512W_w0aXw%4oxNjG_VAY_pSW~ zMEApELil+Pz!4la0f2en1_CA$Sv$}Wodg+=Ne6oqH;aA8u5j~!B1Z1p3?e&$m#Hoc zFSWNx(V!?niq((gywkvW$wsK@r&!AjsKk+F$%66%mubJsTaleq8t4uKdG+zyr;Se5 zqpaf6q*m0)`om9caENrC`YkX{P}5Bv3a+kqQWB4U2hQviG%Gq@en&B^TwJH$0Jqr>^WdiUx(8V;jF ztS?#t5WoI~wnr&5_#2`a)8Y!|vMGz{e3bFNOmDF55_8%tVlMWY#8yc+(K(-!fzQ$6 z?YRv*U^)Y%&IiBi7oQJ#QPZuo`>ZA*LwjNuK&5=7L4)$c4xQb~_2TAY&Cq}7c+D6@ z|5$JrRnuxRC zMdb`ETyy=#KjDZvt%zkYS%6r-^wzF;GQ1{Fqr`N)*|wWz#*VKfC%EWbtU!$*wHHpd z*1n(`VMi2mOd2wj#(#^&qj)%rKj{9^re>tB_~ney5X|?25=9ms^G3R?s)1H_7qj~W zW99pmzh30tv+MOY*^T|dw{2SgXsFm-A%&nQAV3zJr4W~E6_E#uZ(o8_zSp*ZoM-i@N3m}G zj+6zL6ZXP@gN?R}s1&b+LX8~z2KJxB+H)zK2jsd^H?h!`r|mEdJCjTpQJ8hqcrIqw zrNo`sOEzzCOG{uw^`Vpka1Pd`5&p1&E(&)RBc+2!PB+vop5T@{S!Vo+w1a|&Wm1fw z;$7-_s&}i0gVUG$g44x;m4jfC094TLz0hS9u%MYV9jxx>T9{!@LOe=0bkiYSf*%1} zr^kSD)Iho<9^ynwAPl{(R)tU zxS#e#U#93T_1n(7wlpO^0TjZfi2=}ceepiFDJG&Y^ol~o6R*iS3uUVRiSi!cbP`MM zVh-`GtiMs`Z?QJfWMAM|O@CMO!8U)TYi6d=60MVk%xkyKspVF%cqQN2JG^i9PAat* zacVxP+&R51;k6x7deb}UhR?gO-&LZN8Btyla@;|rU80s!@}CW?lilzemFO?SJgwHF zWNhDuC3uEUF||6UQW+1NYoZdjysxo5YJlxC*%!IqTC!4>QrXd+*pDQ_{yGe&m>^453dhU>!B zKRte(=8aWWqsNte)}1q5q8+5^%+cXj+M&*UJcElw-oGPM*S6>UJ&`WQmFE3p4IlCJ zkpnGHu#5&~*$fY6xinF?*&3e-l6uE4mjg&C2Vxc^2e=u)J#YscefB7gBnChlFff6P z5%BDrJpd$+L;->e5PB2rKC)c;v%Zu(4Ix(X{&ki6hqQTQRyt%radg=0z$}TtUMTS|iX}Zxs z`t*j;@Y@o#`;J9r<`nVTtUa6le$c+$ggcgEDw={tLcH;q$R=_F!sP5F&<5SlZql)J z_=ePx#;m4GpC{C-h$DCg z3nzpvKmA0F(kWGUPw6ngZgG}bsrN5bOkW1RCt={g1ArEnjh0Fy@xMOc>xBi$%6l1| zuCv;T+3=r)Pu@`!TDH$lf3QaA(+u_yFL{tXJ;Z!zXrnGz{`j;h-me=3Zjey-ez1Wp zNXsauf-kawz&rc8j#Yq&l5~ip;dRMjcg?`jIPMGTu4057mfE#8L(M#KzTwwltLIFzZR*jxm0}J$(Q2~Jtfdbo@sF95OvFRaCNcOb zg=x*Eot{>Wn{WE(1S7?qahd8OUED$-J&11qbL=~h_#$-%GMA29R;c7snEIcr$ip7c z%wkL-aZF8fV2!llqC^UD1#N{TxbI8dC=ki7EaJ4wz8mID!4b1hCAHRM#O?6#Wb+V+ z5<0UXtg!xK_CV3j@jp~o9L3VM0d}|;nyTK6yk=INKz-z*LM#(aXk(c;xGnwA2zTet zz7m^xOZWT@u-qR-FR~`WQ$v;JkgEU7rX0ZI>vY6QAP%Ti`+9fC6H5EqCHNriBAkA( zTVIcG|8|r+=s!!3($XOAAXN~^3Djc6|C#cC!2c}^0zm<%%w6>^Yf>@Avbric>J8Iv z#$)EaeweK=e1*=~EwgbVmZ&#r2d5*TyVo1#jn9d9yz5&0pdfbvAkrn0?4W6N#-D#@mwk zN-Q|y$H?#Qa464dm@#2Edm-wl)fkr}MH~)EjDfVpQ&5OR6vQn~)d75DiRQwnXGq|K z<|ayIsHt(2NYMrX?f6Ln=3Gb8Ej2#xb<+U5&Uw`yd*MRGg#^IRv;uYo_<{W2;~R?Kj1O3%q&h5H;!wPq ztGlB9S$cb3+V9r5w|v!TKC#i8MLW-3v2U4RBs)5=xW0E~K+wG3LTvp!XR~0hBIwk#WeNyT_VA1(!v5cCC1-;JH!8AT)DJFl;2*FRSuOf120H#&Ru z(w8q)Ai}yIw`tK@r-ibdrHZHF*~F&F7NlS+gOUO!;h`0;1|2(^J!DU#E+ca#zKA+#kyH z)(uCdgNv9nuW<(`m<$xHQqi%bF7r&MFrZo`TXHhKqbxuwpUr_x@D^TN zaFAJU`N9{+2>_e!TN3=+7X0^Wb)eJuUdI2n7SMzs5a9fN9kr_8R=`!@KaXdDD2gy0 z$8SfV3L=3-WPni0p(e~d*vfj4aASQHNntKAd9z~8NWWJdOA`_!SP~`scjL>tnZAdEd_TXTO_}VyO+y^ zA``|LadyQ?77bJuq|f0t4dR;F_w+^mgsfPZ$7wgA%3|}{BrrmCrKWyLt>4^uAXd+w z$@?=kRGYuCKUJ{5{V4yoJ^#lN2zpul?E$b~@U(+_!8E|b%>CO(!15pec`|#T_ztWf z0gSbXl`!y@RlNiN3_zI5S&5DcYG39BDilS?U^;Gj7Z;oBdaS{}?Bft6cdKU1cAD{i zf0TVzn2q%lZtAbgnX5^7l5%d@MpJI}?TlF3Gv~-rc%e69)Vw1~u|oyYm**!yFz+oM zL7Yf&+4Ht}m^fk*rd6cQbci?+?;K~oh@o7il2E5QE)VtIGQ#-a(7R1NdQ}6REh*^O z$*RUb46as=2*0bJ$Zz{>o=ZCAdreZy9T&`$hiFJO!?jXO&dh9hnQQ8Cxd|Wjs6p=s zrcz;T(Zx-Vp>Zze;WV8FMNy7^jGMVG9Fi^Oc(qQB`+XQ^q(}iWeL}C#yiBUZ+7J~? z2Xv#3gV>v9jYi=fuizb`{8SulEML&4d)2nT+VD!zGb@BN{QH1&Adk67t44VF(b(aD z^K5t=9iRI|&P9uRLvZ5Zuoh()kr5)ht+7hZ{g9=t_4J0?t7^7G1ko#*-x{` zN4`vM+b`&Lb6_j}zXu>JwSd2Gx)FHE5ZD=kG(#F+!t&Fb9ONL%0Cy3V;5-v!-^Fj)P3@p4-mM5kG~@}_;?WUmA6SBuK7$fo7D z<8OQVwFlj#!hgILwDH;PX1q=-A>rlY)cw<}X5~}$EsMuAT9^6*Jdz*%KkU7CSQA^@ zFFXkm2!s|o0tN_Gnl!2I&_ievdKD0)_adO~Cm=4YXyr0NEwqevGK-GCs7 z9l^GX-x}O|KhJyK_q(q1UEiPQtZOo5t;x(<_x-!ey4RW+qwPEbwAa@U9IfdoANC^f zN}xK;xY;M6|3zE}!$Y{`T@; zm~Y>@owU=+p!W3GgPL#r^B;`8I88{!gZQda6Vx>kkE+~i7Q^>9F8P!!amU;2?f8~- zhz`oxKYM1z`}-Hr`6f1GYniRSt~DDLUTGc7Hj0*`^X3-L z__+zRmr)ET41y8d`3=qQfqJ$NZPX{QR9DO{o+~i3|>DU%@`s z^k=f=ydKQWQtu-N+YEf!>GuO&FNwGh<@?eUx2-8o)B;;U%RcInjrXHQg|CMvWq?LgxUOY zF^o} zWom7x)x-aeiygm50Do__?DSu#`Cm5&HsEiY@b~ zQ)T%^9wC+0mvyYizMhXg`{tA8IJ*9G%9c{)iBYYGZBrNS3pU)8_~mK#lb`d*RAM|WHlX!V3YllJO9mdr-?K-#WcNzO&yt!kA#h5zC$*a5p@gQ&vhJ=p8s)1Lqj2< zrjhOkvp#41 zN04?xX=A`6=cmklpKEg(W_0>TmJTbN(-3j-nxfA?;XhOB+gysxSlmhN4eL=V+ka*A z^H!VOyVq_}=ntzC#rbc4zwcfvYP_8zX7ghI91suq2# z!<=uv#^J4Zg7FK=Sq18aQB$?~KBxxU2oa$$Exz+2cJ%i~DJNIb3>HpQ&EGKBlBvw) zQ4mj|>q`!BcWsoFx^FulkF>gGq=?Nu^HLU5fWM@;o-Lht{`E$zL7B5|K)8Hn!}Fk| zQ#$Ws>u}+Q_}-M0%TpOP1JZd%1Xi!{8<~u{?wQ`XF_jAII~23wY<2%J*vBc<{H}O zvVoPBzM^(rj>^V%1NT*V6(&0^LmFSk1b&HxdE6y|IrfSYe2vkJ%W}0e*Gh|-HD6LP zaXORe_!v=8Nt0@^l2P<-XSYIII!F1HtW%{i8X%5l5Me)^f zN$<^s6Jmn-&B;y)5QH&=L2<_(ojJ2 z;fre%JpNl25p)pnyOp!7*DWHT@x zC6&_&y5*_sht9i>{~CPFD7@2I`{lhr#MeQsqmKegRo8g;uj67}=AX{L9^rBfaHI{| zYof7*NY!ynZI|p_vmW^!qo^__+3={vJcDhs%}8Spb4=OhRZ!07oIhj1ZiIz;1&7n;o zxCM`?=qdE88!~S;wn=pFiAC@Bzc3G&LFkq_i|0%W<8D}pJ@}WgV>SMFjJkik*2D?S z9=fr{d%swe)OS6~Cx;WB9Qj`o4n8`7H*jKze-%+6#D6^xIcYY_`{zr$JR5Vg5;^|e zUe2*&0fV)2^md(lgAII*#ooK_9M-iLw5%d6^&5vzL~Y+>I9aG^f?ZE}boE-b=3Tur z9&M7(ia)mp?cH%GY%rfNOua)=s{hx}`!i>nYkJ;fau=j&D>Tl|+uInwf1wf~VY$DM zP&(Pw_$WH=)Ga^UPHsMazNco_%wVc%^0nz@(agtXaqWgHDe^@wp`}~@Ivcn@LcZ`U zQ?hN2zN|3&!Yt+jmu2HC_E&236Qk#Ve8r@s0P&G3720h$B3i-{rX6!5Na$f%8!7Yk z*b*~R-~ zQEAuIvpY|Ewr0mmyJwq2^sG2UkG9___h4+OfIv^6T-b4S)_T&hb;ZeK@wdcEbKg!?h-_$_WeH(DC6 z@l_Enr%U6u_tS4D@+6cV+cu%oBq?r>=JBn`=UJ|BPAPDWX!+T?QpfILP$vgP3yu2~ zq10+!R(7uAbWDjd1A^d?V!=EgK77I0dqfLS!O?$3lF_=n^rL>*-NUqZYT3NSDGKjt zduV}a5f>f&qY7o}4IK1i`AahXp5B0iP?Z13`mf@GTwiXd%S`F&iq~iVbX5I_>2+fJ zKXqrqnZk$m1 zQpWVNo#Txx$#ds!svakuJnz>Edp&;+^SR{o>TH{EWhO)nk5xp}wxBtHXx-uxoKpefk~MfaT(cG3r}Nx*tmK=yduC?6vxZcDP=6tX$Oq$`G zaFx*&I(Qy@BU?qMej?vV`19wxmTDP}WP7Jh-qHY8D^#_l%-+pUk6v^YL{ zcovH4?kUNBTo~;#y;`QRQZiW;+wNhGZ;_NEF`rLQQvqMcj+6<_edKs!!c5ADx_!Gc zIcAN^mPR~RXqIHvKaMG|z?6EQOl@x&2^%Y@rZJze5;3e%`&=4Le`BO}#IRbmE{QbK zjn?8M8FgFtZ8psp3ukJ(=J+a}NS>~vVWV9HJ1V%NKa@FZZ>-mR)uBg{@Q;L#`S3Do zMH0_@n5t;z1XD)7qDJiEm?ve>&&S?Wo%FfZf;`q3wP8ZJ9J@#?+tasMOJ{jv#;tO^ zfA4c7zL3Z^WolT?laM!zO;VF$x^+W?_`E08W@XB?`ms+|?9H%^dcUDnHKAb+Sp98B2>5LVr#%p&5%M)7D^0Xc`1Y8$ zAr(vKAd4C1;m!MwbA_@E@R6$YRCq>1#n(;Md1>|EUM>{QTc3-#v?i?fvO98WRfXrP zl+A#AfTzKGmsu(PGy&D6MbUlKh<_Ep8wXF1+&W=WzmM zs4%>0-nje`)TJF$`PAsgv)?jh!coytiH|p&{3=D^Sj4#bW)}Ch|Ga=TD7R0m+~e1o(67RJZ_#xQ23;`1Hvy< ze}UPiaX-RhlxFaSh5R3q8*buaGaBWyu@E zl4|0N<^!rHHE)zz?Kt}<9^fLgcC<;{>{wZ)>yzg31BZ7&Db*s@E%0r6kGqq=~-MEXPI4U40J9aoq3Zw z;_PQBLzT)_MB25OA!fJD&hm6gRjbJTuA=!6I@_6VY~vFgxXJS3ig1V6Sgs}L<%Q93 zc&T-3D>~mu>bG>6m~JUOiQ1U%vz6>pG|F={5PrE!(Fl+LBUe}V^cb=hGs#HW)(P~ zNI;16Z(jX-tIP=eRl>n|#=kS+J=}ji{Bd;T`(rlWP@kyiNMElIcrVO5kWEDmr;LMl zbaj;s0-~c%`bHXtg@lKN`i4g1G?fg)g2N(h!@W-XDjE642AuXajr59FG6@Kd_Kj3B z3HFNiHS#?j=HshG^bPRykH#HSR#8*3@D25g_6PgwYD!jKA-+mZO13fH(edHFN@gKm ze!fa}@UD}!ccAa-XeGx0pM#TC)J~lEt8OgB^w$Fj&}N82{oe5E$2j-@)TIBL_kRKf zD33210-%GT`tPT72LHCR|0lHk|F-czDPWDnKslG*as78Krh4q(TFgja256(;Sde3P z2CV#WvPwo|px%r_fu;%w^#i*njZBQfPXC>OQ`G?Hkx!gZG6@Tf2E_+|SJ8qqm0Y}x zym6|^KTd#83GfZ}iE?#6apG?ks;UAC|5BlUsXmSW&}9)UYAR}qDq0$v$JN1!%DDeH z)HAqGS5y#A6*dp#|F7!%`_lKf5W7AwFuq_M79_;?C(tm2VP6MwczF0l`2}zS{GyVg2T4*w22K+4${_u(7eN01y}sep<+Nz%q?Gg5B{La!pp}mASi^Fl#-T_ zl~YsKIIgLsZD?d{VoEe4fma+jI667Ic%Sz1J>%ye5EvO19TR&tEaf|DFso4Bz8PAW3O8iw* z)xWB0@ONFw|E;nS|I{||pX%oRQ{VFc%?hvmQ{(sluJZR~2m`2(;6idiKS853h(tN4 zI#wSCwnPMu6Gef{zzg34}xcvjhLE1zg5M7|0W57!vS_%G~c+ z!ieHZGDJ`qP@a1bISYY<$;Y7fqk*WuT8i$65J?A3207rA4Gt#>(AHJJZ5)i7EWk`q zJ&1VFoL$rbwe@j`gEqn>*ds8}KnwMmDqse1_6tDzs8u0ABfwQ6XgCcLqmKKhBEVHR z%^1MEdobnzw;OIXxMd;^0trQd=?W>sy*y8d%79A5aRMrU67Yi|y;=h4Ya$7N;0VwV z6gZO+qV|JI0E_|_a{`_-0vecrf54E2aNzcyXqem(1c1s2VKXH#6g$Fv)BDzwYZMN# z7BkNeV+_BFtBV)Y{Hw<)8au|kQ!hBNbPXllF@{RgyQV^{;AWf5D&HMnyI&nhkHj-IIt=k{|#BKp2jS zBtQT`FvTo6VG@cG^;E~8c)torH&H$@nE{`O;xvPw2tJ??0`MOKD?kxu{9icbAn4;@ z!UEC((i~6+0;E%;KmZFUtN;c9L7<8WdhRajKFSx7uMvcqH>kAk-}=zNl+#l@ z6;Fw{h!X%X0MJ1|<2YDyVVwgAoyC~|>k9~%L^#+$fP8p}03mRx0CGOqF$gFVW*-%O z3J^R3Q{g2&HNqgXBo#-2V~}GMAa#j21P)vm2WSg{dss1`DA11t(-zPYkwgR^k|Qz@ z0giAGBNgI8!WlriAO<2QxY-_HJj`Ge1*`xkIKj>V%>mt+bB|6oogfJ~Yk%9Uk@%Er zW7d{!(0sAnV9QaNGQF(1u=LsE36sjy$A;XBN=QLHp%ohMsZ9AT z3F%wXRfuR(VB2DA_oO_-;9G%mNtucKZek_AoQd(zg(vp3^KC76*_illGzSE^t!MoS zNy=A_zoOw_VW;3xq68o>u;T7VA%K*t5Fv^*3ev|=VXPxCsR_C;@Q484VpMbRCkV;x zLj_PK04*p_0TA~D$_;*Z*ch-(Nq95^Ok{Y0AW;fffrK=K5IC@f%rwQs3DSUQ$0!3h zafpb4g_(-ihqb&XDA$9y$G5>24}LIafrWyN)UhN274Hdhq!AP~PoV73>x~PV8dJhM z-Q%6lV&Y2n$eN)wCYU+uQ=uzbdmG2lQ1sfVp`GxzQg1f0ewT7*SrR5*U`ZKLe4eiq z5p+@YURPDqbyY*d%)xa{Uu1{Os)t>twz{xy4}UQM-Q%R&CH}QN!*vW>(-QB0t1(Z& zhv9*?%au2BB=cQ~*^V4XD&h0N%Jl0t%J`!3`MVhNd-_JYnVmnD>J?*f{Sio^WMK4s6Q567kvOmK#LhOfT0cQe905xPxBEWpWq4B__ zqoF8(Yp6sV7X_~j74hZ-2xElI7r>N<#L^K)Q95R*KHr@45PfxDus~wPdx?RIo>=UN zy(}fmT%^wNSA5` zAz+%)I4pJnXW}7<#(3oWqj8!(z7bsF#sLLsL?g<-p^b&G%=riu1?XFdnqcIa3z?xP zLjSK3Kjr3Dtt^sgNq=lXlO2QwONP(qM~4` z0#-0UsKguKOaNa9C=CLT9AGFZz!gH^fb$1fj)fS}5;(;AdPOCZ^omo|muh#X8pQ$7qi<}l2NPchCuC%DZ*3zX`bJ=COtmd*! z{ov{&C!tmw!HrfG)|BL`$IABvl8B1DCdpN&Sj|+v#jJ^34XnSFkFZk+KXo|zvS@~R zOTk&u5426xCUBlq@&9W8&m@Eti;LV&t{hF|-u#!Qe|gBBx9Z)mIJy?B*q_xb?Vd0ve_8OR=nN+^ z+EJuwIrnA6x4S(#UXV!4tWf5n&51uGPq~{4L++11zm)qdT4eU>v+zm8S&s|&CdGU6 z+SVSzIm%G4p?P(OUZ|E(8cF71@jyNzb@4iQr&0b$z^_|zlTv8kNX6<1@^x8dwhg6c zXMLydOdeC~Dz#r$XIGaCiavTR$ZB2njbIA#>=pIVjjtG+=aG5O^uwQ;G#-&U_lVCo zAcCZDZmDfcxO#|t6W87Q%xrCJ%iyD4^~4+HE~N{mO@-kSa@tnCbit;fSHqC_%Q811 zCI3|DE_AE0%uQK54p##63xVRM^ovmSH4$kv!B;5eGNK-5KMp!zH|&E#y9qcnN*PiU zwFVCR#@-kbs72`FBh$%^I4IupyUpAKhti zZ-4CZCUnfS4~m=aO`q+(?1VY=U<*pXDT+p?<|kkd-Mg1A#HuX7nwG`L5eId}4`35) z-W_t}urNJ@ui1UbR(5r>fcYu`LbxF`r6I*#l2KjR9@IO7_pob(8I!cG@q8 z9jiZoBGPr#vn$f>FdOF0xt*#`V!^$0Gslnfn@dVmpAeLdWwC3go9(iuT?%%kWtr(c z475-TsLK$(8eK;6X;J=c`7XEKF-B~{f+R_&^fsAhFH}E_sx-_VsZniHZSgIRyv^0o z{w}{;WizUvJEzv8TGS%&GB2HJlg(1eo$sNmdG2r;|HT)lSie~3Q?$Xicmfy3NTnO6 zn%L%g({3V|f(_c{m7D3t300<*0kLMxvU^CM-(Ed`A|4pQ65&zjao(Z$4a2E-H)~&Q zUA}N$M@qsri%|U`jFUaTQ7S{8;-&&bim7Cb86R=fseL z{N?Z<3GWp%s#Yd4LNwq`KuE#>4v8aTsVFQ33kn#3 zT0l_{lDHo_8o1CHtO<+^2#~Z0lh&)^!>*|kyfdvgyQw;op{DA1H~9e z>Rh$cdekDLgX_F*xfxMQ8*=!xN{=e?%4O@zPq_8529Y$%Y@NfX3p&>cCnK*7ZKWMS zj1;f=p5XTzID1o_Y~9us`RwgfXC6;c$&`7P4b@%5ex%P{*oz^Wice4DJlkru_`)#sBb5+vO7==V{UmJx>ho?{Dm`$=} z)M9HAFqpwekM?u+fr@0m{B-E^qlKT_$6o~|eAJCXQS^fHUM%?}Bhlj!#7WAh_tjvs zHW+>x_|jZhDPe4SX$0M|oE4JY`(mmjP_t1=#r1;wV%;<2@PrqJf(Jb(EI~*RQ)mdh zB|HKN_f~;@7IlWf0Etl$xZQ`Qg|Lqg_o0T4e4n3$u%v02v9R#~YlI8}Y~V$}h-!s0 z!zYVRIw7CAw6nfQ)G2W zniO4z&$e-QjdO6lq2UU}hHA?D_BlUnxYscKdWq12opW1c=7en?ipmK55Hx39=g$#; zdfPrZ5n0Yl_dk}msFGkK^4QWLE{idMJpO`HxRQrj<7{s`FH_}Q&SxOq+@x~Groz!u z=B-YUTCw+(oJ{D%(=o>&^9N=ocNWyX8fQcox9yT&i_{vb`}A7d2h%?=Z`0dzqbG@H znnHU@KXlFAJ!>74oF+NrE8J=4;HF_yWN&_U*>1j}pd*v+s{Br^upNAGuB^-2)Gti< zL6pwkYl#rvf|4tbBqO;x3(3aM(rA_DI2M1s`7rH z((qWSMUy)0?vg%n(d70om@pmThHFerW3%`bv>{U#$I8i3&nkMm{rvmO5q@Q+b%}Q> zXT?OR5D_N`I&yG41wt<&p?v|;dFsMpCWR$rORel(!^K6}c(PD!n2>LAIMC{Y5Q3^( z_E6r3;I4}t`G!D3RGZ0aXc7PBN>#2$N1W%bwieAQ%W+4h1 z2Qk9_FscF#%mEdMw;BcJFNJ1?0@wrH3*Z}|D6oZqtq16I0Iuuf5Hd*w*uw+(K!7VS zfHPK;2%sE7o}{=k4BhsJ*eqS;?7ub&zsh0k43F1=5LX(!94@Qpo-KGF^oUo{zoIEc zuAvZ8#f#Lhu&6x47<6J9OxtiQDjekGwJGTm3NxwZ&DT+K z?FBu-w;r6vcV^2(Hv3OyTShofO5Le?M~nV+buQ{sZo0}A%U(rpLV%kKcYgF6nHE&$j4oO zWL+y$*LB6VYqz2!_#^xD?UJLlq>Pia{i6FBIg;52xlpqCw?~{p`M=selJP}FHj69bzg=>6MdF*J8=yk89Kqb$VcGstujT%%O&r6fT zm{?w=kvMYq%rg&3-Wu&&dli#ZQR%tk+HRXIKlAMUNb9WTBaPXM8xO=od5kwcC6%J@ zj+{Ddc6V@)X0mo~OL1`NeP`*A@UB&RoVVNL>C-60$jQ9ddU7g7UQe>yC6*HGDtaN! z4=r@XfqLCz3LDX?Nh-=84#(MUTs8tx7<^(f0))Fb$HfwnAk4^&r>HSNo}wVW0z6YB z=q?~INzOn40aul({}OZ_TG2kxfo)>=zBoxEVB0AmdVrz;SeeNrodmmySqCZxp@~l> za_VuW1HEt%$6@{vfJnrm04iZ%A#k4g`Z#bh0(ic#2EtN+*#haH(}98u1n`VpN#|eT zAjO)d4(*cl6uIyO+l^=<;w-k7q41j(n^R9z{t>nJp{wujn{rNcH|*P@9Lo7^<7DJ7 z^0Zu0XIuKCPyKar)9Q-{KQXPj>`l36)d%$I9IYLYlcBfAdTE+uWH<$IJnG7pi*vs; z`7nLI!;L(`+I-EB-&cZ6k3PZ&f7;8Z>_iZHjWw*K0KfcJXZoa6JmQw^`;;wf ze_uqQn9Lj#zCWur94mU2F`0P)2YN*qToQrpVB~>@B0`8_z;6I8yfP!`8sQO1K#ffz zoeu^aP^tPLSOo_PQ{zDd7K@;P@Vvf!Dq($~X|+v~nq&<$JldgsxQae4a;@NJ}6Bah{+M2LEwiQIO6XlQbGcC+66zV8>~ z#0 zhuJ}=WdnYP<{5JIX1bpahvS_Fx%pd6JIBv>kN8ig*pu&Qd5s2U_Vqk-C)?BG?W*or zM;KKeHr-ROwtsEDNouMV;fbhiyD#Q5*H-toT*hI#cq(mtt0nYBr!Bcub1rfLUsB|k zZm(&wTm0xsU+_bFcUJ-1h}{lzjxUvV3I&_yq$%FYE3w4S+i5|rF3YOM4@?@P=bvdV zb*feKq7`nn6fl?eH0R}gFzwPJC%8+kq0cH{of1+VEbueOu1yN8Cj*<+#_?=vI#^2gI=&a60J1mwu$GeUCs9g~3Ea=PU;X4x0(6wf_#rRve z#9-s5RejKWX=XBATK=^VEjZidg1hOVF^@l=+E?NqdCm3O&}}C4*3Zs_a+o#UM1B3e zQchi1N9AnX1#R1;Uvnqc)#_uKp0|JYwb>%YY2A|Ld}B-N%fB^s%}JO}wkeRyEF}AB zjI^~Zq?DSPB;|EJ`pIL8J^pxoUX0{|dGs@Le=CQ`lX+9|@fkbA_O(JS=SHu-coAx2 zd(q0whkev;Cn%_dyd)E0jP`H5eNO(9Xvk4g$iuOGdyWL4$CIZ>52!QdJ?>oo3Dy=q^f`&IP0+Qda?rrWI8C3kDgEqlQ$nMR#S zbq`d0)e!C`5tC+9=yns@=&#C@N+53~$yr-(=> zAC*cN_+gQN2@*evMR6F38vzU;a1dA+sYn7m`~WlzSO##g(+q%t0x&^QAQ&ASxPu?Q z%nX7k&>jk6YE0aK$}Zx47_ou?awR@F#>XGYO(pM~n4kSfDGaOnoUz^>FuJILZM*32 zVlR?FyY|MM$RSOrs;G1WbMuXf||e^ndd7%^)a*J`;bH4?C* zejoGtOycd~=X%!O+hYt5Op_zxdWWn!hOQ2;w=VSrU@f2kM1Hg(+cw7V=f@q6w;=v#Zvricz5dZ+C3g7^%6cA;Bc~8WFKfvR= z@K_TPRt!`i+E_6D-k$`_%SjNH$0q^48&P0pA1r3ldDGw~03%owgrW+9+JF^|IcA^| z1R=D=VH`wT^7)uA^!yn>1be%qf6VkdLk`clSLM$V?K9sH_vuzby>Qn!Vq$<(4_!;-G{a-fp1bI_ zf}N=7cO^wL^c+vN(eZetS_SWh3zOSh;;QqN$*$PCN$;iC6xqg)c^8MJi!OFMnAP|x zZPr&R=E*^fRjE)vuoEK$z-$U&K41jU4W6)_fcnvZ)gZnC@*%M@Yl~gD=)hWVkAa~0 z#-1S()&op5B2XJ`V<5%?gMx%lBEYb~8h}uVK5PaMI0!|vLETWJ5yVimgDeP?V9ihz za7P?7q{j*1K!D8f5Yr7Dgl7g(I0#7`^MUXjIOUR5(#~QEZ{d{jmdMEo3imC!nc4^C zE-NnCigW5Ne>V9Q4sVhz81!8)4=3aXL@c*%M0B*g65ytvQ1I|*xBHUf_saCACa*=P zdn?}l+jN3T?M-x$=Zk)~ca?!kE)p{fj!ozwucpMZG=V0|j{?=Ra!pk;V`C3GFnzab ze4g&l-^5>&T3nbg<6_aS${DWmI#hJO-b>}o@*{x^*_Uj;cg3ii=p0{;(KIS|sK1}9 zC$s%+y$O7)E&aeSY@55hOV*dTBaSU}Gm4?I~Mo~93jjWP={ z>*Ik@0_Xz$dBR^P`w`%QK=J|b1PEgoKN^l6*~Tik&j=mnAT2 z8Yoqo^5bY(ez^eXmY#f;TC{dotE7o0YbF=t$kRhBrYDoOywmEwWXqvN7kK(*%MD$% z)##_aL$*U&g;M0B6t5Wd4VH0)RwOCd&FRgy#8fA{=gG8_!^j~b+IqgSQdOT{*w$w^ z%YMw=Y1{#i>Z+7uy|*sCq*Yr_BZrIPJUa%D62vd<60r)CLjjQ?8$pQZ787p2>ec9DFTDVmk1)-M2A4 zbKxS@>!y_XBF(<9NrSK791ZZxo$YKdifCB7!~ZIxUQ>sh5Ho5QDzz!4omvs{d&^S5S74;2EY^0lNf&q0k?>PPX?|L4mNH-mV+KT z=*t1qrvmA5x4w{!847r!c&@~?S)E)H-rqdE&{Jz#k~`JA{?b?f>Q0d@eahm)qB2wc zQ>%uj`C1$0W^J>zvXFVPQJ_NhA>N|<$rY3`t<;OpsxC!AAF2u%W}W42SWC-ztj+KV z16zWeZL@w=@hAN{EK{b&r_k0odg~b3-%&@mQIuw}M-O!sa$+ql&}`&3C@yI+Ua)FD zA)%|yYE|QYEH=h$OrLDDpU|l{YH55wba^qR^3rdrPRU$z)o)|IFrgcUkNRJnZTT#B zoss^au~8@|^{FelT4uOfv$N#RSo0kgdd=(3BiokmnCXi>#Wyw|b|p+aY|NK-Q!`m*=pW08)#V;*D2tpCug_VLyH@fZL8MiifY_9dzZ7`EO_q^=BLA> z1vbAKX3jp#HB6hMCI8+z&ZRD*Mtk{_aI#;mgUf5>6@w2emgBcvdtW}aoX?x|WlzX@ zQPbXEQt~xd;I;+s#rv3$*Q&y~>dsZRDkdpfQ-NG>)b)7RguW!xNhe6D&S#lc^H(aR z@6WvNo@;HARcl>Tk;%PLl&P4rlUIM6WO+D{Gi_|;>^^Rdr}ISrT8O-d#!Ly$r*ctc{On@~prpyp0+KS?Qrj-wcKxJ|a^HxNH0N;?MrOw!_}%UxE92UE@(JYqb2cH@GTNdgAW3Ms;6 zF?ds_AvP1`G!^?evExz$QjCH26%$YGmoBqh^{zUamhG%(9H=K}{Jxsf{diq@;H6O~ zNs{2Gb99Dgukuu;gk}AqES6HjvgKLEL2}mh{LD*H&-LqDJH8UoaN@l z$rRC{sSfSX;u(>t3;7bLva`X{u9B`o;;mtZLS?~SZf~XeJ~^+fxHwPOhE`%CpSES6 z4$iZQd=V>8A5|2z{`sT3jQ;Rx6OAd`GyIwF<5m_QR684YdsP>dKCH5BFV}3oUFF@| z8ug`?_{Egbqeg}1>LvZg>b!GmyOywY<+F=(8P*PbXFlt^y?MhyTJklf;89N+`JJAd zFu&K>b@R9M@yUhw&*H_6-I<*q*VW&h&#;MbUlWR5(bDdy>4@#T|MgC2@du%@n!uw2 z!VS$j7h=jAvi`TA4HeqeCcq1Fz7Ll=Jn`$#(QUo3f-je=BUeMxcA9?>Y*-rGi zUBe6Oqt~<+=<~@Rmr}iC?_tF4%VP>+LhO}LTB2f)%sR%6)~$9+&3r>YB<--1G|%}Y zh~4-JeY-<(@mz1s??q3Nu9(+X<-ODMSPU8dgcp((5-`>&&Zl>-$H(}o(_5F1+57*3 zl@K{<&c>6C5h(|6%QY9-@}A3(W+R_5sVe(mT7TR&+QmWwOKOz45+(x%ukCW9&s3d} z58X62VB2iZ846vTl$w8ZRw2%o{i2C=Eswi@fHG#ib8-xJOvgLxCe!TAe1sA7*9IbV>s|<0OmRMrKzxMLxtxD9|rSBKquseIlbQk~IZ427a1!AVG29^rGc%P1F{@~t$>9wFFHaTB zk(N(>I_VMq%hCdUW3i0QVf5wUZdJW=BRg)s#%s)uU8V8IrD?YcW;Zr_SF$JwcYICm zCKmD1xMlXCyI)=s_gJ=&H3>zdOjlfX31_O3;4;3ndv!-%|Ed(i1D<1NFea`&Nmp{(#;P+3QCa*?^kgXDm^=5BZ zo98>WqMNjN-7oA9UhoZA`%*C1>b5B@{k-dwqt}Z_Gr{{$Blx5>>l6(Z&TU3 zLRatCM9I_S&iAjmIx8d-&20zeQ)l~072HChk=n6#yDR5e-uUZwpAKeUnxkE8Tzu@G z9%3o|#_O=N0s45u)yJJpr22@6%`uZ%5${*O;=B0*qP;i%2zZ?|sa3%`d(B!pBcI=C z?;S6fHZ`;{zOmV_UX^ZjyZC4FoAdV!6!!!Cx3qLmgOS#fEu~{_rGfM=FSQSeubjRK zX%_CnXMi{Mi29x&ejrD5Px0|MY?;)ES?O$rOcV(8gh=*^CO6wS?tH5o?N|-n8VhuK z&Ao>&h{60M9kFEhN=KzF(cgH&W_YCAwAeL&uIf4d`p=tt5<)RS0dW&grq1oD-RLl1 zP?N3NGe~jPempf3_le1hR*=xWkem}D(-UFk(XM4e;)={~f8H)(rf&T7n#)_I3-8Z- zJMH0UV>2po%ivG0oYV68A6QQIWQJk7=*ep@!FO2#blZAk?#YgKh~$Kfve8Wox9jNK z#u=Omw1^ii@mpgJ75xw+;Q;zzCK~5~7=YC@2p~~_NH79{fDj-<)OmZiABn~>bAo6B z2mWA04uMO8yNV!mH2vU5=m=FoG)e^j0|Jt7#!SUipu^J#Q3E3+L_pvxAS8iB@zq3% z#(F2T@6i`BS73U1swAj6SMzqA(wQ9lr<<au9^zO3$CTkh7i$;St;9zC8G}lGJQoRTLy#tHX(V{S$f*VmvWq0I3%YpLLaht z?IsPma}MoTVt7i7YgDyZC$9(wyq@swRcyFl$1eI($wkk2r_g}aLeyO5o~F0EVoH<4 z^PJIE?c39#Pv~!+wp|MdlX+D6gkLhRDgUM~bL?xWTT+@vPpY*~NJP0b({0aH7223$ zNc_B>U*9CYHsP(c|7a$5Z8j^=u`s0|^-lWR5{c02g?Gmv@VnU9dGCO??ma?R3#pd6 zT-)r!acF#tSbWYF#9H;e(9N`v`V+0nySvpqe*_efyRw~o zB0Oq8A$8@d-pR?%JK1a;5504u(w%a(;QbfP`-`*p#@C-^m0QQ$t@z?Ls5I^r95d=B zG+MBtBkp^zwD4_}=+~8EN0qMnN_#J3FV%7(*&XvU(|3aS$r;I?ihmU=*A5hGwrVo^ znEZR*%G!3Va34N1@JUa&+1;Qs_GbkLW_K1HuFHPORc`2;ShuUnKix9dBzW4KtG72V zdd=$aPY%DGw26ErbQy1%zU=7{4dv#n`ptCVF=f~^n~h-5hN6kBnp8buC_F<_6~Pg zlrge)R;bU*y7~)$_L3|2`!?$+L|jc^E6-(_X5(A@nolQJMP#mo2fmEeI--|;s2mDw z*>+LiI~O%Cy+4hrG@Y9EE6tHLY=wD{4BM6aJ(6vg6)d9?#C&Kibi9 zc*}?mU(PkDF7DBtqO|vE3%R3gE20-UW~=kXq$L!FuNv^|bK99IKIxzZ6^x~3Awx%) zc2E)5B&JY)Oi_s#xyL#nHA^>YtTG7|GsmB&48wO$&zDB?*?Pz7F@C zU$C*$xMXbf^TOz#_R$x2`3=vXb-YVdIII5y(e3e%bKdnKdpyP2>tJK3IKqFHx|@y5 zv!kAEzH%I^sP1BxaIwNd>n{a%MUD65I+;wXdnUL0R{PD0s&*}6QW71W^tAka-T7me zf@ypEZij1c>bIbznVk>24t>u45nWzA)`;;Z9zRyJEVMBd0>#J7DSYfz4%V%4NwaQ5g8WRCGJi44uv;2YSi(_0>IUR!flpdH)3n$)mzo&2g$ zHXyY)QzG~yUvjhn!$+oBWJKeBBsDjOCTiX*Wcn+3%!+KIXjL*==Y2ZJ;LhM{3mN&^ z7-_uy!EKGH^gw@{VX4`%gqgIZEB>Cv*jjdIQQfHzk{-jRwv^&I*N5dTLx-Ihb1$a_ z|JRYH2K#ryMeNA2WPa8)3;cd?o|1YUJ!hiBJ^c0bBVjeu;qE)nh5rM^jlCVO={B{& zt27FXheQo6z2_J37)#N&ZZ0y470FcW&%Lr^>2x_uJ}u>`Mj8@gjDBGR&p~AfP;ub> zNijQk1$N#^|CcrjC`ZuAfhgSgm1EOBsY%j>K;WDN8Dp~y$^XZ&BqIrk4Oo#VMzjhP z8R3v8)z^5y%1lNO7hhbFU+HINw8rOv$ff7Xzri2=eU^9&_Q$h#Gu6e(?t_$YE#Z9$ z7t88{4&{ha>J?>M(6ENYvki@|aydB~Aguen^kmzbOK#`4Xoj!r%)85tRLw6mwd<2B zDhqQ56+$V0k;>Uf3Y8UfpB(zR;zggzX;>}$4WrOL>uPz|J#9C)p@3St^HhWr^<|W~ zN6O@a_o3ev7O59O1xMBuc7byvm%QF@9FNxjX2+c@UJd$lUD{##9gzsPa z&9dsr#@6t02<3ND>D`p6G984?p5ajz87HG}Nj2)D?@|YaO@xe~>jtW^+C|k&kAJiZfqjKJPkjS;A%Flsa5! zk&C8Xt(mA=GN>s1`BJoCS%+O;Yoof=);7P(YyNbKPviQ$X}?R{Y+aSeEm`labpGle%iZ%LZV8>VD?G&aFCc!oxS!%oUqjGR&Ux1l+JT)}(q0(?5?~b-tb0rTp?# z^N>@8QE_uYlN+S+5MpsV@yY;KGV~=AgFQhhJupx~7Yfk0aAt^yQJKSWauw8(!u~Tm z34@bffXw&96PpxdF&>;GuNc{GI04CsT$I7fE!a*{-XPk{Xd(fX!o8zw;i5Jk#^`Gw zDh}(qhu-b@xudI~8v)^=m7#Ol?_XBT%Z}f3c^_}mi1=GS?A#idwdHc=p39?FJ76UC zw93nO$esm5r>1yVzxaHy-LA(Qg0O3g>s|5%%Cp2=(oxL2n>yZD@?R3yZb z-&yi_BT3#WaVPDlK3nB+mE7p9=2tehPpe$hH(zzgY!|5A<-!`;-K@#F!Cl{Q(e6=w z=|+WXC3P;=Z~iw8{&x-MBow7R5fd>e6XEux+#>VhH2gH>gO`f20N58!89iwi_G89k98LEkK`L|y6FRfH6KK%4zx#sIlayf`yr{O z4eh6GkgL^tI4WjkqQ=ffZe6y?QPhWa!rxh-(xTJq`Y)qi(MvNJKor_ui zJxA2};}<7Wyyo2imeBWj~)S$x$a69(AGKb_xU=WcQdFX?oE@i0Ecq;KSgxgv4$;s-VzGn}XRoWBG=f?n=0 zq4erj5x8C^7zZ&WX~x*&BdJgX2>mJMMiMJXVU;|}!-!Wq2A z#0FyL4AYjO3x8=uZAKQE8^?rFW^{6J98QZfJ_C9h|7*pj{BOPnMPx)jP9lZu65MP| zI6dA32ETz|B8rjRVXbU|c<+lZW|q8y@}u=}0r%ijb+p37lsWS@uN#LFD7rkWw;s>FWWLuW{2 zn@gP)_2R2hWZH4OarkU@$hnntI(t}{+40@_d%5r&+)$~wnq0&?+>+e^QzK{8q+lvbkbX&i6a{-oyvCHMzF?p zoqN+T@Kz>x?Cx`&l8?5jxft(;sef~HB?`NibZusjSmDXbym`0Ne@CiWu5B*a9P0lj zkCUs6p&H~KscHsRuWYA$PTemGsXg{A_tuYJS&8v*L>k{y+L(w|YyZvX_xIB&CIED^zvks8AWD3 z{o7Ptu>nT4-a{M;X^%T^(3(fsp~#u4w%(ru5d zxBnDv___9^`5%kaz~fTQO~LI?811mz_Xtd`QrRvI{$@k8;x8 zZdj4StKK|m_}zMGPmF4#WM`Pu)MnqEX?`v`ve|#dq(iyTkDDgTn)+f&<*VGCQR6$A zsA@j^qUErm^q5NIFkZZ&e9=ZYKr&m7_WPNhzc{VqyKAJ>Uh#A!`uYgE`$-6?yskH9 zPx|e_;#JM5Yhv~E@S=J1S+N_X@0;b`0}OuhNtzX~9x_Q+f;h|#7;gl$Q=ZtUq691i z>f%8e4@aDl8#9tkkTs7{D>(m)_5OdV^p_n9QmjZa3`yXI14oh#xP8DbB?4AVfw)5S z4A33wbjPX@C?`XLrjyy-_*Nbsw3uu5}ywYl&#G)n)roxBgSiD#H@l1bu-@_=Ca2ax(x4fgmBj32rb?vG;ko=g_O6)){?kjHbtt--G4F(bLwc zqcc<^vns7-p+E_uL*E{~S4Un+tv!#;uhnG6vwxO&$xK~Oy8GSn8o6C!C};(C`L45F ztXzV%<@{na)!3_>iIQ{V5~tdFWPmrHtHzTW)UNfC=WWT+OhXrkBJX!kxmeA<`iALu z^js%(J+u8S6n?z>n}W_6XNnf-mHD`ZaTBe0uKPpnXh7ywXw7wBf>sY@%OTc3%Q$i zsxtqQ=ytmy(7MWQRih<+E0q%7A);xu`2j<_lxj(7T3?3k$lH%L{H$HaB<7Xon$A*M zmZtDhE?r^av>>4$dE?tl`X7|1R)eKopErgd{>?Y{la^_J`bA=o9(~H{{L^Q-T}c9y z`_jbYf0SdEUPqo@RA@u3!FP)4EY-l8p;sG0eBlEsRr)*p0D|;*(->9lN)BRu|aM;o#XW_xzHcrdytC^WZljC2x9JH$jbrjdgF@bN? zZp4nW}&W<|I?~TsoHTMt}pyg@}qa(u2hwnxj_O6 zfYrbQ#snkw-X~x|updPLX#mEP1+IkD)g#1lp({q<%KtSoLG||uw;97ZxZwl{PSQp) zNpLe7%NYuezy$JQfW`3Kj93l0hREI)AH!@KM#7XyRU($$4GznHGCfVp57SaGYL*b( zJUBO+VrrEYssR|Ru74x8vwl}gL1#yhzh9i`l`b4~7zYJ-(tXHTcsZtsQgX4lw#Nh3 zcsfQ(z@PwbAaVXSZABj48947sy%~JVU$e#YG;JZ*Pb*{5aoe#_ATO|cRD|7K4tW|K zKg<>CL_{8B?_WB_w}=h0NC_9SJLdw2?O&3&5MygbqEPnPo8ojVZ%UsO1=7hhwiJ^( zCbHJnBj{pM&B)VL7s)o6+#kyo9g($R{$+l%BmdZ+nG#cOL_V}%?-b-EKj|(lV?~#) zFX#1^CLmJ`6+IlIP6tQN#paU>;4)d(Vaq1ePaS?c_LTE6TGFjAgg4f8xm_M^Kl%7~ zG{3W4>)*Ka59*r)Cy6TeY{B=Mzfm5%uXDi2jhSC0p6Q;+Nt<19k<-0Jua&p-Cc1de zi=OEjeX`~{*TNg2c0}$eG?kD>vxnY%Tu1gBlU;~rDz_a=@K6&v#_eFUWn&8JJqXRqXyu&(%NwH09qeGM~oC9_7u+6Q^d)Dm^X9 z{;0#DcVA?qsa>Sv)T!VH8QlSfW3HAP)0UKcVs*}Q^aRtZWu1_*!0g8JpTjp=SHXOC zP3}-~q`gw}slBjTv4#&s4>H zyZPm`s*SHE-z<5l=l;&pJZKtKKJuWysxoi#2$4?UEf{lMD^{%FDEH*gm=3R|_RL(L zs;RXg9m#Duy?D_w^FB4sNSU|-F6Qe2!R3~Z?Kn?Q-BN+29kWXX8 zR`5s&g9N4{quliWoHr;%ltcj4@7!!8E-Y-D+vGNMm}hqTeD}^5Gfa(6<*NRTP~%sH zqw{uloHZ?OTu%Brnw3$Vox4|@Q)%;e^2JiTNq!3#M=a!w48+pJdBxJi2pD|?U8@2> zC1h{kj5eVVVhnq5a=AOwUDWEU96W1kobA~*xgGXLMxV>~(U}t^fZN%18n+K;WE%8*fvEMZ4;e8&xCVyO7G&TSo zS@;DQnRNEGjrI?p{%FUD0bv;CjQ}z znNDWDmK8f9&Tr-rdqKQIco}@y8`H?+OK00%@pa@^a{>4a$;!t9|2&8rBkwNFr9Y%} zjC-nCZ@X+a7EcAQnKBxRw;aPCIr257*Eojnj+;68H4sJK$Z*ad){X{#{5iyzRem8$ zdg$*RVXV%A!e52 zf@$G@9Vis|SohGYY?UXxn+`8lK2{i*l$rZn-g#WQ+Yr+4qtE?;W^86z`!-aa5~{hk zSvr6%FXOm~Q2(NH6g8v0aBh1ULgY2=>dlf|px6Y<9e|QMWDXcjMqCebRW8yllSVZvDi2 zee=5ZtNu@VYuCCn&wUR(yk-zVo`3vwy8Pa^C;+PTEWUM*~2F^=+@a~ho<-7hg@mn68n&e3{U zbFE#*uu_kvGRp_}Yf(q7N)AyWK4w^z0Q2zL_*`j*#cUj=&TKKyYJ6*R4OX)Zl%sra z?20A01RB>l?8yhAG$L8BjPhe5iWefs=!w%Ia;y`=LW=2C%ScP@<7N9(qovu-4tpFJ zkl6-Tzwy4x)jKXYRr^iAPQZa1T`q*qlhC1M3s8BeX>&Tc{1kkVV3CZk!RHa*!)Y!FD%C;RtdT>DI@+L(OxX)EWoW9-x`hv zqj@%y4ie6HMwrXCw(|xbyf`X z_Cab)1|$(OZTJ$+p6@~XMS<1{4|804do<%~P%Hl=EG<)qD;;N(AB!z~s5P_N94S6i zo%_4q0F4ryFL_SbkvbjmYCJ-2H+AXBx$U`$FXK(8R&3;~0-wFfy7XK+6ths@d_D{P zL`tD~XL!Ug+^S5d1KoVf(a|GGhHe!{vez2cTv&1wJMGnGG%YLD!b>iSH*9_UL7_%} zeXz@&->AK1=L0nq^RjZ&$}(2$hqAw-6n_6xo&6_G=_FO-iQUf8Vf2Np@pE38Lqcc~ z`!^NIbO*o6pT@dU_v1D=v>`YIsxyFH76%&fzyYK01w26M2E>A82Z^*3&2rMU$3OrB z^L0RiTAmSbkb*!Z6XOYBBp{L}g$VI4ztKt7xVZE4w%LqNT_dOEnwev}#{74EN2e+> zOf_oWFErbDt?g~YIefzEBtp3NFsHi9u1#Vx7vKG1vJ^UFD!&n@3xO%9&_9Vcqve9v z>3M~c5vvlY2Q+>z-{V4uJ>HtiSi^!7)v}wt0bwd;RuRNg5gQe(YqAD{f%$Ltn{*sK zh^)yIdxhsck<*HM>Ou|fT=T%F+Wl_f9mq!ehA1vWX~ z&oAJR8@PWmf#C?UW3j9c4N2F92d9l1v=HwO25;>7RUSA=)sFM6VEVVmBpa3x0?>dG6kHTBb%3>a}?)2Q8@nc&MC-r zH>c|DlcM?aLNmuw|JJR|NgC1Fs?#?dOZpEqq!_a~6wfbB{Z{yI0!I4#y@HT9Um!&$VOBWGL=$Uk$n6 zF3R;jy`SS0d2L{J?jLiM^Y#x3zgX2pJ?XJ%`~Ivp8DzEV)dEvarwsn+W91&R?!^?|hex;kxTn1RQ^OZ>6zN4z4OmDv6{hNq1N_f# z#r*hK9+nL!B7q|TBXSaz7%3*OZ5at+VEFtuj0Qsj51&!6|EqmV0u)q$v{E!GF9n0Z zL7>c(osl~N)u{xqbK|gRWYtF_w3PN*RfA0TixO8JvpdOsNVLEm&SS*g!&|cQB@>T_ zZX7msKAxAm=%Bafc=fV_Os-1*>Tr0GmU?T1kG68*(!-fE8e3bA)^mv;g5u}9ol$&h z+}jd4i(1+nN2GJdg}ave4$zs1ocaa?k+s33#N>d3sq-&Oj2h850Rntyv5m0U=0F{S zTBjJ`>Ht%W8IP&x#~{(IF(#j5OqdsaTE}uQ)$r!ai{2qVm9U!*d&*?6L%$J6KZp{o z7R;61b0!LmqOzUzNwQwW1BMjDYeD9x1A6ME1CW1fcHD55Je!sqmnjc{t(VsfD#cQu z3sYP;-fVS9DJE$memU|z($5<&e}EY~8yfPwppjA>pw)6!x}TDFm9Rpcu21?+{o_@g zDotWY>A^>hzlXYyn;*=@CD|#Rkq`W!>-yWJF4arM-g&9D(?WCK`n=ixwC%j+ao&k- zxM;U*=cz@&?71C@jh>Ax1;u&&z-1?Meo-lf8Ik!HcXeL}{>wMEBQup$^XhLL)v7j0 zccbe>z)lQ)VXh zXlD0L?P{(?busD8nI4NAd;PXLS`;pNTyA@4T<%NAMgRV(_t|W;>)TS=EwwhG33;(s z6gu;(*5hA|Ed3puc7 zfAtP=fnE?^P?ul?+C+h$TW<8LIgkkp-mjXy<`AWfgoOB*u`ECxl9O1AdkTIHproCl@j3< zbR_wt$%I{xm?BigNZZ7i9A*h&B#}osbf38E{d3^&NHRpBPvzJwMiTZ3wef{G!rD!I z4YrpGu9VBtCtH{e;$~oHa?c22SRh&7bOE=67Z--1qqtvCnlN7NElz0!X>=2@2D_`? z`CMe^kb0E;wn(NbcjVTB4DQ)7n^r`pW6@^Q-YIsg@yxULLWIyODm0B*ODA+9f%T*K zRzt|X^Yks=M58Bw>dbuXOV#+&CE?``ALDV~4czVDiAK&#Ply3N^Ry;*lbHHpP` zpO0)+{cV1e(~>HL^DO1IAWPp6nrg_We8W!(1dfhl1|+_586@*Bb-wQ!EMM#VDw8or zLGPW~b{!YI^PJlXO&zD@;dbA(d4F0CEAdZt$dA5YmskBIZjDb;fp(OF`fP!oACph0 zlgTm3&_Sr)&F=&R>X5Zwxqp{4u2bX&EVfzUsUq#SU(79-0}DyySCpWBY7k( z=Q|nh8Vkt7^|D|Y%9s(O1f*nGtlB7Ec#^gp0U)M;B1*`J(cuhI3cuXDxd~n@?+h?f zxA=g3Lvph|gieZHP;4g{qj(UU)L}@z5p~eD0+lzU`;0ifbvy_+H-ZTUPp0mE{&v>T z^osd44~~s@@4mq!KHbbUdXE;@1@l@*d&C*89CM6eWwhZ#cHm7SfpM@Eho#3D0x&#~ z_PhV9p^5RI1Nn+NFk-EoHu@ZcMa*KVZBtB`2B-1*xY8Ebjq*?%QYh?((dhbhFXmYv z;;0Zaq|L;IHNY}q2@pvb1+4@d03Q@U1^FHz%~y{1K?F@s~Lxn;CvDt7|y)*Y7}=Y=;bj$$v=Z zILPruxu;^r%n|VlciL`9N_|;p|9dQBU8rxjF}dEoX5{Namt8&W>g&8$9;@7S zZ zTpZ)K81+-2#~C8yk;+I$dl5Dci^>_5CdBB0_SC~L^^9lvR1xvXn7cQH&-Di9i&^Yc zshPHd%JG`Y7*RxmTT+%DfN~`Q&kKaj85`&j2Pd2~#*rXxshklG3~#jF1F*M9R?uPL zw(?`TCzk^nO-TZt4W(4sOg}Vc#Oo^<9<^XJD#Y=CE+j5WgmR{E%*JkooN;al0=O3c z0s+4DA=V4(1r58cpY2E=#Jqz&trbDPQgu9rfyFeYyEG76U+C3543{Btr~y$t@eUCbMQd@3}J z+Wq?%gR}Tu2Gt>`PuVn{qa2wp$rt5 zGMob#RV)lbW1&|t(0z(zRPpG6W6^lVMiJB^O(Vf&aFPjv7>FI!aNa_BUPdsn>Qx7UkgE7r7hE3Z8zr@^Ue{(s_dM+rY%y7W>rKitF#*RQ);vx zioXf}hoiRhhHSPq!j9bW@jTVhL9j-`QezM6{l;X@;*aQ;C11&AdX>=guupguqX99R;dEWlc_L3vo{0bqX!ngFL6u|hbbj`Dx$_;QR?2pJm!rY9Hy z83AY+V+hDb0G0LTC_)SqJEPd1!KJ`RVZs8ygRKKR;M@(6_Nr1%($+XvsCg{3A{JJ? z-F>+Qeb?}sC2E9-W03I$WE97<%@cS10-cc|0~K9tABk|LtH5UzY{5dO2oPih!O)^4 zK%+xeN)C~ogVFUo3i_RaJPHK?8wMIeH0;6)1eSS0_m?10jz%L9goII^*E{IiZUoP^ zT;fpaU!`PoSr4Z(X_#vKt4HX{^8zur_TSC$e-zLtpFi8v&3nB29|$M(S5mr%LAkv~S70S^nr;og0pkcVGJvjNCw}fi1Iqqun}GTTspQLELtL>-BRw~tn`Zo5u@iR4o;}ORVC ztfnP;)2h-kDLG2DAx&mn5q=S&zMpXql^}5fUMtQ+^}%84+PK)@?HIlLqMltK)d4|R zIQRh`x-7lG8DYHpuXKQWF=2uDU}t;@={+%Pa~Z=n60brQC01yb+-$pPtW;!s*r9cV?d`gLo+f=sV zjzN7TVNoPzB!dRiAp0n)jFmuSuObp9tsYqI>DX&_@K`2&kiVrzLkO|9XQ}sQaCr_y zVIGfTeXZmY5^Ej{ug@a`iu8f41B0L|(GuB2 z6wtyD8w^eW-+AN==r2zsFhP;*a4>_u3#YZA%=AgSQ-~umai#6rK}2iR$Q6O}vAYs0 z_(v!kQgoJ%LdT3UYw+temE|Fb7S$V#_etCn)8w*FULmWkgAR%aVhGZni(c}h*boeh^&ThpWE>k4h0mHM#cs@`pMPgPuj z9_0P{tW;x2Ul{h-DRFGjU+Z$s%u!mxRN$a0CiA7?-ePUBn0JH9ipbJp2&HLevTi{1 zn(jB{iPx^derCm*Yg6(=|IwQw2U=S-Pit_~HeM zHTF;spg#J0Z8(7!0z3&}8Kecu1aK)b&Yp1y@W;Sz0G+?!fdmW+TqOeZ-?1S?#z}2; zaS$~JLUaCkxe;8~u>6!7cZaKoMGSYC#Ys6tYGRc?8@W2J>bOixQyI>^(?K*7{`-R zQLM5MBRDJT=uqsF9v>}^ju6h&GPSGE$r;v_Xo<;{ns+MG{3)ZnA{VHIXnWu?Odaq! z>y%~&LifTLT&autc+r_NkDVHOZ+^0FuCZ!zb(XlM)BI*+#B19)zHQ|5cjv^i^I-=< zh;Iu#M)qO^iIx8HZ8$&6FjSfkMYJ$ULm3}EE@gY<;MGSvF4nxhQYVo_`fE<&D;UePss^){F;NC2D4yT+Ng$R? zO|(z0KGNtbOdISmp}eNmMa;iQzjJD9HgSU37PHv;2v!_~tC9YWxT~@|MRC#lf}g8j zogXOtBd{aS$+fa>o?K}s(Ydp^Rgj#MWm^pJiMQgZQ*m7ONOm}w8ljB@Q)bv8kkNn% zGC=Cx0Fog$oNxl|&xYYv#`0p}L^uHpK!VtjY>fQ_dLsbNGo0}j{4SVEweTE?z0WRxD4{B^!vC zW%SVROf@`{xq0K=;p3jZ4+|lT4(k>}pLeq2>m8ZfP_ShFEH`2MS;nQQULgpIEd zM>dHIU(l*GR($|Gu2FG(sF^ux!(9W7g6vBVgtK7T7gioJqSoxAoY^Fx7>v@11zyky zH?}|2z-SagwCH+q!!(d=Bha*5UNj^m#4-YW8W7}XAl3q`QY@TD!LVYS86BEqXk2ca z{j@S_JNC14MXb4loW$ zkk=7$6b2lzY*1~;Lj-G{o`mfMcZIqFDG@-zLI_~S15X@m@gNR%FBme;2x9X(nFD}i zf;FUw9^6h;_uS>WwcgPeXprQlH*P+ zpJCw6-TMXWqzve1uER<90X{F3jby;L8M#p)cEkfL z_>tnJ^OaQAdY<^X8jJTfG+=SDU|Fz0RxlnMU;-2yiVcFbKnyb*^ zd0Jk#S=-}U@7NdbqK^x@zJ<4BIqbZ>j+`~9VT)R=4yY*KW8b;xTrCv?t|;+SRqL?$ z1-4#3u*mg1jQZB8WT`WRBuMGzkJMYsLfwCM+j72k-%AhIzG7DtM4^g)t0czzo120Xh_7 zl;)n4>82nLlVC_d3!p{8CkThc2dEfP@fW=k$Ub#$K{S-T%dCPDln}Gz*4}1eHv&wU z(hEm%g8yu!=K8u#QKzHK_*CQ*F|*3h&?giX=S%xs7PMydFk#Z6QiNz{g{`7j=%<&} z&1pBO)i(FKyT4JQR2Mr8B-E;=Kz3yG7FZjp;(@WjH0uMf9C5E7g3$_C7KrJDT$cz? zFE=1hFZ7FjFbGG6EkW?g2Kc$^NB}B$XaLL(EMrdx#}ya4yY;ZY%e9P+W z(S_Bykj}D}_n9^cmzW>9|0WwY@SJvpX{gX|H|tUNH~2k6UiEcj4TqtmdHN@tKlv)q z!a?XuYEKKMbF*A220|11w!q+}Z7?V8 z11t{9qy*5NwE$s|OiIyxdqf!HHw;eB0P>CsGN_0K8HYGZ}u;b9m~ z(M-B01LOWfKcN}8eTMA9K?Dr!mpOFOK~&Bt@HDI#!*DRqY$6CK5Mw5RSwNLBz)3b@ zvc#ZtFeu24!WU1+a%W5PW=n&y6@9z#cbx02l_`DF5FlhQUWd&BZKi}btUn!bR@Gre zRYY}M{dSJ!wa^%C5mnFkT25gmYZLy)e=2I4{?oueKkUl1Y>B`JG<7682UMN=JEePY zdlcbvxomK^v2(WCImOUxnv-MFqex%aJd7#GM62Gnb$pnu?*>VwxCscEZXpQ1_OYxasb{1h#NEnq!d?_);o4MqRD`TTi39j`vofzO0s}p+lVZw?Bs_?axVt#2oO2oE%;U2Ckfj+3Hh${Cg7Ry z7%`qb2!xv04hb9x5*&|p7pH@PCG0F*MoQ9i>hr;xxKB5=h6I+0FLAb?^9c>rTWimO z9Cc}YXVa$r2=>i%^h$_2zYFic-c=hJ`~~rmMite!H}1u#~u??EpZ>jEJe@=!a6FpQ(! z3?JHjcO-bF*s#j{;`2apj$6Su!h{9$Z=5|ln30>X@4s0QHW0`!?C05JoHUc5bh}on zGI}e?!i`%S3=|ZH6XL+; z#wvj^k4XtIF#n8KkOjKI(319GdIR_f1Tn@Ua$_Or_H6(e|env+(+-4Z*0Wxh%ZSk1V{$nYn4a_i4QyAwesQbQ+bn8X7CyHgPaHWW3ok zv9ZR;Jb+bQrWdpnVWZzT?U&z32ulSKl$NNI87H;6s~8l1(SIqSa?cAJ1>|OxiawVWcU7-mNnE_@?D6)lUzj@G!LTQ9IF*`Ql9gPRAID{w z)3Wct7&RaSIDv_c<=22C2F5oS*m^RR6M+R5n;`Fupdeg17((d6iDF>4aj=VnA(sr3 z0;olXJsuBUjlf0)b|H{O4BKsf&`UYmByYr5*^H6ui5>AeDP_nZbtmb8AW?Ze*k2pU zcD1UIQ>`k$KPOgT7j0qoa1IvPv|s_wUaD73+u_7=$Rr%a{fNKLjB$%7Uj(l%ewfg6 z3&Mmi})Uk9XXEUl&;qK-N?)KpIgis&A41ft=}HX)M*V8U6Z4YaSrQW`5Io6@HyOHCu1Ud z&9LHrR-hfrHFlqCYpOg~9a}pkXV#RX2ivc14Od4h2sc#=c&Q@L8^o2@DhuSjn}__E z9dQZAfqEH9RJEkQMtW{ULPV|CQ#2)4voeAZ;H1^bubG^GojojjbE0ZqtBY`6gZN%y z2O(l1l&>tJZD$A9?BzU4(2Tp)x%rX_vvAtkbvwo!6FuEsN)=jjoeXYsyvL=+Qp4KW zvbt<)jlcZ*4*-DY^4i)VRygQxy}z~@uY-o<4UuNk%Uop!eS z>YH4&X36lX+~D$R1&_6vfv&c2)bXo*8-Gp}Q={3gOjL*F!K<%&gBs1j;nyed>RKYP zjXdf>oHJ7;RlZ;2j}1E$Gx+tiKAH$6a>UP-uTLT_lGIXH(`S^}Fh1yfRC0NOuNUkz zF6aKT1B7EloP>eR1OaKwh&zH&f%g+s4A^}kaS&VJ#_}2f+aH2M&?u0(U}zucZ%cs* zvtiu8TfhzreLEZ=Yi{IwL<)@>@nQz>!Jx9Ip2T2)*b#q2;MoD9S^$Tf%K>~)=)W~( z>?2;_3L3Qog14~U>s|-IF&6^xKp-0wD#Y|F0}oKBAx1zGR0x2$c3+F2lFE*IL#YvT zw4fAW&RU3x^V0btKj$yoz4&=tLO6|$mk z-YzLcqqb(I{ca;#YG_4q1EKmC-G__H%-h7{d5f?5QLoHfYvkF6G(KJotFW8p`zTx^ zuRO4)K}%DsF-e@bC_(<0yX|>t{cfCCqo}dq7mb;)@+19s3mNJ%+bNOzL9!0rn3>B@ zt8Q~VS*74#=K9x(kqwzi&dbPMs23urggOmFe@$8z%cm3eD%FywX6Ve|p7F&-b0H-+CFf zF+>Y?EQ~=#y1!qGRB$$_O8$M#uw1Ytz&P!eZ`tw_Ev*vkwb`rZG4`rfP2oR|4S&xm z+!|UjukaX~=~v-v)5Q3T&S~uI$hGj51X71O-g92OXdzi*^6{zhs<-W3%JU~^Ls>k& zAtH0bPrTp7ptZ6HtsmhlH{iNx*XlqKcKM+3=E6#D$QdVN+>0pka^&Z0H)o~!wg1#B zbr~sMh>jV~xV)0~+_x<@!^F`t+qGJz{Da8^24B5-{PJsAd!$r|4UZx9)fB)4GgBG( zz3+RK%2$h$m^V`CmxRqdKLpoF^%NYn8H)eG$Fz8BLe96&<`BU%gpR%PGP=?(wP>)A z@UDYKSM1&o{%gLcw6iA@U+z9zR$W2S*|}x^pnh&sWcZfzUGII<2BVh;4oQ;d#_=+# z&Wj;=25ZX#>-_c0YH{a_@}ubx zK#A}_>x(vMuu*O1sjIZGmQU2r=r0PC@KtGCHm(|^R&An0K^LN(rekKemLt{El~Yxd z=aB!UdU;nj(@Jc=Ve!2SYDt|U>h=ajrkf>CqS}nJ_)d!s+&xdJ+8A{^`ZQ*&9%dM! z8B9`a2$C9_yd2eNAE16iA?A`M&s>p@aQGW(l}Y)%H!(b<4>=k(W|pD8U9yFFQDi+? zFJ4yNB6mS1EEtJ+I3ft5xnQg_NVmcn*3|}>E6u_~0{K^l=>(!$5R}50ase>xKp2UG zg9ys(RCKWn9S6Yh{`wAazhDRf5HeE!f9-vTTT@Bb_@!7t6a+y;Wut&J;Q|Q|5LjC1 z1nEs!Lb!y4KuDvC8xcXIDN+Q2t`zAa-2x~ID5#(`6&s=`U1bH=vai2Mvajp&{J#I- z`{v1$duQgHJ9E#OIdkruGcyqI%iuX?0Fzu01c`DW4sU*06_OH4!RLbs#}tPn@^Q)^ ztDA?*g?!{ztXEK54*%}vvS0Lo4ti8(CTdNTjr`kl{a)8YSWOt+?~r9vqJ-$%8Pf4| zCw%3(`7p1BT6SW=$!m>P1gLwQ!XLzQ{mXo9wm*0cc)T4`*-B|1+Z}0Zq@bYR=7%qH%Cs{ zB5q?lxUr4o=JUsfaR)ss&b&IcFH>^kPV6$>sGx;@<(=Z@(GOreE2a#<(!Q3}m;m|n z?S`k5jYq(0B((FpEZgIrN^`%VOHv(Un`S(j&;GRWbY{UPD3tipyx+ZVceM+( zUP_@~sw1i`Ygfk4)Y%PlDb+Y1-F<5`&$Tr@YdoH~sU_>LeipXXWUID5UhO%zr?D2^ z?v}=iNgeJZPO$gZEvnfLPj>ue^w9g`h2Ww$$HVB@;etGsBYwJjkP;t{9t^!&erdvf z+aP+_aN8ySu5T@|9hXa%AK>de4u0iceor~^%;a;{l^s4s?&WX1%a@ej8oYiH=iVG} z>$m&T{y`T!b|@<76Se)DTrR#}@TQNmKHO7eDT^_XnAU@wgt|*#fP=!9`1_g zYB&wA*rnpu>#_Gz+)o%!1Ta1+ysiW>w44T$bbeMfrUui7$w%-Budqz8pzB%a9 z9ejNDM3oLyDnvf<#p};jn<3sBdDQZ8{7!AH?cpS?#8l%KX>9%N54m;ROP3V%C8P|Zd`Y}ra;O)+vJRFz>YHuFB{epp4ccm!Cvo8HDg<}W(FJ1l4D9=$`{TX zk@YQ$@>_Q==Q~j5&bQlI9$h}WS8kwFVczM%1;s6c-I+f5j9ttveboWD4y`++n~N1! zh+M-|KZ`krt*233$q|j_b=2KY^&KOU!|5|m23@PMv%XJSmik)jeJ)WBWS{+5R=T1& zo;!1o(B->N?Rs-oNBGl4?R-DC`UbE1zwH89H#(pGtu3ETKh-)Zt2GzeKp6LUweb&{ zeAL@oIeZt|uWL*xDQ%3Q*$2_Bt&A>&2%-ECFh~nqS%c4F-s$ra-~fBU!PTI@gI|&e zn3UtL1&5gj9T`E141y>CvmbrQ-S)h9u$F$rMgGmVgFV5)?nm**t6N~nO?7@PnBM8% z*1x1rDO9M};a-Wo<3|4_7w9t+zvtpt1tWa|`$2lW{j{0OI7#vC5|y$TSw$ZZr>|;` zm;OBz8uz)n=XjZ1QG-q1d4o;g&NSa2I}%{fK$K}jDDJH1+tvlH_IJ;my-3mqUVAS~ z;tt*=+KhQnZWin3)e6%TSorde%}M6Z1~NpfIXA!G|NNdWmqZUk{2&j`B9|!_7E{6H zlnTA(Dt=tck;F>KsVU|{>ibC*!qXzdoHZJSv+i21D%Qdm>ONNISMl;Z`aSOCy7<{_ zSs_(8wcBf;WNSidkSq4CP`!DcaIdt7x#s<}{dH3K5kjR;+%N`X)Oud~7BVzhy zuM%!n%FnvyyAA~yYhKlPYb9}Oa|U&33x(D=)Uf_QsH>H!39~G~ah-U96g~WM&#MM; zHsK+Tu3eF`OKULWosy*Tn+bi@tR)$BX z54z43NR~L55{%`K6khi2UZb{W2%mcrv)q1QIaS(W2^mB8Zf|*=zX5(}^4YcfH_6IP ztrSeUuR-ny{7$z;uvs}b%cL%RIH4_E#nt`H)1ylVD2^;+ri``Ut-5!Oq;n64Uq9-- zwzb}3Y4GpN%zdec19#mxC!VQ{?LA-bqp+1OGt>Y^ARz=?} z51jpS;*C<{DaUcT$BTRLzm|q~Ui8IR^qPJa4H}^I9g8{VusP6G7MrM9(iBxg2YY?y zH>tXpFS~4Zn!9nMq`E&ned8}(1>45bH=$bD3pFX7bRSp;K&nhoIIalD)pl-54 zu3If(^>RI(r^gR?O%07`y2S+~?|-%0;pI%8o1W5Qjudh68XAL|ov(JID`!$0YVUvB zV0|@j##qxmE`4z;nQ7@DOTk(6PeyKVaJ(eVT?Gxf!` zmWu=Z-G6q8gziyD{u^yu^Wq}*N*>qa^~H-wU2Jb}Zhi&VY{Zc{ z?087u(qP3cJ$xJbY0g%o%gl&N3uN5AuIIK_On!fWem%USZAOBWR#PiJGm;8}7`ne+S5y8oml?09Q7WQ2aG zoz-AJRyji@O~?lk2ltN2IQ5=q6S@pU2{^Q@^>ZaRk4c?%-pTLJTDN-DEbrUekU(@g z;1@+Ms>zS_@eCf@GK_Uu|IyKt(xB8B|EUe5P9QM0Ug@X|ZC{$uJo_a}e`s?py^VEY zy8X-0_N{Uq_x#j{Gm}lZ65>(h7RmzhxU;#ehwPB)QFQR?$h*Kj1{M1*WCW<=euzAl z40+o0@9G!zB$u>_z7lrH{`4VA?3)f}9o?0bt6rU{UUf?7{PDJ$k{Ia{d+%_zRhIu; z`kxJc>7Pss?{qb@o@D%eWWh*QJ>X;DEOzOv>O4A|8SvIU?ZfZ3eYzP3ykz3~n9trA zWK_7OIs3R@z;m5fo;9sH zRW3Gd{_&4^SCA8TAAe;0)bO!;eurnxS5IPv=G4J|7nC6i|SC2?_FSPs))!axtx>9tx=R}1ug{J3!q5INUrArQW z{#d4Mfb5RuuH>NEp_oUQr3!IB=G~fJe~(Ia4@c}H=ed03aY=?o`kk=Tf%8Fs&OLv+ ztX{oZF!8Ip;Ule$Ss(UjjGl@;Gxf|bw{6?ys^Z%%lq)W3u1x)PnT4`59U+x35cLR7 zb^M!)rea~4QCF6@g-GwhOx=M7>9?s(#D0~NQQ+A5!yP&hhtV&cyYUxZtK zQ{R*6q~2H5nR26^_wh zO4Ujp65@bQdXzBupcg%1CzJuinn)~bWWnT$S_`E1(d0KwT=c~y1(cBL&FcQ?(#ugr zE;-!~gx00UKipxeeV%sr(YJV8x|^|v#jl(9*mBn&zxAg76|d?*maV z51%2VEFl3*E<7v4dSP<{n5pM!B2XM5$xlK8OxUj^@h0+BySf5g+|Q~8HV`vp6|)M& zUj`nd)O)_xcD(jvTd=py8nnl3{NwOcTPC z()rPAYa=3_-SfG5?QC(&@S^O~9GRDGhmy}v|LUo7EosT|Ik+!Jg?J zV_tP-pi^3uN&&g}>-XXA$r9DhUFAnR$e{PwW5Ed#T*XoQVWF2w!?vwJ-*~QrmyEY} zz25)QxX^p8y>xw}&%=%yp-`UR{!&m#r<)}1kT-{h&$RKA?m znk`57p?cMK6=aAHYIEDfu*Db_HusO!#7au`=4xMC_q$FJ3OTM%OZOngXp^huV%z64n(yyqwT@&o4PoZK z2**bzY$}R^B{XjRrZ7BRk?H62McWZpT$i2rtfQ^G?SpB5mDK~OqM-|~hq-In#H*T| zDQwc6_eNV26+6a1yZRqU5WI{v z&D;05f55&<|8!;7!Gjy6?e{9Jqg&3Gc~j^GV|puv7e>EB!I`PzQ@NMwwOFe;6RrnG z{-eq?SyY-`*r{o1q~qYn4u@k*H4YpSiE zz6M{sICrbo3V*)Om-uSru=87Q!uqKW&!?ezs;^hOzC%>TgsH^+mt#wbP6`3K|Wa zaCLlsVjIxTdy7g#cL&#NaWQ=Z4^y|?J0Coepspc0slzrp69q0{r^#NmQ{|sH-I{UW zGvOn!^jfAFyZER=ikLIG_Q1U+Q+XEjH(?#TST|PQM$8V_?UObubD*^A(om4J@>*~R zfdd4bGa)`NdCPpeQ4nA^;3eR#NYCVp-R;f=IqOJzoy~8P3L>`EKX^oUDV%p zt)tBw;k*?nk)=~HV``r^?~7;4DQs@GZyEn8368j2NTs}wN550gUUPi$(A~W;;scq~ z{yXwgmmBn7V?V1e3}$y#zfw5cEOn{EFDrQJ^-*0BoO)YVwFPx`-;V8>{tw19W~VEU z9L8(ACYYvBs|HqP=iV2P(Wx6U>{A`u73UT!Y-DJ9w~yja24HP8A_!YOC$$`PPR^N# zwN7KSNZa?1=addk%`L^Zc6`ddAW_@tS&KA3t52>^bbLg1OEvH;l|gIod@ELK=|}E3 zZy4UKJlX2e(|lH>YSaql4SS~yws=tRTH5}y3G-V*i}T%fj^#ijx%tvQ!aP5D)pLMI zVt`Kk0L--*4wNBnfdZoyN7!80PK9F*@g}T5uLZ7-95E;fcm;^bOMoe?Ag&P5N3KhY zr~=m~frk_a0ux?*KY_I=ih#QrkSNc;ZPWc)4J`*)!cA)Qym;q1fkN;lV=*rS_ZXO9f8ik8yhNcgXko1g7t zl*9s`^E%-E#4g8$oTczTs}=+4Y0q!yx<=tO-4=&pPLFrwXI#KBA2ZV=}UHGzk;wR~E^M#*a<{>n^&Oa$Bs@tVI=GuPbK z891`W$LPIY3&Gi4R|f5sKd@8&anXn1(;YoH;{+os)CuDQw}FQN&)0ZTC*gWfUPn1_ zc>uaR7aR<(gNIHg15TiXEt2*FH)SBh)(QciF)$X1av-2<0G5CNQpv(V9D=c5g13ah z2CU@+JcL96%wb5{@<~W%p|XyUrP*cK1xX*r9LKws5*)h_juhSpcm?nVHen3dQ^g*5 zJ^{Of5DFwfkUbb&AqWF})^e1o#`hfKM9IqsENSMDy+eT;? z=Q@Z%P*|>z8X&-aVR`U2Qud-8K3Ro0JWm-wMhKDuJ8FOhIsg=anM(;J+41QBd|LPc zc=$@$NkP)QTnGXd6mar_4qyO?3xV}0=D^}(o(#@oK%G1dJV$xmQ7D-MGI^8+0&Ewj z8zssCbIJC?plk9BM54g$9@y^rfyZk8Tr;qv0+Gj)gS=8g1i8SY===CU4&=}c+@?ei zqlEb!0u_Szdm{j41t8n=k0KHzZ~z;@!UEpBGo)n4dkE!mGFAeD#1OA!PiQ4Xjf z?@ve>{5Z-4lsWJGQ&4*a^Ca^d%E@5&wo)-G3=k-w${gs1IRt_-2TC%B*7FQlc0s&? zIm(A~KoJmhLj+J#6s$5(=D~qrfda3O0ylgp{`YMVftLa1$RBS+B14!g211D7-O++6 z8_^h)4vKeUVgg%|S`e~g1M6YT%>JpHxLw*pw-9&_h;!n5?fiOie*ORN?|&%;IFFB5f!+z) z_V?fR-T1%8@V~SfKrX@(P}Z?`oBszXrvI-Lv-a1+f|wiZfY@Dma_G!$VVkwk>)4uu412-6$$Y$_Z5C@Ck5E27H;3z7aDWDZAhQ?$Af{|$q zm=y-{e#8hwHVc6x*eG2rnnzMHEriE<{?mv^2$@PHz<}>R;6Z&N2uvcI%C8U>%gdy* z2@Kvy32ws()KEbQ|0FU|x<-1i1;1gy7J||&erjwM{H6q3{CIT>egm_>!B+n*9w$J? z36OC=A>)2v*$ZIo1u*tM!Px(R!Lh<%yr4K20l13*+~p^5mwy0$U`4R0EOHbz4t5bR z*HeJ%DM0o73Dxrlj;|ooBaFrXO&MWCfHoRTWx@mjE2=XlLw*NG3`j-VfS1naHAn-lAt__CG0T_SXI;hFEIfRUl&j ztLf>ZVK*9$h0@dEOI~+!Jc7bt{?YT?Ku`4c*F~XGx&i;Hcwx_jgCU_kd)BUXwmJV@ zqR#L;T37Fz7fHC$^t(|1i$n?Y&Eu&Os5TQ38Eex61;)wu>`s6CzQIXP=Wk-rrHy*- zRlitw6|d3r1#2mH^jS(7UE8iarJ5CzzV`IJ+Y)=eAMX7wGDfAXnXg%MC+VXeo+i@h zn{~L?`S9!S`e&~<-Rj;w(yjbxYesW-xsIK;ouP<1M+kE3R*C!nM;9gw^x{|s61owK Q)icoBsG@>%vDx_l0J(x3$N&HU literal 0 HcmV?d00001 From ec3edca7e017fa4900df4bdc35b2667d252cddb8 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 17:39:36 -0600 Subject: [PATCH 24/61] Extract Text: Add missing body parameters Assisted-by: Codex --- src/pdfrest/client.py | 20 ++++++++++++++++++++ src/pdfrest/models/_internal.py | 5 +++++ tests/live/test_live_extract_text.py | 10 +++++++++- 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 627e72e7..e13f7811 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2359,6 +2359,11 @@ def extract_text( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, + full_text: Literal["off", "by_page", "document"] = "document", + preserve_line_breaks: Literal["off", "on"] = "off", + word_style: Literal["off", "on"] = "off", + word_coordinates: Literal["off", "on"] = "off", + output_type: Literal["json", "file"] = "json", output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -2370,6 +2375,11 @@ def extract_text( payload: dict[str, Any] = {"files": file} if pages is not None: payload["pages"] = pages + payload["full_text"] = full_text + payload["preserve_line_breaks"] = preserve_line_breaks + payload["word_style"] = word_style + payload["word_coordinates"] = word_coordinates + payload["output_type"] = output_type if output is not None: payload["output"] = output @@ -3306,6 +3316,11 @@ async def extract_text( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, + full_text: Literal["off", "by_page", "document"] = "document", + preserve_line_breaks: Literal["off", "on"] = "off", + word_style: Literal["off", "on"] = "off", + word_coordinates: Literal["off", "on"] = "off", + output_type: Literal["json", "file"] = "json", output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -3317,6 +3332,11 @@ async def extract_text( payload: dict[str, Any] = {"files": file} if pages is not None: payload["pages"] = pages + payload["full_text"] = full_text + payload["preserve_line_breaks"] = preserve_line_breaks + payload["word_style"] = word_style + payload["word_coordinates"] = word_coordinates + payload["output_type"] = output_type if output is not None: payload["output"] = output diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 4e799c6e..831ad7bd 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -362,6 +362,11 @@ class ExtractTextPayload(BaseModel): BeforeValidator(_int_to_string), PlainSerializer(_serialize_page_ranges), ] = None + full_text: Literal["off", "by_page", "document"] = "document" + preserve_line_breaks: Literal["off", "on"] = "off" + word_style: Literal["off", "on"] = "off" + word_coordinates: Literal["off", "on"] = "off" + output_type: Literal["json", "file"] = "json" output: Annotated[ str | None, Field(serialization_alias="output", min_length=1, default=None), diff --git a/tests/live/test_live_extract_text.py b/tests/live/test_live_extract_text.py index 18d98f71..b76590ff 100644 --- a/tests/live/test_live_extract_text.py +++ b/tests/live/test_live_extract_text.py @@ -18,7 +18,14 @@ def test_live_extract_text_success( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - response = client.extract_text(uploaded, output=None) + response = client.extract_text( + uploaded, + output_type="json", + full_text="document", + preserve_line_breaks="on", + word_style="off", + word_coordinates="off", + ) assert isinstance(response, ExtractTextResponse) assert response.text @@ -39,4 +46,5 @@ def test_live_extract_text_invalid_pages( client.extract_text( uploaded, extra_body={"pages": "last-1"}, + output_type="json", ) From 3e736da52d87ae0679c3c7929aeb2256fe42cacb Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 17:51:02 -0600 Subject: [PATCH 25/61] Translate PDF: Fix name of destination language parameter Assisted-by: Codex --- src/pdfrest/client.py | 28 +++++++--------------- src/pdfrest/models/_internal.py | 9 +++---- tests/live/test_live_translate_pdf_text.py | 6 ++--- tests/test_translate_pdf_text.py | 16 ++++++------- 4 files changed, 21 insertions(+), 38 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index e13f7811..408d567a 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2246,8 +2246,7 @@ def translate_pdf_text( self, file: PdfRestFile | Sequence[PdfRestFile], *, - target_language: str, - source_language: str | None = None, + output_language: str, pages: PdfPageSelection | None = None, output_format: TranslateOutputFormat = "markdown", output: str | None = None, @@ -2260,12 +2259,10 @@ def translate_pdf_text( payload: dict[str, Any] = { "files": file, - "target_language": target_language, + "output_language": output_language, "output_format": output_format, "output_type": "json", } - if source_language is not None: - payload["source_language"] = source_language if pages is not None: payload["pages"] = pages if output is not None: @@ -2290,8 +2287,7 @@ def translate_pdf_text_to_file( self, file: PdfRestFile | Sequence[PdfRestFile], *, - target_language: str, - source_language: str | None = None, + output_language: str, pages: PdfPageSelection | None = None, output_format: TranslateOutputFormat = "markdown", output: str | None = None, @@ -2304,12 +2300,10 @@ def translate_pdf_text_to_file( payload: dict[str, Any] = { "files": file, - "target_language": target_language, + "output_language": output_language, "output_format": output_format, "output_type": "file", } - if source_language is not None: - payload["source_language"] = source_language if pages is not None: payload["pages"] = pages if output is not None: @@ -3203,8 +3197,7 @@ async def translate_pdf_text( self, file: PdfRestFile | Sequence[PdfRestFile], *, - target_language: str, - source_language: str | None = None, + output_language: str, pages: PdfPageSelection | None = None, output_format: TranslateOutputFormat = "markdown", output: str | None = None, @@ -3217,12 +3210,10 @@ async def translate_pdf_text( payload: dict[str, Any] = { "files": file, - "target_language": target_language, + "output_language": output_language, "output_format": output_format, "output_type": "json", } - if source_language is not None: - payload["source_language"] = source_language if pages is not None: payload["pages"] = pages if output is not None: @@ -3247,8 +3238,7 @@ async def translate_pdf_text_to_file( self, file: PdfRestFile | Sequence[PdfRestFile], *, - target_language: str, - source_language: str | None = None, + output_language: str, pages: PdfPageSelection | None = None, output_format: TranslateOutputFormat = "markdown", output: str | None = None, @@ -3261,12 +3251,10 @@ async def translate_pdf_text_to_file( payload: dict[str, Any] = { "files": file, - "target_language": target_language, + "output_language": output_language, "output_format": output_format, "output_type": "file", } - if source_language is not None: - payload["source_language"] = source_language if pages is not None: payload["pages"] = pages if output is not None: diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 831ad7bd..eba57bed 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -439,13 +439,10 @@ class TranslatePdfTextPayload(BaseModel): ), PlainSerializer(_serialize_as_first_file_id), ] - target_language: Annotated[ - str, Field(serialization_alias="target_language", min_length=1) + output_language: Annotated[ + str, + Field(serialization_alias="output_language", min_length=1), ] - source_language: Annotated[ - str | None, - Field(serialization_alias="source_language", min_length=1, default=None), - ] = None pages: Annotated[ list[AscendingPageRange] | None, Field(serialization_alias="pages", min_length=1, default=None), diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py index fdb1e2ae..0ade6cd3 100644 --- a/tests/live/test_live_translate_pdf_text.py +++ b/tests/live/test_live_translate_pdf_text.py @@ -20,7 +20,7 @@ def test_live_translate_pdf_text_success( uploaded = client.files.create_from_paths([resource])[0] response = client.translate_pdf_text( uploaded, - target_language="fr", + output_language="fr", output_format="plaintext", ) @@ -42,7 +42,7 @@ def test_live_translate_pdf_text_invalid_output_format( with pytest.raises(PdfRestApiError, match="error"): client.translate_pdf_text( uploaded, - target_language="es", + output_language="es", extra_body={"output_format": "invalid-format"}, ) @@ -59,7 +59,7 @@ def test_live_translate_pdf_text_file_success( uploaded = client.files.create_from_paths([resource])[0] response = client.translate_pdf_text_to_file( uploaded, - target_language="fr", + output_language="fr", output_format="plaintext", ) diff --git a/tests/test_translate_pdf_text.py b/tests/test_translate_pdf_text.py index 8b45c88c..1eab644d 100644 --- a/tests/test_translate_pdf_text.py +++ b/tests/test_translate_pdf_text.py @@ -50,7 +50,7 @@ def test_translate_payload_rejects_invalid_mime() -> None: ValidationError, match="Must be a PDF, Markdown, or plain text file" ): TranslatePdfTextPayload.model_validate( - {"files": [image_file], "target_language": "fr"} + {"files": [image_file], "output_language": "fr"} ) @@ -66,8 +66,7 @@ def test_translate_pdf_text_json_success(monkeypatch: pytest.MonkeyPatch) -> Non payload_dump = TranslatePdfTextPayload.model_validate( { "files": [input_file], - "target_language": "fr", - "source_language": "en", + "output_language": "fr", "pages": ["1-2"], "output_format": "plaintext", "output_type": "json", @@ -96,8 +95,7 @@ def handler(request: httpx.Request) -> httpx.Response: with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: response = client.translate_pdf_text( input_file, - target_language="fr", - source_language="en", + output_language="fr", pages=["1-2"], output_format="plaintext", output="translation", @@ -119,7 +117,7 @@ def test_translate_pdf_text_request_customization( payload_dump = TranslatePdfTextPayload.model_validate( { "files": [input_file], - "target_language": "es", + "output_language": "es", "output_type": "file", } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) @@ -151,7 +149,7 @@ def handler(request: httpx.Request) -> httpx.Response: with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: response = client.translate_pdf_text_to_file( input_file, - target_language="es", + output_language="es", extra_query={"trace": "true"}, extra_headers={"X-Debug": "sync"}, extra_body={"debug": True}, @@ -177,7 +175,7 @@ async def test_async_translate_pdf_text_success( monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) payload_dump = TranslatePdfTextPayload.model_validate( - {"files": [input_file], "target_language": "de", "output_type": "json"} + {"files": [input_file], "output_language": "de", "output_type": "json"} ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) seen: dict[str, int] = {"post": 0} @@ -202,7 +200,7 @@ def handler(request: httpx.Request) -> httpx.Response: async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.translate_pdf_text( input_file, - target_language="de", + output_language="de", ) assert seen == {"post": 1} From 541f2ed4dfb151e71c5a5b6cd96acfd5fe24fc3c Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 17:54:39 -0600 Subject: [PATCH 26/61] Translate PDF live test: Fix expected field name Assisted-by: Codex --- src/pdfrest/models/public.py | 2 +- tests/live/test_live_translate_pdf_text.py | 2 +- tests/test_translate_pdf_text.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 8e25bfb0..c34a354a 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -358,7 +358,7 @@ class TranslatePdfTextResponse(BaseModel): model_config = ConfigDict(extra="allow") - translation: Annotated[ + translated_text: Annotated[ str | None, Field( description="Inline translation content when output_type is json.", diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py index 0ade6cd3..c254fd5e 100644 --- a/tests/live/test_live_translate_pdf_text.py +++ b/tests/live/test_live_translate_pdf_text.py @@ -25,7 +25,7 @@ def test_live_translate_pdf_text_success( ) assert isinstance(response, TranslatePdfTextResponse) - assert response.translation + assert response.translated_text assert response.input_id == uploaded.id diff --git a/tests/test_translate_pdf_text.py b/tests/test_translate_pdf_text.py index 1eab644d..ce9c8078 100644 --- a/tests/test_translate_pdf_text.py +++ b/tests/test_translate_pdf_text.py @@ -103,7 +103,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert seen == {"post": 1} assert isinstance(response, TranslatePdfTextResponse) - assert response.translation == "Bonjour" + assert response.translated_text == "Bonjour" assert response.input_id == input_file.id assert response.output_id is None assert response.output_url is None @@ -205,5 +205,5 @@ def handler(request: httpx.Request) -> httpx.Response: assert seen == {"post": 1} assert isinstance(response, TranslatePdfTextResponse) - assert response.translation == "Hallo" + assert response.translated_text == "Hallo" assert response.input_id == input_file.id From 2e8f3711b53fc2381e9c02f552fc951af7e57484 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 17:57:21 -0600 Subject: [PATCH 27/61] Extract Text live test: Fix incorrect return field Assisted-by: Codex --- src/pdfrest/models/public.py | 4 +++- tests/live/test_live_extract_text.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index c34a354a..882d1b1b 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -397,9 +397,11 @@ class ExtractTextResponse(BaseModel): model_config = ConfigDict(extra="allow") - text: Annotated[ + full_text: Annotated[ str | None, Field( + alias="fullText", + validation_alias=AliasChoices("full_text", "fullText"), description="Inline extracted text when output_type is json.", default=None, ), diff --git a/tests/live/test_live_extract_text.py b/tests/live/test_live_extract_text.py index b76590ff..bfbeb2cc 100644 --- a/tests/live/test_live_extract_text.py +++ b/tests/live/test_live_extract_text.py @@ -28,7 +28,7 @@ def test_live_extract_text_success( ) assert isinstance(response, ExtractTextResponse) - assert response.text + assert response.full_text assert response.input_id == uploaded.id From fc55159ab29219c8642620319c6cc8580b911ebb Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 18:01:29 -0600 Subject: [PATCH 28/61] Extract Text test: Remove lingering `.text` --- tests/test_extract_text.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_extract_text.py b/tests/test_extract_text.py index 048a636a..f8f01eaa 100644 --- a/tests/test_extract_text.py +++ b/tests/test_extract_text.py @@ -72,7 +72,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert seen == {"post": 1} assert isinstance(response, ExtractTextResponse) - assert response.text == "Example extracted text" + assert response.full_text == "Example extracted text" assert response.input_id == input_file.id assert response.output_id is None assert response.output_url is None @@ -164,5 +164,5 @@ def handler(request: httpx.Request) -> httpx.Response: assert seen == {"post": 1} assert isinstance(response, ExtractTextResponse) - assert response.text == "Async text" + assert response.full_text == "Async text" assert response.input_id == input_file.id From 1bed8c0c7bb880ec322f49d8f2b400527c803c34 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 18:14:59 -0600 Subject: [PATCH 29/61] Convert to Markdown: Excise `output_format` completely Assisted-by: Codex --- src/pdfrest/client.py | 4 ---- src/pdfrest/models/_internal.py | 4 ---- tests/test_convert_to_markdown.py | 3 --- 3 files changed, 11 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 408d567a..fcdd894c 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2176,7 +2176,6 @@ def convert_to_markdown( *, pages: PdfPageSelection | None = None, output_type: SummaryOutputType = "json", - output_format: SummaryOutputFormat = "markdown", page_break_comments: Literal["on", "off"] | None = None, output: str | None = None, extra_query: Query | None = None, @@ -2189,7 +2188,6 @@ def convert_to_markdown( payload: dict[str, Any] = { "files": file, "output_type": output_type, - "output_format": output_format, } if pages is not None: payload["pages"] = pages @@ -3127,7 +3125,6 @@ async def convert_to_markdown( *, pages: PdfPageSelection | None = None, output_type: SummaryOutputType = "json", - output_format: SummaryOutputFormat = "markdown", page_break_comments: Literal["on", "off"] | None = None, output: str | None = None, extra_query: Query | None = None, @@ -3140,7 +3137,6 @@ async def convert_to_markdown( payload: dict[str, Any] = { "files": file, "output_type": output_type, - "output_format": output_format, } if pages is not None: payload["pages"] = pages diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index eba57bed..7475d18c 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -402,10 +402,6 @@ class ConvertToMarkdownPayload(BaseModel): output_type: Annotated[ SummaryOutputType, Field(serialization_alias="output_type", default="json") ] = "json" - output_format: Annotated[ - SummaryOutputFormat, - Field(serialization_alias="output_format", default="markdown"), - ] = "markdown" page_break_comments: Annotated[ Literal["on", "off"] | None, Field(serialization_alias="page_break_comments", default=None), diff --git a/tests/test_convert_to_markdown.py b/tests/test_convert_to_markdown.py index 88c9135a..2ca6d219 100644 --- a/tests/test_convert_to_markdown.py +++ b/tests/test_convert_to_markdown.py @@ -57,7 +57,6 @@ def test_convert_to_markdown_json_success(monkeypatch: pytest.MonkeyPatch) -> No "pages": ["1-3"], "output": "md", "output_type": "json", - "output_format": "markdown", "page_break_comments": "on", } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) @@ -87,7 +86,6 @@ def handler(request: httpx.Request) -> httpx.Response: pages=["1-3"], output="md", output_type="json", - output_format="markdown", page_break_comments="on", ) @@ -108,7 +106,6 @@ def test_convert_to_markdown_request_customization( { "files": [input_file], "output_type": "file", - "output_format": "markdown", "page_break_comments": "off", } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) From c0d1750cce43dc4f51979e2c99dc03635115ce9f Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 18:29:31 -0600 Subject: [PATCH 30/61] Extract Text test: Fix expected fields Assisted-by: Codex --- tests/test_extract_text.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/tests/test_extract_text.py b/tests/test_extract_text.py index f8f01eaa..242be3ac 100644 --- a/tests/test_extract_text.py +++ b/tests/test_extract_text.py @@ -42,7 +42,16 @@ def test_extract_text_json_success(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) payload_dump = ExtractTextPayload.model_validate( - {"files": [input_file], "pages": ["1-3"], "output": "text"} + { + "files": [input_file], + "pages": ["1-3"], + "output": "text", + "full_text": "document", + "preserve_line_breaks": "off", + "word_style": "off", + "word_coordinates": "off", + "output_type": "json", + } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) seen: dict[str, int] = {"post": 0} @@ -55,7 +64,7 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "text": "Example extracted text", + "fullText": "Example extracted text", "inputId": str(input_file.id), }, ) @@ -84,7 +93,15 @@ def test_extract_text_request_customization( monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) payload_dump = ExtractTextPayload.model_validate( - {"files": [input_file], "output": "file-output"} + { + "files": [input_file], + "output": "file-output", + "full_text": "document", + "preserve_line_breaks": "off", + "word_style": "off", + "word_coordinates": "off", + "output_type": "json", + } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) output_id = str(PdfRestFileID.generate()) captured_timeout: dict[str, float | dict[str, float] | None] = {} @@ -138,7 +155,14 @@ async def test_async_extract_text_success( monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) payload_dump = ExtractTextPayload.model_validate( - {"files": [input_file]} + { + "files": [input_file], + "full_text": "document", + "preserve_line_breaks": "off", + "word_style": "off", + "word_coordinates": "off", + "output_type": "json", + } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) seen: dict[str, int] = {"post": 0} @@ -150,10 +174,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert payload == payload_dump return httpx.Response( 200, - json={ - "text": "Async text", - "inputId": str(input_file.id), - }, + json={"fullText": "Async text", "inputId": str(input_file.id)}, ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) From 431642156dd2de0e2bf08d92d830760c4cd39991 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 18:42:24 -0600 Subject: [PATCH 31/61] Translate PDF test: Fix expected translated text field Assisted-by: Codex --- src/pdfrest/models/public.py | 2 ++ tests/test_translate_pdf_text.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 882d1b1b..76c3416e 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -361,6 +361,8 @@ class TranslatePdfTextResponse(BaseModel): translated_text: Annotated[ str | None, Field( + alias="translated_text", + validation_alias=AliasChoices("translated_text", "translatedText"), description="Inline translation content when output_type is json.", default=None, ), diff --git a/tests/test_translate_pdf_text.py b/tests/test_translate_pdf_text.py index ce9c8078..6769f5c5 100644 --- a/tests/test_translate_pdf_text.py +++ b/tests/test_translate_pdf_text.py @@ -84,7 +84,7 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "translation": "Bonjour", + "translated_text": "Bonjour", "inputId": str(input_file.id), }, ) @@ -189,7 +189,7 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "translation": "Hallo", + "translated_text": "Hallo", "inputId": str(input_file.id), }, ) From ab2e6d831b5b98fbbbb33f828a187a2bcae2f551 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 19 Dec 2025 18:44:14 -0600 Subject: [PATCH 32/61] Translate PDF test: Fix unexpected GET Assisted-by: Codex --- tests/test_translate_pdf_text.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_translate_pdf_text.py b/tests/test_translate_pdf_text.py index 6769f5c5..47df9ba6 100644 --- a/tests/test_translate_pdf_text.py +++ b/tests/test_translate_pdf_text.py @@ -142,6 +142,16 @@ def handler(request: httpx.Request) -> httpx.Response: "inputId": str(input_file.id), }, ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=_make_markdown_file(output_id).model_dump( + mode="json", by_alias=True + ), + ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) From 97821c4b7c4bb188a3b92bc446836251fd214045 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Tue, 6 Jan 2026 15:17:31 -0600 Subject: [PATCH 33/61] Split Summarize PDF method by response file type - Add additional method to summarize to output file - Use PdfRestFileBasedResponse for summarize to file Assisted-by: Codex --- src/pdfrest/client.py | 104 +++++++++++++-- src/pdfrest/models/__init__.py | 4 +- src/pdfrest/models/public.py | 4 +- tests/live/test_live_summarize_pdf_text.py | 25 +++- tests/test_summarize_pdf_text.py | 147 +++++++++++++++++++-- 5 files changed, 256 insertions(+), 28 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index fcdd894c..59c83897 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -60,9 +60,9 @@ translate_httpx_error, ) from .models import ( - PdfRestDeletionResponse, ConvertToMarkdownResponse, ExtractTextResponse, + PdfRestDeletionResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -78,14 +78,14 @@ from .models._internal import ( BasePdfRestGraphicPayload, BmpPdfRestPayload, - DeletePayload, ConvertToMarkdownPayload, + DeletePayload, ExtractImagesPayload, ExtractTextPayload, GifPdfRestPayload, JpegPdfRestPayload, - PdfCompressPayload, OcrPdfPayload, + PdfCompressPayload, PdfFlattenAnnotationsPayload, PdfFlattenFormsPayload, PdfFlattenTransparenciesPayload, @@ -2134,21 +2134,24 @@ def summarize_pdf_text( summary_format: SummaryFormat = "overview", pages: PdfPageSelection | None = None, output_format: SummaryOutputFormat = "markdown", - output_type: SummaryOutputType = "json", output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, ) -> SummarizePdfTextResponse: - """Summarize the textual content of a PDF, Markdown, or text document.""" + """Summarize the textual content of a PDF, Markdown, or text document. + + Always requests JSON output and returns the inline summary response defined in + the pdfRest API reference. + """ payload: dict[str, Any] = { "files": file, "target_word_count": target_word_count, "summary_format": summary_format, "output_format": output_format, - "output_type": output_type, + "output_type": "json", } if pages is not None: payload["pages"] = pages @@ -2170,6 +2173,44 @@ def summarize_pdf_text( raw_payload = self._send_request(request) return SummarizePdfTextResponse.model_validate(raw_payload) + def summarize_pdf_text_to_file( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + target_word_count: int | None = 400, + summary_format: SummaryFormat = "overview", + pages: PdfPageSelection | None = None, + output_format: SummaryOutputFormat = "markdown", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Summarize a document and return the result as a downloadable file.""" + + payload: dict[str, Any] = { + "files": file, + "target_word_count": target_word_count, + "summary_format": summary_format, + "output_format": output_format, + "output_type": "file", + } + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/summarized-pdf-text", + payload=payload, + payload_model=SummarizePdfTextPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def convert_to_markdown( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -2668,7 +2709,6 @@ def compress_pdf( extra_body=extra_body, timeout=timeout, ) - def flatten_transparencies( self, @@ -3083,21 +3123,24 @@ async def summarize_pdf_text( summary_format: SummaryFormat = "overview", pages: PdfPageSelection | None = None, output_format: SummaryOutputFormat = "markdown", - output_type: SummaryOutputType = "json", output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, ) -> SummarizePdfTextResponse: - """Summarize the textual content of a PDF, Markdown, or text document.""" + """Summarize the textual content of a PDF, Markdown, or text document. + + Always requests JSON output and returns the inline summary response defined in + the pdfRest API reference. + """ payload: dict[str, Any] = { "files": file, "target_word_count": target_word_count, "summary_format": summary_format, "output_format": output_format, - "output_type": output_type, + "output_type": "json", } if pages is not None: payload["pages"] = pages @@ -3119,6 +3162,44 @@ async def summarize_pdf_text( raw_payload = await self._send_request(request) return SummarizePdfTextResponse.model_validate(raw_payload) + async def summarize_pdf_text_to_file( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + target_word_count: int | None = 400, + summary_format: SummaryFormat = "overview", + pages: PdfPageSelection | None = None, + output_format: SummaryOutputFormat = "markdown", + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Summarize a document and return the result as a downloadable file.""" + + payload: dict[str, Any] = { + "files": file, + "target_word_count": target_word_count, + "summary_format": summary_format, + "output_format": output_format, + "output_type": "file", + } + if pages is not None: + payload["pages"] = pages + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/summarized-pdf-text", + payload=payload, + payload_model=SummarizePdfTextPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def convert_to_markdown( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3659,7 +3740,6 @@ async def compress_pdf( extra_body=extra_body, timeout=timeout, ) - async def flatten_transparencies( self, @@ -3687,7 +3767,7 @@ async def flatten_transparencies( extra_body=extra_body, timeout=timeout, ) - + async def linearize_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index 6ab74f89..eb9ee359 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -1,7 +1,7 @@ from .public import ( - PdfRestDeletionResponse, ConvertToMarkdownResponse, ExtractTextResponse, + PdfRestDeletionResponse, PdfRestErrorResponse, PdfRestFile, PdfRestFileBasedResponse, @@ -13,9 +13,9 @@ ) __all__ = [ - "PdfRestDeletionResponse", "ConvertToMarkdownResponse", "ExtractTextResponse", + "PdfRestDeletionResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 76c3416e..dd5c916b 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -20,9 +20,9 @@ from typing_extensions import override __all__ = ( - "PdfRestDeletionResponse", "ConvertToMarkdownResponse", "ExtractTextResponse", + "PdfRestDeletionResponse", "PdfRestErrorResponse", "PdfRestFile", "PdfRestFileBasedResponse", @@ -314,6 +314,8 @@ class PdfRestDeletionResponse(BaseModel): min_length=1, ), ] + + class SummarizePdfTextResponse(BaseModel): """Response returned by the summarize-pdf-text tool.""" diff --git a/tests/live/test_live_summarize_pdf_text.py b/tests/live/test_live_summarize_pdf_text.py index 25d287b0..27920fd8 100644 --- a/tests/live/test_live_summarize_pdf_text.py +++ b/tests/live/test_live_summarize_pdf_text.py @@ -3,7 +3,7 @@ import pytest from pdfrest import PdfRestApiError, PdfRestClient -from pdfrest.models import SummarizePdfTextResponse +from pdfrest.models import PdfRestFileBasedResponse, SummarizePdfTextResponse from ..resources import get_test_resource_path @@ -21,7 +21,6 @@ def test_live_summarize_pdf_text_success( response = client.summarize_pdf_text( uploaded, target_word_count=40, - output_type="json", summary_format="overview", ) @@ -30,6 +29,28 @@ def test_live_summarize_pdf_text_success( assert response.input_id == uploaded.id +def test_live_summarize_pdf_text_to_file_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = client.files.create_from_paths([resource])[0] + response = client.summarize_pdf_text_to_file( + uploaded, + target_word_count=40, + summary_format="overview", + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files + assert response.output_file.id + assert response.input_id == uploaded.id + + def test_live_summarize_pdf_text_invalid_format( pdfrest_api_key: str, pdfrest_live_base_url: str, diff --git a/tests/test_summarize_pdf_text.py b/tests/test_summarize_pdf_text.py index 99f481f9..cbcf490b 100644 --- a/tests/test_summarize_pdf_text.py +++ b/tests/test_summarize_pdf_text.py @@ -7,10 +7,20 @@ from pydantic import ValidationError from pdfrest import AsyncPdfRestClient, PdfRestClient -from pdfrest.models import PdfRestFile, PdfRestFileID, SummarizePdfTextResponse +from pdfrest.models import ( + PdfRestFile, + PdfRestFileBasedResponse, + PdfRestFileID, + SummarizePdfTextResponse, +) from pdfrest.models._internal import SummarizePdfTextPayload -from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) def _make_text_file(file_id: str) -> PdfRestFile: @@ -96,7 +106,6 @@ def handler(request: httpx.Request) -> httpx.Response: summary_format="bullet_points", pages=["1-3"], output_format="plaintext", - output_type="json", output="summary", ) @@ -108,7 +117,66 @@ def handler(request: httpx.Request) -> httpx.Response: assert response.output_url is None -def test_summarize_pdf_text_request_customization( +def test_summarize_pdf_text_to_file_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = _make_text_file(str(PdfRestFileID.generate(1))) + payload_dump = SummarizePdfTextPayload.model_validate( + { + "files": [input_file], + "target_word_count": 200, + "summary_format": "bullet_points", + "pages": ["2-last"], + "output_format": "plaintext", + "output_type": "file", + "output": "summary", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + output_id = str(PdfRestFileID.generate()) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/summarized-pdf-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "outputId": output_id, + "inputId": str(input_file.id), + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=build_file_info_payload(output_id, "summary.txt", "text/plain"), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.summarize_pdf_text_to_file( + input_file, + target_word_count=200, + summary_format="bullet_points", + pages=["2-last"], + output_format="plaintext", + output="summary", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.id == output_id + assert response.output_file.name == "summary.txt" + assert response.input_id == input_file.id + + +def test_summarize_pdf_text_to_file_request_customization( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) @@ -124,9 +192,11 @@ def test_summarize_pdf_text_request_customization( output_id = str(PdfRestFileID.generate()) captured_timeout: dict[str, float | dict[str, float] | None] = {} + seen: dict[str, int] = {"post": 0, "get": 0} def handler(request: httpx.Request) -> httpx.Response: if request.method == "POST" and request.url.path == "/summarized-pdf-text": + seen["post"] += 1 assert request.url.params["trace"] == "true" assert request.headers["X-Debug"] == "sync" captured_timeout["value"] = request.extensions.get("timeout") @@ -137,28 +207,36 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "outputUrl": f"https://api.pdfrest.com/resource/{output_id}?format=file", "outputId": output_id, "inputId": str(input_file.id), }, ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload(output_id, "summary.txt", "text/plain"), + ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) transport = httpx.MockTransport(handler) with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: - response = client.summarize_pdf_text( + response = client.summarize_pdf_text_to_file( input_file, - output_type="file", extra_query={"trace": "true"}, extra_headers={"X-Debug": "sync"}, extra_body={"debug": True}, timeout=0.25, ) - assert isinstance(response, SummarizePdfTextResponse) - assert response.output_id == output_id - assert response.output_url + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.id == output_id + assert response.output_file.name == "summary.txt" timeout_value = captured_timeout["value"] assert timeout_value is not None if isinstance(timeout_value, dict): @@ -199,9 +277,56 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: - response = await client.summarize_pdf_text(input_file, output_type="json") + response = await client.summarize_pdf_text(input_file) assert seen == {"post": 1} assert isinstance(response, SummarizePdfTextResponse) assert response.summary == "Async summary" assert response.input_id == input_file.id + + +@pytest.mark.asyncio +async def test_async_summarize_pdf_text_to_file_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + payload_dump = SummarizePdfTextPayload.model_validate( + {"files": [input_file], "output_type": "file"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + output_id = str(PdfRestFileID.generate()) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/summarized-pdf-text": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + for key, value in payload_dump.items(): + assert payload[key] == value + return httpx.Response( + 200, + json={ + "outputId": output_id, + "inputId": str(input_file.id), + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-summary.txt", "text/plain" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.summarize_pdf_text_to_file(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.id == output_id + assert response.input_id == input_file.id From 6f080ec2565651c88c8ebd6b597160b590701a4a Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Tue, 6 Jan 2026 16:41:38 -0600 Subject: [PATCH 34/61] Translate PDF: Improve response types - `TranslatePdfTextResponse` gets missing `source_languages` and `output_language` fields. - Adds `TranslatePdfTextFileResponse`. This inherits from `PdfRestFileBasedResponse` with additional Translate PDF fields. - Add `FileBasedResponse` TypeVar bound to `PdfRestFileBasedResponse` so classes like `TranslatePdfTextFileResponse` can reuse file fetch logic. Assisted-by: Codex --- src/pdfrest/client.py | 59 +++++++++++++--------- src/pdfrest/models/__init__.py | 2 + src/pdfrest/models/public.py | 44 ++++++++++++++++ tests/live/test_live_translate_pdf_text.py | 11 +++- tests/test_translate_pdf_text.py | 16 +++++- 5 files changed, 103 insertions(+), 29 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 59c83897..f2a09726 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -69,12 +69,10 @@ PdfRestFileID, PdfRestInfoResponse, SummarizePdfTextResponse, + TranslatePdfTextFileResponse, TranslatePdfTextResponse, UpResponse, ) - -__all__ = ("AsyncPdfRestClient", "PdfRestClient") - from .models._internal import ( BasePdfRestGraphicPayload, BmpPdfRestPayload, @@ -122,6 +120,9 @@ TranslateOutputFormat, ) +__all__ = ("AsyncPdfRestClient", "PdfRestClient") +FileResponseModel = TypeVar("FileResponseModel", bound=PdfRestFileBasedResponse) + DEFAULT_BASE_URL = "https://api.pdfrest.com" API_KEY_ENV_VAR = "PDFREST_API_KEY" API_KEY_HEADER_NAME = "Api-Key" @@ -986,11 +987,12 @@ def _post_file_operation( endpoint: str, payload: dict[str, Any], payload_model: type[BaseModel], + response_model: type[FileResponseModel] = PdfRestFileBasedResponse, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> PdfRestFileBasedResponse: + ) -> FileResponseModel: job_options = payload_model.model_validate(payload) json_body = job_options.model_dump( mode="json", by_alias=True, exclude_none=True, exclude_unset=True @@ -1018,15 +1020,17 @@ def _post_file_operation( for file_id in output_ids ] - return PdfRestFileBasedResponse.model_validate( - { - "input_id": [str(file_id) for file_id in raw_response.input_id], - "output_file": [ - file.model_dump(mode="json", by_alias=True) for file in output_files - ], - "warning": raw_response.warning, - } - ) + response_payload: dict[str, Any] = { + "input_id": [str(file_id) for file_id in raw_response.input_id], + "output_file": [ + file.model_dump(mode="json", by_alias=True) for file in output_files + ], + "warning": raw_response.warning, + } + if raw_response.model_extra: + response_payload.update(raw_response.model_extra) + + return response_model.model_validate(response_payload) def send_request(self, request: _RequestModel) -> Any: return self._send_request(request) @@ -1250,11 +1254,12 @@ async def _post_file_operation( endpoint: str, payload: dict[str, Any], payload_model: type[BaseModel], + response_model: type[FileResponseModel] = PdfRestFileBasedResponse, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> PdfRestFileBasedResponse: + ) -> FileResponseModel: job_options = payload_model.model_validate(payload) request = self.prepare_request( "POST", @@ -1290,15 +1295,17 @@ async def throttled_fetch_file_info(file_id: str) -> PdfRestFile: ) ) - return PdfRestFileBasedResponse.model_validate( - { - "input_id": [str(file_id) for file_id in raw_response.input_id], - "output_file": [ - file.model_dump(mode="json", by_alias=True) for file in output_files - ], - "warning": raw_response.warning, - } - ) + response_payload: dict[str, Any] = { + "input_id": [str(file_id) for file_id in raw_response.input_id], + "output_file": [ + file.model_dump(mode="json", by_alias=True) for file in output_files + ], + "warning": raw_response.warning, + } + if raw_response.model_extra: + response_payload.update(raw_response.model_extra) + + return response_model.model_validate(response_payload) async def send_request(self, request: _RequestModel) -> Any: return await self._send_request(request) @@ -2334,7 +2341,7 @@ def translate_pdf_text_to_file( extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> PdfRestFileBasedResponse: + ) -> TranslatePdfTextFileResponse: """Translate textual content and receive a file-based response.""" payload: dict[str, Any] = { @@ -2356,6 +2363,7 @@ def translate_pdf_text_to_file( extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, + response_model=TranslatePdfTextFileResponse, ) def extract_images( @@ -3323,7 +3331,7 @@ async def translate_pdf_text_to_file( extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> PdfRestFileBasedResponse: + ) -> TranslatePdfTextFileResponse: """Translate textual content and receive a file-based response.""" payload: dict[str, Any] = { @@ -3345,6 +3353,7 @@ async def translate_pdf_text_to_file( extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, + response_model=TranslatePdfTextFileResponse, ) async def extract_images( diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index eb9ee359..755bbaf7 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -8,6 +8,7 @@ PdfRestFileID, PdfRestInfoResponse, SummarizePdfTextResponse, + TranslatePdfTextFileResponse, TranslatePdfTextResponse, UpResponse, ) @@ -22,6 +23,7 @@ "PdfRestFileID", "PdfRestInfoResponse", "SummarizePdfTextResponse", + "TranslatePdfTextFileResponse", "TranslatePdfTextResponse", "UpResponse", ] diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index dd5c916b..aafe55f4 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -29,6 +29,7 @@ "PdfRestFileID", "PdfRestInfoResponse", "SummarizePdfTextResponse", + "TranslatePdfTextFileResponse", "TranslatePdfTextResponse", "UpResponse", ) @@ -360,6 +361,24 @@ class TranslatePdfTextResponse(BaseModel): model_config = ConfigDict(extra="allow") + source_languages: Annotated[ + list[str] | None, + Field( + alias="source_languages", + validation_alias=AliasChoices("source_languages", "sourceLanguages"), + description="Languages detected in the source content.", + default=None, + ), + ] = None + output_language: Annotated[ + str | None, + Field( + alias="output_language", + validation_alias=AliasChoices("output_language", "outputLanguage"), + description="Target language used for the translation.", + default=None, + ), + ] = None translated_text: Annotated[ str | None, Field( @@ -396,6 +415,31 @@ class TranslatePdfTextResponse(BaseModel): ] = None +class TranslatePdfTextFileResponse(PdfRestFileBasedResponse): + """File-based response returned by the translated-pdf-text tool.""" + + model_config = ConfigDict(extra="allow") + + source_languages: Annotated[ + list[str] | None, + Field( + alias="source_languages", + validation_alias=AliasChoices("source_languages", "sourceLanguages"), + description="Languages detected in the source content.", + default=None, + ), + ] = None + output_language: Annotated[ + str | None, + Field( + alias="output_language", + validation_alias=AliasChoices("output_language", "outputLanguage"), + description="Target language used for the translation.", + default=None, + ), + ] = None + + class ExtractTextResponse(BaseModel): """Response returned by the extracted-text tool.""" diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py index c254fd5e..8c82039d 100644 --- a/tests/live/test_live_translate_pdf_text.py +++ b/tests/live/test_live_translate_pdf_text.py @@ -3,7 +3,10 @@ import pytest from pdfrest import PdfRestApiError, PdfRestClient -from pdfrest.models import PdfRestFileBasedResponse, TranslatePdfTextResponse +from pdfrest.models import ( + TranslatePdfTextFileResponse, + TranslatePdfTextResponse, +) from ..resources import get_test_resource_path @@ -26,6 +29,8 @@ def test_live_translate_pdf_text_success( assert isinstance(response, TranslatePdfTextResponse) assert response.translated_text + assert response.output_language == "fr" + assert response.source_languages assert response.input_id == uploaded.id @@ -63,6 +68,8 @@ def test_live_translate_pdf_text_file_success( output_format="plaintext", ) - assert isinstance(response, PdfRestFileBasedResponse) + assert isinstance(response, TranslatePdfTextFileResponse) assert response.output_files + assert response.output_language == "fr" + assert response.source_languages assert response.input_id == uploaded.id diff --git a/tests/test_translate_pdf_text.py b/tests/test_translate_pdf_text.py index 47df9ba6..c26c4c0c 100644 --- a/tests/test_translate_pdf_text.py +++ b/tests/test_translate_pdf_text.py @@ -9,8 +9,8 @@ from pdfrest import AsyncPdfRestClient, PdfRestClient from pdfrest.models import ( PdfRestFile, - PdfRestFileBasedResponse, PdfRestFileID, + TranslatePdfTextFileResponse, TranslatePdfTextResponse, ) from pdfrest.models._internal import TranslatePdfTextPayload @@ -86,6 +86,8 @@ def handler(request: httpx.Request) -> httpx.Response: json={ "translated_text": "Bonjour", "inputId": str(input_file.id), + "source_languages": ["en"], + "output_language": "fr", }, ) msg = f"Unexpected request {request.method} {request.url}" @@ -104,6 +106,8 @@ def handler(request: httpx.Request) -> httpx.Response: assert seen == {"post": 1} assert isinstance(response, TranslatePdfTextResponse) assert response.translated_text == "Bonjour" + assert response.source_languages == ["en"] + assert response.output_language == "fr" assert response.input_id == input_file.id assert response.output_id is None assert response.output_url is None @@ -140,6 +144,8 @@ def handler(request: httpx.Request) -> httpx.Response: "outputUrl": f"https://api.pdfrest.com/resource/{output_id}?format=file", "outputId": output_id, "inputId": str(input_file.id), + "source_languages": ["en"], + "output_language": "es", }, ) if request.method == "GET" and request.url.path == f"/resource/{output_id}": @@ -166,8 +172,10 @@ def handler(request: httpx.Request) -> httpx.Response: timeout=0.3, ) - assert isinstance(response, PdfRestFileBasedResponse) + assert isinstance(response, TranslatePdfTextFileResponse) assert response.output_file.id == output_id + assert response.output_language == "es" + assert response.source_languages == ["en"] timeout_value = captured_timeout["value"] assert timeout_value is not None if isinstance(timeout_value, dict): @@ -201,6 +209,8 @@ def handler(request: httpx.Request) -> httpx.Response: json={ "translated_text": "Hallo", "inputId": str(input_file.id), + "source_languages": ["en"], + "output_language": "de", }, ) msg = f"Unexpected request {request.method} {request.url}" @@ -216,4 +226,6 @@ def handler(request: httpx.Request) -> httpx.Response: assert seen == {"post": 1} assert isinstance(response, TranslatePdfTextResponse) assert response.translated_text == "Hallo" + assert response.source_languages == ["en"] + assert response.output_language == "de" assert response.input_id == input_file.id From ce5d12673f04d219003fe598e8cdfe65c131f614 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Tue, 6 Jan 2026 17:01:44 -0600 Subject: [PATCH 35/61] Revise and rename `extract_text` to `extract_pdf_text_to_file` - Fix `output_type` to "file" - Use `PdfRestFileBasedResponse` - Remove `ExtractTextResponse` Assisted-by: Codex --- src/pdfrest/client.py | 69 +++++----- ... => test_live_extract_pdf_text_to_file.py} | 16 +-- ...xt.py => test_extract_pdf_text_to_file.py} | 127 ++++++++++++------ 3 files changed, 126 insertions(+), 86 deletions(-) rename tests/live/{test_live_extract_text.py => test_live_extract_pdf_text_to_file.py} (75%) rename tests/{test_extract_text.py => test_extract_pdf_text_to_file.py} (56%) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index f2a09726..9a312d55 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -61,7 +61,6 @@ ) from .models import ( ConvertToMarkdownResponse, - ExtractTextResponse, PdfRestDeletionResponse, PdfRestErrorResponse, PdfRestFile, @@ -2395,7 +2394,7 @@ def extract_images( timeout=timeout, ) - def extract_text( + def extract_pdf_text_to_file( self, file: PdfRestFile | Sequence[PdfRestFile], *, @@ -2404,40 +2403,36 @@ def extract_text( preserve_line_breaks: Literal["off", "on"] = "off", word_style: Literal["off", "on"] = "off", word_coordinates: Literal["off", "on"] = "off", - output_type: Literal["json", "file"] = "json", output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> ExtractTextResponse: - """Extract text content from a PDF.""" + ) -> PdfRestFileBasedResponse: + """Extract text content from a PDF and return a file-based response.""" - payload: dict[str, Any] = {"files": file} + payload: dict[str, Any] = { + "files": file, + "full_text": full_text, + "preserve_line_breaks": preserve_line_breaks, + "word_style": word_style, + "word_coordinates": word_coordinates, + "output_type": "file", + } if pages is not None: payload["pages"] = pages - payload["full_text"] = full_text - payload["preserve_line_breaks"] = preserve_line_breaks - payload["word_style"] = word_style - payload["word_coordinates"] = word_coordinates - payload["output_type"] = output_type if output is not None: payload["output"] = output - validated_payload = ExtractTextPayload.model_validate(payload) - request = self.prepare_request( - "POST", - "/extracted-text", - json_body=validated_payload.model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ), + return self._post_file_operation( + endpoint="/extracted-text", + payload=payload, + payload_model=ExtractTextPayload, extra_query=extra_query, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, ) - raw_payload = self._send_request(request) - return ExtractTextResponse.model_validate(raw_payload) def preview_redactions( self, @@ -3385,7 +3380,7 @@ async def extract_images( timeout=timeout, ) - async def extract_text( + async def extract_pdf_text_to_file( self, file: PdfRestFile | Sequence[PdfRestFile], *, @@ -3394,40 +3389,36 @@ async def extract_text( preserve_line_breaks: Literal["off", "on"] = "off", word_style: Literal["off", "on"] = "off", word_coordinates: Literal["off", "on"] = "off", - output_type: Literal["json", "file"] = "json", output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> ExtractTextResponse: - """Extract text content from a PDF.""" + ) -> PdfRestFileBasedResponse: + """Extract text content from a PDF and return a file-based response.""" - payload: dict[str, Any] = {"files": file} + payload: dict[str, Any] = { + "files": file, + "full_text": full_text, + "preserve_line_breaks": preserve_line_breaks, + "word_style": word_style, + "word_coordinates": word_coordinates, + "output_type": "file", + } if pages is not None: payload["pages"] = pages - payload["full_text"] = full_text - payload["preserve_line_breaks"] = preserve_line_breaks - payload["word_style"] = word_style - payload["word_coordinates"] = word_coordinates - payload["output_type"] = output_type if output is not None: payload["output"] = output - validated_payload = ExtractTextPayload.model_validate(payload) - request = self.prepare_request( - "POST", - "/extracted-text", - json_body=validated_payload.model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ), + return await self._post_file_operation( + endpoint="/extracted-text", + payload=payload, + payload_model=ExtractTextPayload, extra_query=extra_query, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, ) - raw_payload = await self._send_request(request) - return ExtractTextResponse.model_validate(raw_payload) async def preview_redactions( self, diff --git a/tests/live/test_live_extract_text.py b/tests/live/test_live_extract_pdf_text_to_file.py similarity index 75% rename from tests/live/test_live_extract_text.py rename to tests/live/test_live_extract_pdf_text_to_file.py index bfbeb2cc..a96167f4 100644 --- a/tests/live/test_live_extract_text.py +++ b/tests/live/test_live_extract_pdf_text_to_file.py @@ -3,12 +3,12 @@ import pytest from pdfrest import PdfRestApiError, PdfRestClient -from pdfrest.models import ExtractTextResponse +from pdfrest.models import PdfRestFileBasedResponse from ..resources import get_test_resource_path -def test_live_extract_text_success( +def test_live_extract_pdf_text_to_file_success( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: @@ -18,21 +18,20 @@ def test_live_extract_text_success( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - response = client.extract_text( + response = client.extract_pdf_text_to_file( uploaded, - output_type="json", full_text="document", preserve_line_breaks="on", word_style="off", word_coordinates="off", ) - assert isinstance(response, ExtractTextResponse) - assert response.full_text + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files assert response.input_id == uploaded.id -def test_live_extract_text_invalid_pages( +def test_live_extract_pdf_text_to_file_invalid_pages( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: @@ -43,8 +42,7 @@ def test_live_extract_text_invalid_pages( ) as client: uploaded = client.files.create_from_paths([resource])[0] with pytest.raises(PdfRestApiError): - client.extract_text( + client.extract_pdf_text_to_file( uploaded, extra_body={"pages": "last-1"}, - output_type="json", ) diff --git a/tests/test_extract_text.py b/tests/test_extract_pdf_text_to_file.py similarity index 56% rename from tests/test_extract_text.py rename to tests/test_extract_pdf_text_to_file.py index 242be3ac..a2ad457c 100644 --- a/tests/test_extract_text.py +++ b/tests/test_extract_pdf_text_to_file.py @@ -7,13 +7,27 @@ from pydantic import ValidationError from pdfrest import AsyncPdfRestClient, PdfRestClient -from pdfrest.models import ExtractTextResponse, PdfRestFile, PdfRestFileID +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID from pdfrest.models._internal import ExtractTextPayload from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file -def test_extract_text_payload_rejects_non_pdf() -> None: +def _make_text_file(file_id: str, name: str = "extracted.txt") -> PdfRestFile: + return PdfRestFile.model_validate( + { + "id": file_id, + "name": name, + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "text/plain", + "size": 64, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + + +def test_extract_pdf_text_payload_rejects_non_pdf() -> None: file_id = str(PdfRestFileID.generate()) text_file = PdfRestFile.model_validate( { @@ -30,7 +44,7 @@ def test_extract_text_payload_rejects_non_pdf() -> None: ExtractTextPayload.model_validate({"files": [text_file]}) -def test_extract_text_payload_invalid_page_range() -> None: +def test_extract_pdf_text_payload_invalid_page_range() -> None: file_repr = make_pdf_file(PdfRestFileID.generate(1)) with pytest.raises( ValidationError, match="The start page must be less than or equal to the end" @@ -38,9 +52,10 @@ def test_extract_text_payload_invalid_page_range() -> None: ExtractTextPayload.model_validate({"files": [file_repr], "pages": ["5-2"]}) -def test_extract_text_json_success(monkeypatch: pytest.MonkeyPatch) -> None: +def test_extract_pdf_text_to_file_success(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) payload_dump = ExtractTextPayload.model_validate( { "files": [input_file], @@ -50,11 +65,11 @@ def test_extract_text_json_success(monkeypatch: pytest.MonkeyPatch) -> None: "preserve_line_breaks": "off", "word_style": "off", "word_coordinates": "off", - "output_type": "json", + "output_type": "file", } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) - seen: dict[str, int] = {"post": 0} + seen: dict[str, int] = {"post": 0, "get": 0} def handler(request: httpx.Request) -> httpx.Response: if request.method == "POST" and request.url.path == "/extracted-text": @@ -64,34 +79,41 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "fullText": "Example extracted text", - "inputId": str(input_file.id), + "inputId": [str(input_file.id)], + "outputId": [output_id], }, ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=_make_text_file(output_id).model_dump(mode="json", by_alias=True), + ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) transport = httpx.MockTransport(handler) with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: - response = client.extract_text( + response = client.extract_pdf_text_to_file( input_file, pages=["1-3"], output="text", ) - assert seen == {"post": 1} - assert isinstance(response, ExtractTextResponse) - assert response.full_text == "Example extracted text" + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) assert response.input_id == input_file.id - assert response.output_id is None - assert response.output_url is None + assert len(response.output_files) == 1 + assert response.output_file.id == output_id -def test_extract_text_request_customization( +def test_extract_pdf_text_request_customization( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) payload_dump = ExtractTextPayload.model_validate( { "files": [input_file], @@ -100,33 +122,42 @@ def test_extract_text_request_customization( "preserve_line_breaks": "off", "word_style": "off", "word_coordinates": "off", - "output_type": "json", + "output_type": "file", } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) - output_id = str(PdfRestFileID.generate()) captured_timeout: dict[str, float | dict[str, float] | None] = {} def handler(request: httpx.Request) -> httpx.Response: if request.method == "POST" and request.url.path == "/extracted-text": assert request.url.params["trace"] == "true" assert request.headers["X-Debug"] == "sync" - captured_timeout["value"] = request.extensions.get("timeout") + captured_timeout["post"] = request.extensions.get("timeout") payload = json.loads(request.content.decode("utf-8")) assert payload == payload_dump | {"debug": True} return httpx.Response( 200, json={ - "outputUrl": f"https://api.pdfrest.com/resource/{output_id}?format=file", - "outputId": output_id, - "inputId": str(input_file.id), + "inputId": [str(input_file.id)], + "outputId": [output_id], }, ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["get"] = request.extensions.get("timeout") + return httpx.Response( + 200, + json=_make_text_file(output_id, "debug.txt").model_dump( + mode="json", by_alias=True + ), + ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) transport = httpx.MockTransport(handler) with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: - response = client.extract_text( + response = client.extract_pdf_text_to_file( input_file, output="file-output", extra_query={"trace": "true"}, @@ -135,25 +166,33 @@ def handler(request: httpx.Request) -> httpx.Response: timeout=0.35, ) - assert isinstance(response, ExtractTextResponse) - assert response.output_id == output_id - assert response.output_url - timeout_value = captured_timeout["value"] - assert timeout_value is not None - if isinstance(timeout_value, dict): + assert isinstance(response, PdfRestFileBasedResponse) + assert len(response.output_files) == 1 + post_timeout = captured_timeout["post"] + get_timeout = captured_timeout["get"] + assert post_timeout is not None + assert get_timeout is not None + if isinstance(post_timeout, dict): assert all( - component == pytest.approx(0.35) for component in timeout_value.values() + component == pytest.approx(0.35) for component in post_timeout.values() ) else: - assert timeout_value == pytest.approx(0.35) + assert post_timeout == pytest.approx(0.35) + if isinstance(get_timeout, dict): + assert all( + component == pytest.approx(0.35) for component in get_timeout.values() + ) + else: + assert get_timeout == pytest.approx(0.35) @pytest.mark.asyncio -async def test_async_extract_text_success( +async def test_async_extract_pdf_text_to_file_success( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) payload_dump = ExtractTextPayload.model_validate( { "files": [input_file], @@ -161,11 +200,11 @@ async def test_async_extract_text_success( "preserve_line_breaks": "off", "word_style": "off", "word_coordinates": "off", - "output_type": "json", + "output_type": "file", } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) - seen: dict[str, int] = {"post": 0} + seen: dict[str, int] = {"post": 0, "get": 0} def handler(request: httpx.Request) -> httpx.Response: if request.method == "POST" and request.url.path == "/extracted-text": @@ -174,16 +213,28 @@ def handler(request: httpx.Request) -> httpx.Response: assert payload == payload_dump return httpx.Response( 200, - json={"fullText": "Async text", "inputId": str(input_file.id)}, + json={ + "inputId": [str(input_file.id)], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=_make_text_file(output_id, "async.txt").model_dump( + mode="json", by_alias=True + ), ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) transport = httpx.MockTransport(handler) async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: - response = await client.extract_text(input_file) + response = await client.extract_pdf_text_to_file(input_file) - assert seen == {"post": 1} - assert isinstance(response, ExtractTextResponse) - assert response.full_text == "Async text" + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert len(response.output_files) == 1 assert response.input_id == input_file.id From 629047ddd9f8a6a2348e8f124a8ecc9f678b7b2a Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 09:26:09 -0600 Subject: [PATCH 36/61] PDF to Markdown: Use `PdfRestFileBasedResponse` Assisted-by: Codex --- src/pdfrest/client.py | 42 +++---- tests/live/test_live_convert_to_markdown.py | 11 +- tests/test_convert_to_markdown.py | 118 ++++++++++++++------ 3 files changed, 102 insertions(+), 69 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 9a312d55..9c96c49b 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -60,7 +60,6 @@ translate_httpx_error, ) from .models import ( - ConvertToMarkdownResponse, PdfRestDeletionResponse, PdfRestErrorResponse, PdfRestFile, @@ -115,7 +114,6 @@ PdfXType, SummaryFormat, SummaryOutputFormat, - SummaryOutputType, TranslateOutputFormat, ) @@ -2222,19 +2220,18 @@ def convert_to_markdown( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, - output_type: SummaryOutputType = "json", page_break_comments: Literal["on", "off"] | None = None, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> ConvertToMarkdownResponse: - """Convert a PDF to Markdown.""" + ) -> PdfRestFileBasedResponse: + """Convert a PDF to Markdown and return a file-based response.""" payload: dict[str, Any] = { "files": file, - "output_type": output_type, + "output_type": "file", } if pages is not None: payload["pages"] = pages @@ -2243,20 +2240,15 @@ def convert_to_markdown( if output is not None: payload["output"] = output - validated_payload = ConvertToMarkdownPayload.model_validate(payload) - request = self.prepare_request( - "POST", - "/markdown", - json_body=validated_payload.model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ), + return self._post_file_operation( + endpoint="/markdown", + payload=payload, + payload_model=ConvertToMarkdownPayload, extra_query=extra_query, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, ) - raw_payload = self._send_request(request) - return ConvertToMarkdownResponse.model_validate(raw_payload) def ocr_pdf( self, @@ -3208,19 +3200,18 @@ async def convert_to_markdown( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, - output_type: SummaryOutputType = "json", page_break_comments: Literal["on", "off"] | None = None, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, timeout: TimeoutTypes | None = None, - ) -> ConvertToMarkdownResponse: - """Convert a PDF to Markdown.""" + ) -> PdfRestFileBasedResponse: + """Convert a PDF to Markdown and return a file-based response.""" payload: dict[str, Any] = { "files": file, - "output_type": output_type, + "output_type": "file", } if pages is not None: payload["pages"] = pages @@ -3229,20 +3220,15 @@ async def convert_to_markdown( if output is not None: payload["output"] = output - validated_payload = ConvertToMarkdownPayload.model_validate(payload) - request = self.prepare_request( - "POST", - "/markdown", - json_body=validated_payload.model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ), + return await self._post_file_operation( + endpoint="/markdown", + payload=payload, + payload_model=ConvertToMarkdownPayload, extra_query=extra_query, extra_headers=extra_headers, extra_body=extra_body, timeout=timeout, ) - raw_payload = await self._send_request(request) - return ConvertToMarkdownResponse.model_validate(raw_payload) async def ocr_pdf( self, diff --git a/tests/live/test_live_convert_to_markdown.py b/tests/live/test_live_convert_to_markdown.py index f86215af..e3390a67 100644 --- a/tests/live/test_live_convert_to_markdown.py +++ b/tests/live/test_live_convert_to_markdown.py @@ -3,7 +3,7 @@ import pytest from pdfrest import PdfRestApiError, PdfRestClient -from pdfrest.models import ConvertToMarkdownResponse +from pdfrest.models import PdfRestFileBasedResponse from ..resources import get_test_resource_path @@ -18,13 +18,10 @@ def test_live_convert_to_markdown_success( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - response = client.convert_to_markdown( - uploaded, - output_type="json", - ) + response = client.convert_to_markdown(uploaded) - assert isinstance(response, ConvertToMarkdownResponse) - assert response.markdown + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files assert response.input_id == uploaded.id diff --git a/tests/test_convert_to_markdown.py b/tests/test_convert_to_markdown.py index 2ca6d219..22876eb8 100644 --- a/tests/test_convert_to_markdown.py +++ b/tests/test_convert_to_markdown.py @@ -7,12 +7,30 @@ from pydantic import ValidationError from pdfrest import AsyncPdfRestClient, PdfRestClient -from pdfrest.models import ConvertToMarkdownResponse, PdfRestFile, PdfRestFileID +from pdfrest.models import ( + PdfRestFile, + PdfRestFileBasedResponse, + PdfRestFileID, +) from pdfrest.models._internal import ConvertToMarkdownPayload from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, make_pdf_file +def _make_markdown_file(file_id: str, name: str = "markdown.md") -> PdfRestFile: + return PdfRestFile.model_validate( + { + "id": file_id, + "name": name, + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": "text/markdown", + "size": 64, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) + + def test_convert_to_markdown_payload_rejects_non_pdf() -> None: file_id = str(PdfRestFileID.generate()) text_file = PdfRestFile.model_validate( @@ -48,20 +66,21 @@ def test_convert_to_markdown_payload_invalid_page_break_comments() -> None: ) -def test_convert_to_markdown_json_success(monkeypatch: pytest.MonkeyPatch) -> None: +def test_convert_to_markdown_success(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) payload_dump = ConvertToMarkdownPayload.model_validate( { "files": [input_file], "pages": ["1-3"], "output": "md", - "output_type": "json", + "output_type": "file", "page_break_comments": "on", } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) - seen: dict[str, int] = {"post": 0} + seen: dict[str, int] = {"post": 0, "get": 0} def handler(request: httpx.Request) -> httpx.Response: if request.method == "POST" and request.url.path == "/markdown": @@ -72,10 +91,19 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "markdown": "# Title", - "inputId": str(input_file.id), + "inputId": [str(input_file.id)], + "outputId": [output_id], }, ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=_make_markdown_file(output_id).model_dump( + mode="json", by_alias=True + ), + ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) @@ -85,16 +113,13 @@ def handler(request: httpx.Request) -> httpx.Response: input_file, pages=["1-3"], output="md", - output_type="json", page_break_comments="on", ) - assert seen == {"post": 1} - assert isinstance(response, ConvertToMarkdownResponse) - assert response.markdown == "# Title" + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) assert response.input_id == input_file.id - assert response.output_id is None - assert response.output_url is None + assert len(response.output_files) == 1 def test_convert_to_markdown_request_customization( @@ -116,7 +141,7 @@ def handler(request: httpx.Request) -> httpx.Response: if request.method == "POST" and request.url.path == "/markdown": assert request.url.params["trace"] == "true" assert request.headers["X-Debug"] == "sync" - captured_timeout["value"] = request.extensions.get("timeout") + captured_timeout["post"] = request.extensions.get("timeout") payload = json.loads(request.content.decode("utf-8")) for key, value in payload_dump.items(): assert payload[key] == value @@ -124,11 +149,21 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "outputUrl": f"https://api.pdfrest.com/resource/{output_id}?format=file", - "outputId": output_id, - "inputId": str(input_file.id), + "inputId": [str(input_file.id)], + "outputId": [output_id], }, ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["get"] = request.extensions.get("timeout") + return httpx.Response( + 200, + json=_make_markdown_file(output_id, "debug.md").model_dump( + mode="json", by_alias=True + ), + ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) @@ -136,7 +171,6 @@ def handler(request: httpx.Request) -> httpx.Response: with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: response = client.convert_to_markdown( input_file, - output_type="file", extra_query={"trace": "true"}, extra_headers={"X-Debug": "sync"}, extra_body={"debug": True}, @@ -144,17 +178,24 @@ def handler(request: httpx.Request) -> httpx.Response: page_break_comments="off", ) - assert isinstance(response, ConvertToMarkdownResponse) - assert response.output_id == output_id - assert response.output_url - timeout_value = captured_timeout["value"] - assert timeout_value is not None - if isinstance(timeout_value, dict): + assert isinstance(response, PdfRestFileBasedResponse) + assert len(response.output_files) == 1 + post_timeout = captured_timeout["post"] + get_timeout = captured_timeout["get"] + assert post_timeout is not None + assert get_timeout is not None + if isinstance(post_timeout, dict): + assert all( + component == pytest.approx(0.4) for component in post_timeout.values() + ) + else: + assert post_timeout == pytest.approx(0.4) + if isinstance(get_timeout, dict): assert all( - component == pytest.approx(0.4) for component in timeout_value.values() + component == pytest.approx(0.4) for component in get_timeout.values() ) else: - assert timeout_value == pytest.approx(0.4) + assert get_timeout == pytest.approx(0.4) @pytest.mark.asyncio @@ -163,11 +204,12 @@ async def test_async_convert_to_markdown_success( ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) payload_dump = ConvertToMarkdownPayload.model_validate( - {"files": [input_file], "output_type": "json", "page_break_comments": "off"} + {"files": [input_file], "output_type": "file", "page_break_comments": "off"} ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) - seen: dict[str, int] = {"post": 0} + seen: dict[str, int] = {"post": 0, "get": 0} def handler(request: httpx.Request) -> httpx.Response: if request.method == "POST" and request.url.path == "/markdown": @@ -178,20 +220,28 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "markdown": "Async md", - "inputId": str(input_file.id), + "inputId": [str(input_file.id)], + "outputId": [output_id], }, ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=_make_markdown_file(output_id, "async.md").model_dump( + mode="json", by_alias=True + ), + ) msg = f"Unexpected request {request.method} {request.url}" raise AssertionError(msg) transport = httpx.MockTransport(handler) async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: response = await client.convert_to_markdown( - input_file, output_type="json", page_break_comments="off" + input_file, page_break_comments="off" ) - assert seen == {"post": 1} - assert isinstance(response, ConvertToMarkdownResponse) - assert response.markdown == "Async md" - assert response.input_id == input_file.id + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert len(response.output_files) == 1 From d87563750f986abbacd34039c0f65a22093d3b34 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 09:43:36 -0600 Subject: [PATCH 37/61] Add missing async live tests Assisted-by: Codex --- tests/live/test_live_convert_to_markdown.py | 39 ++++++++++++++- tests/live/test_live_convert_to_pdfx.py | 45 ++++++++++++++++- tests/live/test_live_convert_to_word.py | 44 +++++++++++++++- tests/live/test_live_extract_images.py | 39 ++++++++++++++- .../test_live_extract_pdf_text_to_file.py | 46 ++++++++++++++++- tests/live/test_live_flatten_pdf_forms.py | 41 ++++++++++++++- tests/live/test_live_graphic_conversions.py | 43 +++++++++++++++- tests/live/test_live_linearize_pdf.py | 41 ++++++++++++++- tests/live/test_live_ocr_pdf.py | 39 ++++++++++++++- tests/live/test_live_pdf_redactions.py | 50 ++++++++++++++++++- tests/live/test_live_summarize_pdf_text.py | 42 +++++++++++++++- tests/live/test_live_translate_pdf_text.py | 44 +++++++++++++++- 12 files changed, 501 insertions(+), 12 deletions(-) diff --git a/tests/live/test_live_convert_to_markdown.py b/tests/live/test_live_convert_to_markdown.py index e3390a67..23aa0e1c 100644 --- a/tests/live/test_live_convert_to_markdown.py +++ b/tests/live/test_live_convert_to_markdown.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFileBasedResponse from ..resources import get_test_resource_path @@ -25,6 +25,25 @@ def test_live_convert_to_markdown_success( assert response.input_id == uploaded.id +@pytest.mark.asyncio +async def test_live_async_convert_to_markdown_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + response = await client.convert_to_markdown(uploaded, output="async-md") + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files + assert response.output_file.name.startswith("async-md") + assert response.input_id == uploaded.id + + def test_live_convert_to_markdown_invalid_pages( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -40,3 +59,21 @@ def test_live_convert_to_markdown_invalid_pages( uploaded, extra_body={"pages": "last-1"}, ) + + +@pytest.mark.asyncio +async def test_live_async_convert_to_markdown_invalid_pages( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + with pytest.raises(PdfRestApiError): + await client.convert_to_markdown( + uploaded, + extra_body={"pages": "last-1"}, + ) diff --git a/tests/live/test_live_convert_to_pdfx.py b/tests/live/test_live_convert_to_pdfx.py index a08088b0..c010798d 100644 --- a/tests/live/test_live_convert_to_pdfx.py +++ b/tests/live/test_live_convert_to_pdfx.py @@ -4,7 +4,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFile from pdfrest.types import PdfXType @@ -50,6 +50,31 @@ def test_live_convert_to_pdfx_success( assert output_file.name.startswith("pdfx-live") +@pytest.mark.asyncio +@pytest.mark.parametrize("output_type", PDFX_TYPES, ids=list(PDFX_TYPES)) +async def test_live_async_convert_to_pdfx_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_pdfx: PdfRestFile, + output_type: PdfXType, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_to_pdfx( + uploaded_pdf_for_pdfx, + output_type=output_type, + output="async-pdfx", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-pdfx") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_pdfx.id) + + @pytest.mark.parametrize( "invalid_output_type", [ @@ -76,3 +101,21 @@ def test_live_convert_to_pdfx_invalid_output_type( output_type="PDF/X-1a", extra_body={"output_type": invalid_output_type}, ) + + +@pytest.mark.asyncio +async def test_live_async_convert_to_pdfx_invalid_output_type( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_pdfx: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.convert_to_pdfx( + uploaded_pdf_for_pdfx, + output_type="PDF/X-1a", + extra_body={"output_type": "PDF/X-0"}, + ) diff --git a/tests/live/test_live_convert_to_word.py b/tests/live/test_live_convert_to_word.py index c3c5822e..1318ffa6 100644 --- a/tests/live/test_live_convert_to_word.py +++ b/tests/live/test_live_convert_to_word.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFile from ..resources import get_test_resource_path @@ -57,6 +57,31 @@ def test_live_convert_to_word_success( assert output_file.name.endswith(".docx") +@pytest.mark.asyncio +async def test_live_async_convert_to_word_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_word: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_to_word( + uploaded_pdf_for_word, + output="async-word", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-word") + assert ( + output_file.type + == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + ) + assert str(response.input_id) == str(uploaded_pdf_for_word.id) + + def test_live_convert_to_word_invalid_file_id( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -73,3 +98,20 @@ def test_live_convert_to_word_invalid_file_id( uploaded_pdf_for_word, extra_body={"id": "00000000-0000-0000-0000-000000000000"}, ) + + +@pytest.mark.asyncio +async def test_live_async_convert_to_word_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_word: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.convert_to_word( + uploaded_pdf_for_word, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) diff --git a/tests/live/test_live_extract_images.py b/tests/live/test_live_extract_images.py index faaff70d..bb096ff0 100644 --- a/tests/live/test_live_extract_images.py +++ b/tests/live/test_live_extract_images.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFileBasedResponse from ..resources import get_test_resource_path @@ -25,6 +25,25 @@ def test_live_extract_images_success( assert response.input_id == uploaded.id +@pytest.mark.asyncio +async def test_live_async_extract_images_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("duckhat.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + response = await client.extract_images(uploaded, output="async-images") + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files + assert response.output_file.name.startswith("async-images") + assert response.input_id == uploaded.id + + def test_live_extract_images_invalid_pages( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -40,3 +59,21 @@ def test_live_extract_images_invalid_pages( uploaded, extra_body={"pages": "last-1"}, ) + + +@pytest.mark.asyncio +async def test_live_async_extract_images_invalid_pages( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("duckhat.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + with pytest.raises(PdfRestApiError): + await client.extract_images( + uploaded, + extra_body={"pages": "last-1"}, + ) diff --git a/tests/live/test_live_extract_pdf_text_to_file.py b/tests/live/test_live_extract_pdf_text_to_file.py index a96167f4..3c79238d 100644 --- a/tests/live/test_live_extract_pdf_text_to_file.py +++ b/tests/live/test_live_extract_pdf_text_to_file.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFileBasedResponse from ..resources import get_test_resource_path @@ -31,6 +31,32 @@ def test_live_extract_pdf_text_to_file_success( assert response.input_id == uploaded.id +@pytest.mark.asyncio +async def test_live_async_extract_pdf_text_to_file_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + response = await client.extract_pdf_text_to_file( + uploaded, + full_text="document", + preserve_line_breaks="on", + word_style="off", + word_coordinates="off", + output="async-text", + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files + assert response.output_file.name.startswith("async-text") + assert response.input_id == uploaded.id + + def test_live_extract_pdf_text_to_file_invalid_pages( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -46,3 +72,21 @@ def test_live_extract_pdf_text_to_file_invalid_pages( uploaded, extra_body={"pages": "last-1"}, ) + + +@pytest.mark.asyncio +async def test_live_async_extract_pdf_text_to_file_invalid_pages( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + with pytest.raises(PdfRestApiError): + await client.extract_pdf_text_to_file( + uploaded, + extra_body={"pages": "last-1"}, + ) diff --git a/tests/live/test_live_flatten_pdf_forms.py b/tests/live/test_live_flatten_pdf_forms.py index c6ad7fdb..eeed4c16 100644 --- a/tests/live/test_live_flatten_pdf_forms.py +++ b/tests/live/test_live_flatten_pdf_forms.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFile from ..resources import get_test_resource_path @@ -54,6 +54,28 @@ def test_live_flatten_pdf_forms( assert output_file.name.endswith(".pdf") +@pytest.mark.asyncio +async def test_live_async_flatten_pdf_forms_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.flatten_pdf_forms( + uploaded_pdf_with_forms, + output="async-flattened", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-flattened") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + + def test_live_flatten_pdf_forms_invalid_file_id( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -70,3 +92,20 @@ def test_live_flatten_pdf_forms_invalid_file_id( uploaded_pdf_with_forms, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, ) + + +@pytest.mark.asyncio +async def test_live_async_flatten_pdf_forms_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.flatten_pdf_forms( + uploaded_pdf_with_forms, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) diff --git a/tests/live/test_live_graphic_conversions.py b/tests/live/test_live_graphic_conversions.py index 2b68edb3..1f072946 100644 --- a/tests/live/test_live_graphic_conversions.py +++ b/tests/live/test_live_graphic_conversions.py @@ -5,7 +5,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFile from pdfrest.models._internal import ( BasePdfRestGraphicPayload, @@ -121,6 +121,28 @@ def uploaded_20_page_pdf( return client.files.create_from_paths([resource])[0] +@pytest.mark.asyncio +async def test_live_async_convert_to_png_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + response = await client.convert_to_png( + uploaded, + output_prefix="async-png", + resolution=150, + ) + + assert response.output_files + assert all(file_info.type == "image/png" for file_info in response.output_files) + assert str(response.input_id) == str(uploaded.id) + + @pytest.mark.parametrize( ("_endpoint_label", "spec", "color_model"), _valid_color_cases(), @@ -269,6 +291,25 @@ def test_live_graphic_invalid_smoothing( ) +@pytest.mark.asyncio +async def test_live_async_graphic_invalid_smoothing( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + with pytest.raises(PdfRestApiError): + await client.convert_to_png( + uploaded, + smoothing="none", + extra_body={"smoothing": "super-smooth"}, + ) + + @pytest.mark.parametrize( ("page_range", "expect_success"), [ diff --git a/tests/live/test_live_linearize_pdf.py b/tests/live/test_live_linearize_pdf.py index 59612691..09a671b4 100644 --- a/tests/live/test_live_linearize_pdf.py +++ b/tests/live/test_live_linearize_pdf.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFile from ..resources import get_test_resource_path @@ -54,6 +54,28 @@ def test_live_linearize_pdf( assert output_file.name.endswith(".pdf") +@pytest.mark.asyncio +async def test_live_async_linearize_pdf( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_linearize: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.linearize_pdf( + uploaded_pdf_for_linearize, + output="async-linearized", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-linearized") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_linearize.id) + + def test_live_linearize_pdf_invalid_file_id( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -70,3 +92,20 @@ def test_live_linearize_pdf_invalid_file_id( uploaded_pdf_for_linearize, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, ) + + +@pytest.mark.asyncio +async def test_live_async_linearize_pdf_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_linearize: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.linearize_pdf( + uploaded_pdf_for_linearize, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) diff --git a/tests/live/test_live_ocr_pdf.py b/tests/live/test_live_ocr_pdf.py index 065a7022..96790f26 100644 --- a/tests/live/test_live_ocr_pdf.py +++ b/tests/live/test_live_ocr_pdf.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFileBasedResponse from ..resources import get_test_resource_path @@ -26,6 +26,25 @@ def test_live_ocr_pdf_success( assert response.input_id == uploaded.id +@pytest.mark.asyncio +async def test_live_async_ocr_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + response = await client.ocr_pdf(uploaded, output="async-ocr") + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_files + assert response.output_file.name.startswith("async-ocr") + assert response.input_id == uploaded.id + + def test_live_ocr_pdf_invalid_pages( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -41,3 +60,21 @@ def test_live_ocr_pdf_invalid_pages( uploaded, extra_body={"pages": "last-1"}, ) + + +@pytest.mark.asyncio +async def test_live_async_ocr_pdf_invalid_pages( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + with pytest.raises(PdfRestApiError): + await client.ocr_pdf( + uploaded, + extra_body={"pages": "last-1"}, + ) diff --git a/tests/live/test_live_pdf_redactions.py b/tests/live/test_live_pdf_redactions.py index 796785a1..8e425daa 100644 --- a/tests/live/test_live_pdf_redactions.py +++ b/tests/live/test_live_pdf_redactions.py @@ -4,7 +4,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFile from pdfrest.types import PdfRedactionInstruction, PdfRedactionPreset @@ -135,6 +135,36 @@ def test_live_redaction_preview_and_apply_multiple( assert final_file.type == "application/pdf" +@pytest.mark.asyncio +async def test_live_async_redaction_preview_and_apply( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_redaction: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + preview = await client.preview_redactions( + uploaded_pdf_for_redaction, + redactions=[{"type": "literal", "value": "quick brown fox"}], + output="async-redaction-preview", + ) + + preview_file = preview.output_files[0] + applied = await client.apply_redactions( + preview_file, + output="async-redaction-final", + ) + + assert preview.output_files + assert preview_file.name.endswith("async-redaction-preview.pdf") + assert applied.output_files + final_file = applied.output_files[0] + assert final_file.name.endswith("async-redaction-final.pdf") + assert final_file.type == "application/pdf" + + @pytest.mark.parametrize( "extra_body", [ @@ -167,3 +197,21 @@ def test_live_redactions_invalid_payloads( preview_file = preview.output_files[0] with pytest.raises(PdfRestApiError): client.apply_redactions(preview_file, extra_body=extra_body) + + +@pytest.mark.asyncio +async def test_live_async_redactions_invalid_payloads( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_redaction: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError): + await client.preview_redactions( + uploaded_pdf_for_redaction, + redactions=[{"type": "literal", "value": "placeholder"}], + extra_body={"rgb_color": "-1,-1,-1"}, + ) diff --git a/tests/live/test_live_summarize_pdf_text.py b/tests/live/test_live_summarize_pdf_text.py index 27920fd8..69f6252f 100644 --- a/tests/live/test_live_summarize_pdf_text.py +++ b/tests/live/test_live_summarize_pdf_text.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFileBasedResponse, SummarizePdfTextResponse from ..resources import get_test_resource_path @@ -51,6 +51,28 @@ def test_live_summarize_pdf_text_to_file_success( assert response.input_id == uploaded.id +@pytest.mark.asyncio +async def test_live_async_summarize_pdf_text_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + response = await client.summarize_pdf_text( + uploaded, + target_word_count=30, + summary_format="overview", + ) + + assert isinstance(response, SummarizePdfTextResponse) + assert response.summary + assert response.input_id == uploaded.id + + def test_live_summarize_pdf_text_invalid_format( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -66,3 +88,21 @@ def test_live_summarize_pdf_text_invalid_format( uploaded, extra_body={"summary_format": "invalid-style"}, ) + + +@pytest.mark.asyncio +async def test_live_async_summarize_pdf_text_invalid_format( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + with pytest.raises(PdfRestApiError, match="error"): + await client.summarize_pdf_text( + uploaded, + extra_body={"summary_format": "invalid-style"}, + ) diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py index 8c82039d..1211b814 100644 --- a/tests/live/test_live_translate_pdf_text.py +++ b/tests/live/test_live_translate_pdf_text.py @@ -2,7 +2,7 @@ import pytest -from pdfrest import PdfRestApiError, PdfRestClient +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import ( TranslatePdfTextFileResponse, TranslatePdfTextResponse, @@ -34,6 +34,29 @@ def test_live_translate_pdf_text_success( assert response.input_id == uploaded.id +@pytest.mark.asyncio +async def test_live_async_translate_pdf_text_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + response = await client.translate_pdf_text( + uploaded, + output_language="es", + output_format="plaintext", + ) + + assert isinstance(response, TranslatePdfTextResponse) + assert response.translated_text + assert response.output_language == "es" + assert response.input_id == uploaded.id + + def test_live_translate_pdf_text_invalid_output_format( pdfrest_api_key: str, pdfrest_live_base_url: str, @@ -52,6 +75,25 @@ def test_live_translate_pdf_text_invalid_output_format( ) +@pytest.mark.asyncio +async def test_live_async_translate_pdf_text_invalid_output_format( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + resource = get_test_resource_path("report.pdf") + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + uploaded = (await client.files.create_from_paths([resource]))[0] + with pytest.raises(PdfRestApiError, match="error"): + await client.translate_pdf_text( + uploaded, + output_language="de", + extra_body={"output_format": "invalid-format"}, + ) + + def test_live_translate_pdf_text_file_success( pdfrest_api_key: str, pdfrest_live_base_url: str, From 57868d231f3ae45f786d9274da2cb1fa492d0e2f Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 10:23:07 -0600 Subject: [PATCH 38/61] Add additional assertions to live tests for new tools - Now evaluates: - File names - MIME types - File sizes - Warnings in response - Input ID Assisted-by: Codex --- tests/live/test_live_convert_to_excel.py | 4 ++++ tests/live/test_live_convert_to_markdown.py | 11 +++++++++- tests/live/test_live_convert_to_powerpoint.py | 4 ++++ .../test_live_convert_xfa_to_acroforms.py | 4 ++++ tests/live/test_live_extract_images.py | 22 ++++++++++++++++--- .../test_live_extract_pdf_text_to_file.py | 11 +++++++++- tests/live/test_live_flatten_annotations.py | 4 ++++ .../live/test_live_flatten_transparencies.py | 4 ++++ tests/live/test_live_linearize_pdf.py | 4 ++++ tests/live/test_live_ocr_pdf.py | 9 +++++++- tests/live/test_live_rasterize_pdf.py | 4 ++++ tests/live/test_live_summarize_pdf_text.py | 6 ++++- tests/live/test_live_translate_pdf_text.py | 5 +++++ 13 files changed, 85 insertions(+), 7 deletions(-) diff --git a/tests/live/test_live_convert_to_excel.py b/tests/live/test_live_convert_to_excel.py index 26068b28..05c95699 100644 --- a/tests/live/test_live_convert_to_excel.py +++ b/tests/live/test_live_convert_to_excel.py @@ -50,6 +50,8 @@ def test_live_convert_to_excel_success( output_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_excel.id) if output_name is not None: assert output_file.name.startswith(output_name) @@ -76,6 +78,8 @@ async def test_live_async_convert_to_excel_success( output_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_excel.id) diff --git a/tests/live/test_live_convert_to_markdown.py b/tests/live/test_live_convert_to_markdown.py index 23aa0e1c..8219cc2f 100644 --- a/tests/live/test_live_convert_to_markdown.py +++ b/tests/live/test_live_convert_to_markdown.py @@ -22,6 +22,11 @@ def test_live_convert_to_markdown_success( assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files + output_file = response.output_file + assert output_file.name.endswith(".md") + assert output_file.type == "text/markdown" + assert output_file.size > 0 + assert response.warning is None assert response.input_id == uploaded.id @@ -40,7 +45,11 @@ async def test_live_async_convert_to_markdown_success( assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files - assert response.output_file.name.startswith("async-md") + output_file = response.output_file + assert output_file.name.startswith("async-md") + assert output_file.type == "text/markdown" + assert output_file.size > 0 + assert response.warning is None assert response.input_id == uploaded.id diff --git a/tests/live/test_live_convert_to_powerpoint.py b/tests/live/test_live_convert_to_powerpoint.py index f46da580..a7de4a00 100644 --- a/tests/live/test_live_convert_to_powerpoint.py +++ b/tests/live/test_live_convert_to_powerpoint.py @@ -50,6 +50,8 @@ def test_live_convert_to_powerpoint_success( output_file.type == "application/vnd.openxmlformats-officedocument.presentationml.presentation" ) + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_powerpoint.id) if output_name is not None: assert output_file.name.startswith(output_name) @@ -78,6 +80,8 @@ async def test_live_async_convert_to_powerpoint_success( output_file.type == "application/vnd.openxmlformats-officedocument.presentationml.presentation" ) + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_powerpoint.id) diff --git a/tests/live/test_live_convert_xfa_to_acroforms.py b/tests/live/test_live_convert_xfa_to_acroforms.py index 428fccb2..a3fe020a 100644 --- a/tests/live/test_live_convert_xfa_to_acroforms.py +++ b/tests/live/test_live_convert_xfa_to_acroforms.py @@ -47,6 +47,8 @@ def test_live_convert_xfa_to_acroforms_success( assert response.output_files output_file = response.output_file assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_acroforms.id) if output_name is not None: assert output_file.name.startswith(output_name) @@ -90,6 +92,8 @@ async def test_live_async_convert_xfa_to_acroforms_success( output_file = response.output_file assert output_file.name.startswith("async") assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_acroforms.id) diff --git a/tests/live/test_live_extract_images.py b/tests/live/test_live_extract_images.py index bb096ff0..211e8591 100644 --- a/tests/live/test_live_extract_images.py +++ b/tests/live/test_live_extract_images.py @@ -21,7 +21,15 @@ def test_live_extract_images_success( response = client.extract_images(uploaded) assert isinstance(response, PdfRestFileBasedResponse) - assert response.output_files + output_files = response.output_files + assert output_files + assert all(file.name for file in output_files) + assert all( + file.type and (file.type.startswith("image/") or file.type == "application/zip") + for file in output_files + ) + assert all(file.size > 0 for file in output_files) + assert response.warning is None assert response.input_id == uploaded.id @@ -39,8 +47,16 @@ async def test_live_async_extract_images_success( response = await client.extract_images(uploaded, output="async-images") assert isinstance(response, PdfRestFileBasedResponse) - assert response.output_files - assert response.output_file.name.startswith("async-images") + output_files = response.output_files + assert output_files + assert output_files[0].name.startswith("async-images") + assert all(file.name for file in output_files) + assert all( + file.type and (file.type.startswith("image/") or file.type == "application/zip") + for file in output_files + ) + assert all(file.size > 0 for file in output_files) + assert response.warning is None assert response.input_id == uploaded.id diff --git a/tests/live/test_live_extract_pdf_text_to_file.py b/tests/live/test_live_extract_pdf_text_to_file.py index 3c79238d..f9c10e74 100644 --- a/tests/live/test_live_extract_pdf_text_to_file.py +++ b/tests/live/test_live_extract_pdf_text_to_file.py @@ -28,6 +28,11 @@ def test_live_extract_pdf_text_to_file_success( assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files + output_file = response.output_file + assert output_file.name.endswith(".txt") + assert output_file.type == "text/plain" + assert output_file.size > 0 + assert response.warning is None assert response.input_id == uploaded.id @@ -53,7 +58,11 @@ async def test_live_async_extract_pdf_text_to_file_success( assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files - assert response.output_file.name.startswith("async-text") + output_file = response.output_file + assert output_file.name.startswith("async-text") + assert output_file.type == "text/plain" + assert output_file.size > 0 + assert response.warning is None assert response.input_id == uploaded.id diff --git a/tests/live/test_live_flatten_annotations.py b/tests/live/test_live_flatten_annotations.py index 9a669fe2..54f4f021 100644 --- a/tests/live/test_live_flatten_annotations.py +++ b/tests/live/test_live_flatten_annotations.py @@ -47,6 +47,8 @@ def test_live_flatten_annotations_success( assert response.output_files output_file = response.output_file assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_annotations.id) if output_name is not None: assert output_file.name.startswith(output_name) @@ -72,6 +74,8 @@ async def test_live_async_flatten_annotations_success( output_file = response.output_file assert output_file.name.startswith("async") assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_annotations.id) diff --git a/tests/live/test_live_flatten_transparencies.py b/tests/live/test_live_flatten_transparencies.py index f7a8bb49..c8a98e93 100644 --- a/tests/live/test_live_flatten_transparencies.py +++ b/tests/live/test_live_flatten_transparencies.py @@ -50,6 +50,8 @@ def test_live_flatten_transparencies_success( assert response.output_files output_file = response.output_file assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_transparencies.id) if output_name is not None: assert output_file.name.startswith(output_name) @@ -75,6 +77,8 @@ async def test_live_async_flatten_transparencies_success( output_file = response.output_file assert output_file.name.startswith("async") assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_transparencies.id) diff --git a/tests/live/test_live_linearize_pdf.py b/tests/live/test_live_linearize_pdf.py index 09a671b4..f0dc7359 100644 --- a/tests/live/test_live_linearize_pdf.py +++ b/tests/live/test_live_linearize_pdf.py @@ -47,6 +47,8 @@ def test_live_linearize_pdf( assert response.output_files output_file = response.output_file assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_linearize.id) if output_name is not None: assert output_file.name.startswith(output_name) @@ -73,6 +75,8 @@ async def test_live_async_linearize_pdf( output_file = response.output_file assert output_file.name.startswith("async-linearized") assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_linearize.id) diff --git a/tests/live/test_live_ocr_pdf.py b/tests/live/test_live_ocr_pdf.py index 96790f26..816bf697 100644 --- a/tests/live/test_live_ocr_pdf.py +++ b/tests/live/test_live_ocr_pdf.py @@ -22,7 +22,11 @@ def test_live_ocr_pdf_success( assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files - assert response.output_file.id + output_file = response.output_file + assert output_file.name.endswith(".pdf") + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert response.input_id == uploaded.id @@ -42,6 +46,9 @@ async def test_live_async_ocr_pdf_success( assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files assert response.output_file.name.startswith("async-ocr") + assert response.output_file.type == "application/pdf" + assert response.output_file.size > 0 + assert response.warning is None assert response.input_id == uploaded.id diff --git a/tests/live/test_live_rasterize_pdf.py b/tests/live/test_live_rasterize_pdf.py index 70f41c20..45e9402f 100644 --- a/tests/live/test_live_rasterize_pdf.py +++ b/tests/live/test_live_rasterize_pdf.py @@ -47,6 +47,8 @@ def test_live_rasterize_pdf_success( assert response.output_files output_file = response.output_file assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_rasterize.id) if output_name is not None: assert output_file.name.startswith(output_name) @@ -72,6 +74,8 @@ async def test_live_async_rasterize_pdf_success( output_file = response.output_file assert output_file.name.startswith("async") assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(response.input_id) == str(uploaded_pdf_for_rasterize.id) diff --git a/tests/live/test_live_summarize_pdf_text.py b/tests/live/test_live_summarize_pdf_text.py index 69f6252f..e846da8c 100644 --- a/tests/live/test_live_summarize_pdf_text.py +++ b/tests/live/test_live_summarize_pdf_text.py @@ -47,7 +47,11 @@ def test_live_summarize_pdf_text_to_file_success( assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files - assert response.output_file.id + output_file = response.output_file + assert output_file.name.endswith(".md") + assert output_file.type == "text/markdown" + assert output_file.size > 0 + assert response.warning is None assert response.input_id == uploaded.id diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py index 1211b814..eea4e7b8 100644 --- a/tests/live/test_live_translate_pdf_text.py +++ b/tests/live/test_live_translate_pdf_text.py @@ -112,6 +112,11 @@ def test_live_translate_pdf_text_file_success( assert isinstance(response, TranslatePdfTextFileResponse) assert response.output_files + output_file = response.output_file + assert output_file.name.endswith(".txt") + assert output_file.type == "text/plain" + assert output_file.size > 0 + assert response.warning is None assert response.output_language == "fr" assert response.source_languages assert response.input_id == uploaded.id From 73d94afc2c206cef0ca419244b62fabf6d46b61d Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 12:05:04 -0600 Subject: [PATCH 39/61] Add match= expressions to live tests Assisted-by: Codex --- tests/live/test_live_compress_pdf.py | 4 ++-- tests/live/test_live_convert_to_excel.py | 4 ++-- tests/live/test_live_convert_to_markdown.py | 4 ++-- tests/live/test_live_convert_to_pdfx.py | 4 ++-- tests/live/test_live_convert_to_powerpoint.py | 4 ++-- tests/live/test_live_convert_to_word.py | 4 ++-- tests/live/test_live_convert_xfa_to_acroforms.py | 4 ++-- tests/live/test_live_delete.py | 4 ++-- tests/live/test_live_extract_images.py | 4 ++-- tests/live/test_live_extract_pdf_text_to_file.py | 4 ++-- tests/live/test_live_flatten_annotations.py | 4 ++-- tests/live/test_live_flatten_pdf_forms.py | 4 ++-- tests/live/test_live_flatten_transparencies.py | 4 ++-- tests/live/test_live_graphic_conversions.py | 12 ++++++------ tests/live/test_live_linearize_pdf.py | 4 ++-- tests/live/test_live_ocr_pdf.py | 4 ++-- tests/live/test_live_pdf_info.py | 2 +- tests/live/test_live_pdf_redactions.py | 6 +++--- tests/live/test_live_pdf_split_merge.py | 8 ++++---- tests/live/test_live_rasterize_pdf.py | 4 ++-- tests/live/test_live_summarize_pdf_text.py | 4 ++-- tests/live/test_live_translate_pdf_text.py | 4 ++-- 22 files changed, 50 insertions(+), 50 deletions(-) diff --git a/tests/live/test_live_compress_pdf.py b/tests/live/test_live_compress_pdf.py index 6ee8b365..0b3cdf66 100644 --- a/tests/live/test_live_compress_pdf.py +++ b/tests/live/test_live_compress_pdf.py @@ -158,7 +158,7 @@ def test_live_compress_pdf_invalid_level( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)compression"), ): client.compress_pdf( uploaded_pdf_for_compression, @@ -177,7 +177,7 @@ async def test_live_async_compress_pdf_invalid_level( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)compression"): await client.compress_pdf( uploaded_pdf_for_compression, compression_level="low", diff --git a/tests/live/test_live_convert_to_excel.py b/tests/live/test_live_convert_to_excel.py index 05c95699..f592aa40 100644 --- a/tests/live/test_live_convert_to_excel.py +++ b/tests/live/test_live_convert_to_excel.py @@ -93,7 +93,7 @@ def test_live_convert_to_excel_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.convert_to_excel( uploaded_pdf_for_excel, @@ -111,7 +111,7 @@ async def test_live_async_convert_to_excel_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.convert_to_excel( uploaded_pdf_for_excel, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, diff --git a/tests/live/test_live_convert_to_markdown.py b/tests/live/test_live_convert_to_markdown.py index 8219cc2f..760e1798 100644 --- a/tests/live/test_live_convert_to_markdown.py +++ b/tests/live/test_live_convert_to_markdown.py @@ -63,7 +63,7 @@ def test_live_convert_to_markdown_invalid_pages( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): client.convert_to_markdown( uploaded, extra_body={"pages": "last-1"}, @@ -81,7 +81,7 @@ async def test_live_async_convert_to_markdown_invalid_pages( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): await client.convert_to_markdown( uploaded, extra_body={"pages": "last-1"}, diff --git a/tests/live/test_live_convert_to_pdfx.py b/tests/live/test_live_convert_to_pdfx.py index c010798d..df0e6695 100644 --- a/tests/live/test_live_convert_to_pdfx.py +++ b/tests/live/test_live_convert_to_pdfx.py @@ -94,7 +94,7 @@ def test_live_convert_to_pdfx_invalid_output_type( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)pdf.?x"), ): client.convert_to_pdfx( uploaded_pdf_for_pdfx, @@ -113,7 +113,7 @@ async def test_live_async_convert_to_pdfx_invalid_output_type( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)pdf.?x"): await client.convert_to_pdfx( uploaded_pdf_for_pdfx, output_type="PDF/X-1a", diff --git a/tests/live/test_live_convert_to_powerpoint.py b/tests/live/test_live_convert_to_powerpoint.py index a7de4a00..8a1209a2 100644 --- a/tests/live/test_live_convert_to_powerpoint.py +++ b/tests/live/test_live_convert_to_powerpoint.py @@ -95,7 +95,7 @@ def test_live_convert_to_powerpoint_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.convert_to_powerpoint( uploaded_pdf_for_powerpoint, @@ -113,7 +113,7 @@ async def test_live_async_convert_to_powerpoint_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.convert_to_powerpoint( uploaded_pdf_for_powerpoint, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, diff --git a/tests/live/test_live_convert_to_word.py b/tests/live/test_live_convert_to_word.py index 1318ffa6..3ec6a334 100644 --- a/tests/live/test_live_convert_to_word.py +++ b/tests/live/test_live_convert_to_word.py @@ -92,7 +92,7 @@ def test_live_convert_to_word_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.convert_to_word( uploaded_pdf_for_word, @@ -110,7 +110,7 @@ async def test_live_async_convert_to_word_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.convert_to_word( uploaded_pdf_for_word, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, diff --git a/tests/live/test_live_convert_xfa_to_acroforms.py b/tests/live/test_live_convert_xfa_to_acroforms.py index a3fe020a..8e882a3b 100644 --- a/tests/live/test_live_convert_xfa_to_acroforms.py +++ b/tests/live/test_live_convert_xfa_to_acroforms.py @@ -66,7 +66,7 @@ def test_live_convert_xfa_to_acroforms_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.convert_xfa_to_acroforms( uploaded_pdf_for_acroforms, @@ -107,7 +107,7 @@ async def test_live_async_convert_xfa_to_acroforms_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.convert_xfa_to_acroforms( uploaded_pdf_for_acroforms, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, diff --git a/tests/live/test_live_delete.py b/tests/live/test_live_delete.py index 75727fef..52bdf6fd 100644 --- a/tests/live/test_live_delete.py +++ b/tests/live/test_live_delete.py @@ -57,7 +57,7 @@ def test_live_delete_files_invalid_id( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - with pytest.raises(ValidationError): + with pytest.raises(ValidationError, match=r"(?i)ids?"): client.files.delete(uploaded, extra_body={"ids": token_urlsafe(16)}) @@ -72,7 +72,7 @@ async def test_live_async_delete_files_invalid_id( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(ValidationError): + with pytest.raises(ValidationError, match=r"(?i)ids?"): await client.files.delete(uploaded, extra_body={"ids": token_urlsafe(16)}) diff --git a/tests/live/test_live_extract_images.py b/tests/live/test_live_extract_images.py index 211e8591..3410622a 100644 --- a/tests/live/test_live_extract_images.py +++ b/tests/live/test_live_extract_images.py @@ -70,7 +70,7 @@ def test_live_extract_images_invalid_pages( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): client.extract_images( uploaded, extra_body={"pages": "last-1"}, @@ -88,7 +88,7 @@ async def test_live_async_extract_images_invalid_pages( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): await client.extract_images( uploaded, extra_body={"pages": "last-1"}, diff --git a/tests/live/test_live_extract_pdf_text_to_file.py b/tests/live/test_live_extract_pdf_text_to_file.py index f9c10e74..75784540 100644 --- a/tests/live/test_live_extract_pdf_text_to_file.py +++ b/tests/live/test_live_extract_pdf_text_to_file.py @@ -76,7 +76,7 @@ def test_live_extract_pdf_text_to_file_invalid_pages( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): client.extract_pdf_text_to_file( uploaded, extra_body={"pages": "last-1"}, @@ -94,7 +94,7 @@ async def test_live_async_extract_pdf_text_to_file_invalid_pages( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): await client.extract_pdf_text_to_file( uploaded, extra_body={"pages": "last-1"}, diff --git a/tests/live/test_live_flatten_annotations.py b/tests/live/test_live_flatten_annotations.py index 54f4f021..b97b08b0 100644 --- a/tests/live/test_live_flatten_annotations.py +++ b/tests/live/test_live_flatten_annotations.py @@ -89,7 +89,7 @@ def test_live_flatten_annotations_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.flatten_annotations( uploaded_pdf_for_annotations, @@ -107,7 +107,7 @@ async def test_live_async_flatten_annotations_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.flatten_annotations( uploaded_pdf_for_annotations, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, diff --git a/tests/live/test_live_flatten_pdf_forms.py b/tests/live/test_live_flatten_pdf_forms.py index eeed4c16..5bff7304 100644 --- a/tests/live/test_live_flatten_pdf_forms.py +++ b/tests/live/test_live_flatten_pdf_forms.py @@ -86,7 +86,7 @@ def test_live_flatten_pdf_forms_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.flatten_pdf_forms( uploaded_pdf_with_forms, @@ -104,7 +104,7 @@ async def test_live_async_flatten_pdf_forms_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.flatten_pdf_forms( uploaded_pdf_with_forms, extra_body={"id": "00000000-0000-0000-0000-000000000000"}, diff --git a/tests/live/test_live_flatten_transparencies.py b/tests/live/test_live_flatten_transparencies.py index c8a98e93..7da1eb40 100644 --- a/tests/live/test_live_flatten_transparencies.py +++ b/tests/live/test_live_flatten_transparencies.py @@ -92,7 +92,7 @@ def test_live_flatten_transparencies_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.flatten_transparencies( uploaded_pdf_for_transparencies, @@ -110,7 +110,7 @@ async def test_live_async_flatten_transparencies_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.flatten_transparencies( uploaded_pdf_for_transparencies, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, diff --git a/tests/live/test_live_graphic_conversions.py b/tests/live/test_live_graphic_conversions.py index 1f072946..3a09af9d 100644 --- a/tests/live/test_live_graphic_conversions.py +++ b/tests/live/test_live_graphic_conversions.py @@ -190,7 +190,7 @@ def test_live_graphic_invalid_color_model( uploaded = client.files.create_from_paths([resource])[0] client_method = getattr(client, spec.method_name) resolution = _resolution_bounds(payload_model)[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)color"): client_method( uploaded, resolution=resolution, @@ -235,7 +235,7 @@ def test_live_graphic_resolution_bounds( if should_raise: call_kwargs["extra_body"] = {"resolution": base_resolution + offset} - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)resolution"): client_method(uploaded, **call_kwargs) else: response = client_method(uploaded, **call_kwargs) @@ -283,7 +283,7 @@ def test_live_graphic_invalid_smoothing( ) as client: uploaded = client.files.create_from_paths([resource])[0] client_method = getattr(client, spec.method_name) - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)smooth"): client_method( uploaded, smoothing="none", @@ -302,7 +302,7 @@ async def test_live_async_graphic_invalid_smoothing( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)smooth"): await client.convert_to_png( uploaded, smoothing="none", @@ -357,7 +357,7 @@ def test_live_png_page_range_variants( ) assert str(response.input_id) == str(uploaded_20_page_pdf.id) else: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): client.convert_to_png( uploaded_20_page_pdf, output_prefix=f"live-range-{case_id}", @@ -389,7 +389,7 @@ def test_live_png_page_range_invalid_overrides( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)page"), ): client.convert_to_png( uploaded_20_page_pdf, diff --git a/tests/live/test_live_linearize_pdf.py b/tests/live/test_live_linearize_pdf.py index f0dc7359..523ea0d5 100644 --- a/tests/live/test_live_linearize_pdf.py +++ b/tests/live/test_live_linearize_pdf.py @@ -90,7 +90,7 @@ def test_live_linearize_pdf_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.linearize_pdf( uploaded_pdf_for_linearize, @@ -108,7 +108,7 @@ async def test_live_async_linearize_pdf_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.linearize_pdf( uploaded_pdf_for_linearize, extra_body={"id": "00000000-0000-0000-0000-000000000000"}, diff --git a/tests/live/test_live_ocr_pdf.py b/tests/live/test_live_ocr_pdf.py index 816bf697..5109bad5 100644 --- a/tests/live/test_live_ocr_pdf.py +++ b/tests/live/test_live_ocr_pdf.py @@ -62,7 +62,7 @@ def test_live_ocr_pdf_invalid_pages( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): client.ocr_pdf( uploaded, extra_body={"pages": "last-1"}, @@ -80,7 +80,7 @@ async def test_live_async_ocr_pdf_invalid_pages( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): await client.ocr_pdf( uploaded, extra_body={"pages": "last-1"}, diff --git a/tests/live/test_live_pdf_info.py b/tests/live/test_live_pdf_info.py index 977fe87d..7ec91828 100644 --- a/tests/live/test_live_pdf_info.py +++ b/tests/live/test_live_pdf_info.py @@ -111,7 +111,7 @@ def test_live_pdf_info_invalid_query( PdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)quer"), ): client.query_pdf_info( uploaded_pdf, diff --git a/tests/live/test_live_pdf_redactions.py b/tests/live/test_live_pdf_redactions.py index 8e425daa..5473e428 100644 --- a/tests/live/test_live_pdf_redactions.py +++ b/tests/live/test_live_pdf_redactions.py @@ -183,7 +183,7 @@ def test_live_redactions_invalid_payloads( base_url=pdfrest_live_base_url, ) as client: if "redactions" in extra_body: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)redaction"): client.preview_redactions( uploaded_pdf_for_redaction, redactions=[{"type": "literal", "value": "placeholder"}], @@ -195,7 +195,7 @@ def test_live_redactions_invalid_payloads( redactions=[{"type": "literal", "value": "placeholder"}], ) preview_file = preview.output_files[0] - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)rgb"): client.apply_redactions(preview_file, extra_body=extra_body) @@ -209,7 +209,7 @@ async def test_live_async_redactions_invalid_payloads( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)rgb"): await client.preview_redactions( uploaded_pdf_for_redaction, redactions=[{"type": "literal", "value": "placeholder"}], diff --git a/tests/live/test_live_pdf_split_merge.py b/tests/live/test_live_pdf_split_merge.py index 979f7b1e..5a58912c 100644 --- a/tests/live/test_live_pdf_split_merge.py +++ b/tests/live/test_live_pdf_split_merge.py @@ -198,7 +198,7 @@ def test_live_split_pdf_invalid_pages( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)page"), ): client.split_pdf( split_source, @@ -270,7 +270,7 @@ def test_live_merge_pdfs_invalid_pages( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)page"), ): client.merge_pdfs( sources, @@ -373,7 +373,7 @@ def test_live_split_pdf_page_range_variants( output_pages = client.query_pdf_info(response.output_files[0]).page_count assert output_pages == len(expected_pages) else: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): client.split_pdf( split_source, page_groups=[selection if not requires_override else "1"], @@ -446,7 +446,7 @@ def test_live_merge_pdf_page_range_variants( output_info = client.query_pdf_info(response.output_file) assert output_info.page_count == expected_total_pages else: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)page"): client.merge_pdfs( sources, output_prefix=f"live-merge-range-{case_id}", diff --git a/tests/live/test_live_rasterize_pdf.py b/tests/live/test_live_rasterize_pdf.py index 45e9402f..df7cb260 100644 --- a/tests/live/test_live_rasterize_pdf.py +++ b/tests/live/test_live_rasterize_pdf.py @@ -89,7 +89,7 @@ def test_live_rasterize_pdf_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError), + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), ): client.rasterize_pdf( uploaded_pdf_for_rasterize, @@ -107,7 +107,7 @@ async def test_live_async_rasterize_pdf_invalid_file_id( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError): + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): await client.rasterize_pdf( uploaded_pdf_for_rasterize, extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, diff --git a/tests/live/test_live_summarize_pdf_text.py b/tests/live/test_live_summarize_pdf_text.py index e846da8c..be8fb802 100644 --- a/tests/live/test_live_summarize_pdf_text.py +++ b/tests/live/test_live_summarize_pdf_text.py @@ -87,7 +87,7 @@ def test_live_summarize_pdf_text_invalid_format( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - with pytest.raises(PdfRestApiError, match="error"): + with pytest.raises(PdfRestApiError, match=r"(?i)summary"): client.summarize_pdf_text( uploaded, extra_body={"summary_format": "invalid-style"}, @@ -105,7 +105,7 @@ async def test_live_async_summarize_pdf_text_invalid_format( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(PdfRestApiError, match="error"): + with pytest.raises(PdfRestApiError, match=r"(?i)summary"): await client.summarize_pdf_text( uploaded, extra_body={"summary_format": "invalid-style"}, diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py index eea4e7b8..8824ff0d 100644 --- a/tests/live/test_live_translate_pdf_text.py +++ b/tests/live/test_live_translate_pdf_text.py @@ -67,7 +67,7 @@ def test_live_translate_pdf_text_invalid_output_format( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - with pytest.raises(PdfRestApiError, match="error"): + with pytest.raises(PdfRestApiError, match=r"(?i)output\s*format"): client.translate_pdf_text( uploaded, output_language="es", @@ -86,7 +86,7 @@ async def test_live_async_translate_pdf_text_invalid_output_format( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(PdfRestApiError, match="error"): + with pytest.raises(PdfRestApiError, match=r"(?i)output\s*format"): await client.translate_pdf_text( uploaded, output_language="de", From 34fc14e027626ea8445507016e6fef971fd1b780 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 14:04:48 -0600 Subject: [PATCH 40/61] Fix Translate PDF test regex matches Assisted-by: Codex --- tests/live/test_live_translate_pdf_text.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/live/test_live_translate_pdf_text.py b/tests/live/test_live_translate_pdf_text.py index 8824ff0d..00701242 100644 --- a/tests/live/test_live_translate_pdf_text.py +++ b/tests/live/test_live_translate_pdf_text.py @@ -67,7 +67,10 @@ def test_live_translate_pdf_text_invalid_output_format( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - with pytest.raises(PdfRestApiError, match=r"(?i)output\s*format"): + with pytest.raises( + PdfRestApiError, + match=r"invalid-format is not a valid input for 'output_format'", + ): client.translate_pdf_text( uploaded, output_language="es", @@ -86,7 +89,10 @@ async def test_live_async_translate_pdf_text_invalid_output_format( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - with pytest.raises(PdfRestApiError, match=r"(?i)output\s*format"): + with pytest.raises( + PdfRestApiError, + match=r"invalid-format is not a valid input for 'output_format'", + ): await client.translate_pdf_text( uploaded, output_language="de", From 906a905654baa2320729f482502ca5c6ec58dacd Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 14:48:49 -0600 Subject: [PATCH 41/61] Fix expected file format from Extract Text Assisted-by: Codex --- tests/live/test_live_extract_pdf_text_to_file.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/live/test_live_extract_pdf_text_to_file.py b/tests/live/test_live_extract_pdf_text_to_file.py index 75784540..d6e58652 100644 --- a/tests/live/test_live_extract_pdf_text_to_file.py +++ b/tests/live/test_live_extract_pdf_text_to_file.py @@ -29,8 +29,8 @@ def test_live_extract_pdf_text_to_file_success( assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files output_file = response.output_file - assert output_file.name.endswith(".txt") - assert output_file.type == "text/plain" + assert output_file.name.endswith(".json") + assert output_file.type == "application/json" assert output_file.size > 0 assert response.warning is None assert response.input_id == uploaded.id @@ -60,7 +60,8 @@ async def test_live_async_extract_pdf_text_to_file_success( assert response.output_files output_file = response.output_file assert output_file.name.startswith("async-text") - assert output_file.type == "text/plain" + assert output_file.name.endswith(".json") + assert output_file.type == "application/json" assert output_file.size > 0 assert response.warning is None assert response.input_id == uploaded.id From 4050ab0f6b76345ac5fd276b7ddfcf887095ec44 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 15:21:27 -0600 Subject: [PATCH 42/61] Convert to PNG live tests: Fix expected error Assisted-by: Codex --- tests/live/test_live_graphic_conversions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/live/test_live_graphic_conversions.py b/tests/live/test_live_graphic_conversions.py index 3a09af9d..a78f8d0f 100644 --- a/tests/live/test_live_graphic_conversions.py +++ b/tests/live/test_live_graphic_conversions.py @@ -389,7 +389,10 @@ def test_live_png_page_range_invalid_overrides( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError, match=r"(?i)page"), + pytest.raises( + PdfRestApiError, + match=r"There was an issue processing your file\. Validate all fields and try again\.", + ), ): client.convert_to_png( uploaded_20_page_pdf, From 76d5f8830f57bf8de5c4d4ea1e4fec7769523126 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 15:38:11 -0600 Subject: [PATCH 43/61] Redact PDF live test: Fix expected error message Assisted-by: Codex --- tests/live/test_live_pdf_redactions.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/live/test_live_pdf_redactions.py b/tests/live/test_live_pdf_redactions.py index 5473e428..3fda6d42 100644 --- a/tests/live/test_live_pdf_redactions.py +++ b/tests/live/test_live_pdf_redactions.py @@ -183,7 +183,13 @@ def test_live_redactions_invalid_payloads( base_url=pdfrest_live_base_url, ) as client: if "redactions" in extra_body: - with pytest.raises(PdfRestApiError, match=r"(?i)redaction"): + with pytest.raises( + PdfRestApiError, + match=( + r"The JSON data provided is not properly formatted\. Please check " + r"your syntax and try again\." + ), + ): client.preview_redactions( uploaded_pdf_for_redaction, redactions=[{"type": "literal", "value": "placeholder"}], From 34ed3f138b093708a927d94db88d54b4c5a46b6f Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 16:39:26 -0600 Subject: [PATCH 44/61] Convert XFA: Allow `warning` in response Assisted-by: Codex --- .../test_live_convert_xfa_to_acroforms.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/live/test_live_convert_xfa_to_acroforms.py b/tests/live/test_live_convert_xfa_to_acroforms.py index 8e882a3b..dba38304 100644 --- a/tests/live/test_live_convert_xfa_to_acroforms.py +++ b/tests/live/test_live_convert_xfa_to_acroforms.py @@ -7,6 +7,10 @@ from ..resources import get_test_resource_path +WARNING_NO_XFA_FORMS = ( + "No XFA forms were detected in the input PDF. No output was produced." +) + @pytest.fixture(scope="module") def uploaded_pdf_for_acroforms( @@ -44,12 +48,16 @@ def test_live_convert_xfa_to_acroforms_success( ) as client: response = client.convert_xfa_to_acroforms(uploaded_pdf_for_acroforms, **kwargs) + assert str(response.input_id) == str(uploaded_pdf_for_acroforms.id) + if response.warning is not None: + assert response.warning == WARNING_NO_XFA_FORMS + assert response.output_files == [] + return + assert response.output_files output_file = response.output_file assert output_file.type == "application/pdf" assert output_file.size > 0 - assert response.warning is None - assert str(response.input_id) == str(uploaded_pdf_for_acroforms.id) if output_name is not None: assert output_file.name.startswith(output_name) else: @@ -88,13 +96,17 @@ async def test_live_async_convert_xfa_to_acroforms_success( uploaded_pdf_for_acroforms, output="async" ) + assert str(response.input_id) == str(uploaded_pdf_for_acroforms.id) + if response.warning is not None: + assert response.warning == WARNING_NO_XFA_FORMS + assert response.output_files == [] + return + assert response.output_files output_file = response.output_file assert output_file.name.startswith("async") assert output_file.type == "application/pdf" assert output_file.size > 0 - assert response.warning is None - assert str(response.input_id) == str(uploaded_pdf_for_acroforms.id) @pytest.mark.asyncio From ff00e2271395b73bde98147c8ba175d641119670 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 7 Jan 2026 16:49:43 -0600 Subject: [PATCH 45/61] Add Convert to PDF/A (Archive PDF) and tests Assisted-by: Codex --- src/pdfrest/client.py | 62 +++++ src/pdfrest/models/_internal.py | 33 +++ src/pdfrest/types/__init__.py | 2 + src/pdfrest/types/public.py | 2 + tests/live/test_live_convert_to_pdfa.py | 147 +++++++++++ tests/test_convert_to_pdfa.py | 309 ++++++++++++++++++++++++ 6 files changed, 555 insertions(+) create mode 100644 tests/live/test_live_convert_to_pdfa.py create mode 100644 tests/test_convert_to_pdfa.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 9c96c49b..1415d73d 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -94,6 +94,7 @@ PdfRestRawFileResponse, PdfSplitPayload, PdfToExcelPayload, + PdfToPdfaPayload, PdfToPdfxPayload, PdfToPowerpointPayload, PdfToWordPayload, @@ -106,6 +107,7 @@ ) from .types import ( ALL_PDF_INFO_QUERIES, + PdfAType, PdfInfoQuery, PdfMergeInput, PdfPageSelection, @@ -2810,6 +2812,36 @@ def rasterize_pdf( timeout=timeout, ) + def convert_to_pdfa( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output_type: PdfAType, + output: str | None = None, + rasterize_if_errors_encountered: Literal["on", "off"] | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert a PDF to a specified PDF/A version.""" + + payload: dict[str, Any] = {"files": file, "output_type": output_type} + if output is not None: + payload["output"] = output + if rasterize_if_errors_encountered is not None: + payload["rasterize_if_errors_encountered"] = rasterize_if_errors_encountered + + return self._post_file_operation( + endpoint="/pdfa", + payload=payload, + payload_model=PdfToPdfaPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def convert_to_pdfx( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3832,6 +3864,36 @@ async def rasterize_pdf( timeout=timeout, ) + async def convert_to_pdfa( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output_type: PdfAType, + output: str | None = None, + rasterize_if_errors_encountered: Literal["on", "off"] | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert a PDF to a specified PDF/A version.""" + + payload: dict[str, Any] = {"files": file, "output_type": output_type} + if output is not None: + payload["output"] = output + if rasterize_if_errors_encountered is not None: + payload["rasterize_if_errors_encountered"] = rasterize_if_errors_encountered + + return await self._post_file_operation( + endpoint="/pdfa", + payload=payload, + payload_model=PdfToPdfaPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def convert_to_pdfx( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 7475d18c..3245436a 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -22,6 +22,7 @@ from pdfrest.types.public import PdfRedactionPreset from ..types import ( + PdfAType, PdfInfoQuery, PdfXType, SummaryFormat, @@ -813,6 +814,38 @@ class PdfToPowerpointPayload(BaseModel): ] = None +class PdfToPdfaPayload(BaseModel): + """Adapt caller options into a pdfRest-ready PDF/A request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output_type: Annotated[PdfAType, Field(serialization_alias="output_type")] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + rasterize_if_errors_encountered: Annotated[ + Literal["on", "off"] | None, + Field( + serialization_alias="rasterize_if_errors_encountered", + default=None, + ), + ] = None + + class PdfToPdfxPayload(BaseModel): """Adapt caller options into a pdfRest-ready PDF/X request payload.""" diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index adf09638..94952a99 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -2,6 +2,7 @@ from .public import ( ALL_PDF_INFO_QUERIES, + PdfAType, PdfInfoQuery, PdfMergeInput, PdfMergeSource, @@ -19,6 +20,7 @@ __all__ = [ "ALL_PDF_INFO_QUERIES", + "PdfAType", "PdfInfoQuery", "PdfMergeInput", "PdfMergeSource", diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 915968cf..df753f2b 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -14,6 +14,7 @@ __all__ = ( "ALL_PDF_INFO_QUERIES", + "PdfAType", "PdfInfoQuery", "PdfMergeInput", "PdfMergeSource", @@ -102,6 +103,7 @@ class PdfMergeSource(TypedDict, total=False): PdfMergeInput = PdfRestFile | PdfMergeSource | tuple[PdfRestFile, PdfPageSelection] +PdfAType = Literal["PDF/A-1b", "PDF/A-2b", "PDF/A-2u", "PDF/A-3b", "PDF/A-3u"] PdfXType = Literal["PDF/X-1a", "PDF/X-3", "PDF/X-4", "PDF/X-6"] SummaryFormat = Literal[ diff --git a/tests/live/test_live_convert_to_pdfa.py b/tests/live/test_live_convert_to_pdfa.py new file mode 100644 index 00000000..5d39d009 --- /dev/null +++ b/tests/live/test_live_convert_to_pdfa.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +from typing import cast, get_args + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile +from pdfrest.types import PdfAType + +from ..resources import get_test_resource_path + +PDFA_TYPES: tuple[PdfAType, ...] = cast(tuple[PdfAType, ...], get_args(PdfAType)) +PDFA_TYPE_PARAMS = [ + pytest.param(output_type, id=output_type) for output_type in PDFA_TYPES +] + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_pdfa( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize("output_type", PDFA_TYPE_PARAMS) +def test_live_convert_to_pdfa_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_pdfa: PdfRestFile, + output_type: PdfAType, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_to_pdfa( + uploaded_pdf_for_pdfa, + output_type=output_type, + output="pdfa-live", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_pdfa.id) + assert output_file.name.startswith("pdfa-live") + + +@pytest.mark.asyncio +@pytest.mark.parametrize("output_type", PDFA_TYPE_PARAMS) +async def test_live_async_convert_to_pdfa_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_pdfa: PdfRestFile, + output_type: PdfAType, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_to_pdfa( + uploaded_pdf_for_pdfa, + output_type=output_type, + output="async-pdfa", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-pdfa") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_pdfa.id) + + +def test_live_convert_to_pdfa_with_rasterize_option( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_pdfa: PdfRestFile, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_to_pdfa( + uploaded_pdf_for_pdfa, + output_type="PDF/A-2b", + rasterize_if_errors_encountered="on", + output="pdfa-rasterize", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("pdfa-rasterize") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_for_pdfa.id) + + +@pytest.mark.parametrize( + "invalid_output_type", + [ + pytest.param("PDF/A-0", id="pdfa-0"), + pytest.param("PDF/A-99", id="pdfa-99"), + pytest.param("pdf/a-2b", id="lowercase"), + ], +) +def test_live_convert_to_pdfa_invalid_output_type( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_pdfa: PdfRestFile, + invalid_output_type: str, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)pdf.?a"), + ): + client.convert_to_pdfa( + uploaded_pdf_for_pdfa, + output_type="PDF/A-1b", + extra_body={"output_type": invalid_output_type}, + ) + + +@pytest.mark.asyncio +async def test_live_async_convert_to_pdfa_invalid_output_type( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_pdfa: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)pdf.?a"): + await client.convert_to_pdfa( + uploaded_pdf_for_pdfa, + output_type="PDF/A-1b", + extra_body={"output_type": "PDF/A-0"}, + ) diff --git a/tests/test_convert_to_pdfa.py b/tests/test_convert_to_pdfa.py new file mode 100644 index 00000000..c678af17 --- /dev/null +++ b/tests/test_convert_to_pdfa.py @@ -0,0 +1,309 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfToPdfaPayload +from pdfrest.types import PdfAType + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +@pytest.mark.parametrize( + "output_type", + [ + pytest.param("PDF/A-1b", id="pdfa-1b"), + pytest.param("PDF/A-2b", id="pdfa-2b"), + pytest.param("PDF/A-2u", id="pdfa-2u"), + pytest.param("PDF/A-3b", id="pdfa-3b"), + pytest.param("PDF/A-3u", id="pdfa-3u"), + ], +) +def test_convert_to_pdfa_success( + monkeypatch: pytest.MonkeyPatch, output_type: PdfAType +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + payload_dump = PdfToPdfaPayload.model_validate( + {"files": [input_file], "output_type": output_type, "output": "archive"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdfa": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "archive.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_to_pdfa( + input_file, + output_type=output_type, + output="archive", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "archive.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + assert response.warning is None + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "output_type", + [ + pytest.param("PDF/A-1b", id="async-pdfa-1b"), + pytest.param("PDF/A-2b", id="async-pdfa-2b"), + pytest.param("PDF/A-2u", id="async-pdfa-2u"), + pytest.param("PDF/A-3b", id="async-pdfa-3b"), + pytest.param("PDF/A-3u", id="async-pdfa-3u"), + ], +) +async def test_async_convert_to_pdfa_success( + monkeypatch: pytest.MonkeyPatch, output_type: PdfAType +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + payload_dump = PdfToPdfaPayload.model_validate( + {"files": [input_file], "output_type": output_type} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdfa": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload(output_id, "async.pdf", "application/pdf"), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_to_pdfa( + input_file, + output_type=output_type, + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +def test_convert_to_pdfa_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdfa": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["output_type"] == "PDF/A-3b" + assert payload["rasterize_if_errors_encountered"] == "on" + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={"inputId": [input_file.id], "outputId": [output_id]}, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_to_pdfa( + input_file, + output_type="PDF/A-3b", + output="custom", + rasterize_if_errors_encountered="on", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.33, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.33) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.33) + + +@pytest.mark.asyncio +async def test_async_convert_to_pdfa_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdfa": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["output_type"] == "PDF/A-2u" + assert payload["id"] == str(input_file.id) + assert payload["extra"] == {"note": "async"} + assert payload["rasterize_if_errors_encountered"] == "off" + return httpx.Response( + 200, + json={"inputId": [input_file.id], "outputId": [output_id]}, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_to_pdfa( + input_file, + output_type="PDF/A-2u", + rasterize_if_errors_encountered="off", + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"extra": {"note": "async"}}, + timeout=0.72, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.72) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.72) + + +def test_convert_to_pdfa_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, + match=( + "Input should be 'PDF/A-1b', 'PDF/A-2b', 'PDF/A-2u', " + "'PDF/A-3b' or 'PDF/A-3u'" + ), + ), + ): + client.convert_to_pdfa(pdf_file, output_type=None) # type: ignore[arg-type] + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.convert_to_pdfa(png_file, output_type="PDF/A-2b") + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="PDF/A-1b"), + ): + client.convert_to_pdfa(pdf_file, output_type="PDF/A-4") # type: ignore[arg-type] + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.convert_to_pdfa( + [pdf_file, make_pdf_file(PdfRestFileID.generate())], + output_type="PDF/A-2b", + ) From d5a8f131e9d8404c9de09d79d52a9e4e63818433 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 8 Jan 2026 10:29:52 -0600 Subject: [PATCH 46/61] Update src/pdfrest/models/public.py Omit mention of `output_type` as it is not relevant to the caller. Co-authored-by: Kevin A. Mitchell --- src/pdfrest/models/public.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index aafe55f4..aa57d9de 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -325,7 +325,7 @@ class SummarizePdfTextResponse(BaseModel): summary: Annotated[ str | None, Field( - description="Inline summary content when output_type is json.", + description="Summary content", default=None, ), ] = None From aa107cc7880ed448c77232010d0322efbeeda2ec Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 8 Jan 2026 10:31:29 -0600 Subject: [PATCH 47/61] Remove unused `ConvertToMarkdownResponse` class Assisted-by: Codex --- src/pdfrest/models/__init__.py | 2 -- src/pdfrest/models/public.py | 44 ---------------------------------- 2 files changed, 46 deletions(-) diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index 755bbaf7..c3242970 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -1,5 +1,4 @@ from .public import ( - ConvertToMarkdownResponse, ExtractTextResponse, PdfRestDeletionResponse, PdfRestErrorResponse, @@ -14,7 +13,6 @@ ) __all__ = [ - "ConvertToMarkdownResponse", "ExtractTextResponse", "PdfRestDeletionResponse", "PdfRestErrorResponse", diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index aa57d9de..b7de144f 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -20,7 +20,6 @@ from typing_extensions import override __all__ = ( - "ConvertToMarkdownResponse", "ExtractTextResponse", "PdfRestDeletionResponse", "PdfRestErrorResponse", @@ -485,49 +484,6 @@ class ExtractTextResponse(BaseModel): ] = None -class ConvertToMarkdownResponse(BaseModel): - """Response returned by the markdown conversion tool.""" - - model_config = ConfigDict(extra="allow") - - markdown: Annotated[ - str | None, - Field( - description="Inline markdown content when output_type is json.", - default=None, - ), - ] = None - input_id: Annotated[ - PdfRestFileID, - Field( - validation_alias=AliasChoices("input_id", "inputId"), - description="The id of the input file.", - ), - ] - output_url: Annotated[ - HttpUrl | None, - Field( - alias="outputUrl", - validation_alias=AliasChoices("output_url", "outputUrl"), - description="Download URL for file output.", - default=None, - ), - ] = None - output_id: Annotated[ - PdfRestFileID | None, - Field( - alias="outputId", - validation_alias=AliasChoices("output_id", "outputId"), - description="The id of the generated output when output_type is file.", - default=None, - ), - ] = None - warning: Annotated[ - str | None, - Field(description="A warning that was generated during markdown conversion."), - ] = None - - class PdfRestInfoResponse(BaseModel): """A response containing the output from the /info route.""" From 4f719a99134e253a46188b169bd1e1c74898327b Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 8 Jan 2026 15:15:36 -0600 Subject: [PATCH 48/61] Remove unused fields from Summarize PDF response Assisted-by: Codex --- src/pdfrest/models/public.py | 18 ------------------ tests/test_summarize_pdf_text.py | 2 -- 2 files changed, 20 deletions(-) diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index b7de144f..097ed276 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -335,24 +335,6 @@ class SummarizePdfTextResponse(BaseModel): description="The id of the input file.", ), ] - output_url: Annotated[ - HttpUrl | None, - Field( - alias="outputUrl", - validation_alias=AliasChoices("output_url", "outputUrl"), - description="Download URL for file output.", - default=None, - ), - ] = None - output_id: Annotated[ - PdfRestFileID | None, - Field( - alias="outputId", - validation_alias=AliasChoices("output_id", "outputId"), - description="The id of the generated output when output_type is file.", - default=None, - ), - ] = None class TranslatePdfTextResponse(BaseModel): diff --git a/tests/test_summarize_pdf_text.py b/tests/test_summarize_pdf_text.py index cbcf490b..cf2d7cb9 100644 --- a/tests/test_summarize_pdf_text.py +++ b/tests/test_summarize_pdf_text.py @@ -113,8 +113,6 @@ def handler(request: httpx.Request) -> httpx.Response: assert isinstance(response, SummarizePdfTextResponse) assert response.summary == "Key points..." assert response.input_id == input_file.id - assert response.output_id is None - assert response.output_url is None def test_summarize_pdf_text_to_file_success( From 242a36c7dc98e14ae942f266c883715dbe146e17 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 8 Jan 2026 16:05:31 -0600 Subject: [PATCH 49/61] Translate PDF: Remove unused response fields Assisted-by: Codex --- src/pdfrest/models/public.py | 18 ------------------ tests/test_translate_pdf_text.py | 2 -- 2 files changed, 20 deletions(-) diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 097ed276..08c6a7e6 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -376,24 +376,6 @@ class TranslatePdfTextResponse(BaseModel): description="The id of the input file.", ), ] - output_url: Annotated[ - HttpUrl | None, - Field( - alias="outputUrl", - validation_alias=AliasChoices("output_url", "outputUrl"), - description="Download URL for file output.", - default=None, - ), - ] = None - output_id: Annotated[ - PdfRestFileID | None, - Field( - alias="outputId", - validation_alias=AliasChoices("output_id", "outputId"), - description="The id of the generated output when output_type is file.", - default=None, - ), - ] = None class TranslatePdfTextFileResponse(PdfRestFileBasedResponse): diff --git a/tests/test_translate_pdf_text.py b/tests/test_translate_pdf_text.py index c26c4c0c..fc7eadcf 100644 --- a/tests/test_translate_pdf_text.py +++ b/tests/test_translate_pdf_text.py @@ -109,8 +109,6 @@ def handler(request: httpx.Request) -> httpx.Response: assert response.source_languages == ["en"] assert response.output_language == "fr" assert response.input_id == input_file.id - assert response.output_id is None - assert response.output_url is None def test_translate_pdf_text_request_customization( From 38e209c122fe4eef6e69e1ad019e686fd5b4ee70 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Thu, 8 Jan 2026 16:23:31 -0600 Subject: [PATCH 50/61] Extract Text response: Remove unused fields Assisted-by: Codex --- src/pdfrest/models/public.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 08c6a7e6..8d1131fa 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -424,24 +424,6 @@ class ExtractTextResponse(BaseModel): description="The id of the input file.", ), ] - output_url: Annotated[ - HttpUrl | None, - Field( - alias="outputUrl", - validation_alias=AliasChoices("output_url", "outputUrl"), - description="Download URL for file output.", - default=None, - ), - ] = None - output_id: Annotated[ - PdfRestFileID | None, - Field( - alias="outputId", - validation_alias=AliasChoices("output_id", "outputId"), - description="The id of the generated output when output_type is file.", - default=None, - ), - ] = None warning: Annotated[ str | None, Field(description="A warning that was generated during text extraction."), From 81345e02e67d3ec8d6c6498f5b715ce5df428a07 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 9 Jan 2026 15:19:07 -0600 Subject: [PATCH 51/61] Remove unused `ExtractTextResponse` Assisted-by: Codex --- src/pdfrest/models/__init__.py | 2 -- src/pdfrest/models/public.py | 28 ---------------------------- 2 files changed, 30 deletions(-) diff --git a/src/pdfrest/models/__init__.py b/src/pdfrest/models/__init__.py index c3242970..ef10e565 100644 --- a/src/pdfrest/models/__init__.py +++ b/src/pdfrest/models/__init__.py @@ -1,5 +1,4 @@ from .public import ( - ExtractTextResponse, PdfRestDeletionResponse, PdfRestErrorResponse, PdfRestFile, @@ -13,7 +12,6 @@ ) __all__ = [ - "ExtractTextResponse", "PdfRestDeletionResponse", "PdfRestErrorResponse", "PdfRestFile", diff --git a/src/pdfrest/models/public.py b/src/pdfrest/models/public.py index 8d1131fa..e4dc8a3a 100644 --- a/src/pdfrest/models/public.py +++ b/src/pdfrest/models/public.py @@ -20,7 +20,6 @@ from typing_extensions import override __all__ = ( - "ExtractTextResponse", "PdfRestDeletionResponse", "PdfRestErrorResponse", "PdfRestFile", @@ -403,33 +402,6 @@ class TranslatePdfTextFileResponse(PdfRestFileBasedResponse): ] = None -class ExtractTextResponse(BaseModel): - """Response returned by the extracted-text tool.""" - - model_config = ConfigDict(extra="allow") - - full_text: Annotated[ - str | None, - Field( - alias="fullText", - validation_alias=AliasChoices("full_text", "fullText"), - description="Inline extracted text when output_type is json.", - default=None, - ), - ] = None - input_id: Annotated[ - PdfRestFileID, - Field( - validation_alias=AliasChoices("input_id", "inputId"), - description="The id of the input file.", - ), - ] - warning: Annotated[ - str | None, - Field(description="A warning that was generated during text extraction."), - ] = None - - class PdfRestInfoResponse(BaseModel): """A response containing the output from the /info route.""" From 39d9836b749d59040063e68d1a6cf914af916131 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 9 Jan 2026 15:22:32 -0600 Subject: [PATCH 52/61] Remove "pdf" from Summarize method names --- src/pdfrest/client.py | 8 ++++---- tests/live/test_live_summarize_pdf_text.py | 20 ++++++++++---------- tests/test_summarize_pdf_text.py | 20 ++++++++++---------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 1415d73d..a198f353 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2132,7 +2132,7 @@ def query_pdf_info( raw_payload = self._send_request(request) return PdfRestInfoResponse.model_validate(raw_payload) - def summarize_pdf_text( + def summarize_text( self, file: PdfRestFile | Sequence[PdfRestFile], *, @@ -2179,7 +2179,7 @@ def summarize_pdf_text( raw_payload = self._send_request(request) return SummarizePdfTextResponse.model_validate(raw_payload) - def summarize_pdf_text_to_file( + def summarize_text_to_file( self, file: PdfRestFile | Sequence[PdfRestFile], *, @@ -3142,7 +3142,7 @@ async def query_pdf_info( raw_payload = await self._send_request(request) return PdfRestInfoResponse.model_validate(raw_payload) - async def summarize_pdf_text( + async def summarize_text( self, file: PdfRestFile | Sequence[PdfRestFile], *, @@ -3189,7 +3189,7 @@ async def summarize_pdf_text( raw_payload = await self._send_request(request) return SummarizePdfTextResponse.model_validate(raw_payload) - async def summarize_pdf_text_to_file( + async def summarize_text_to_file( self, file: PdfRestFile | Sequence[PdfRestFile], *, diff --git a/tests/live/test_live_summarize_pdf_text.py b/tests/live/test_live_summarize_pdf_text.py index be8fb802..629c815a 100644 --- a/tests/live/test_live_summarize_pdf_text.py +++ b/tests/live/test_live_summarize_pdf_text.py @@ -8,7 +8,7 @@ from ..resources import get_test_resource_path -def test_live_summarize_pdf_text_success( +def test_live_summarize_text_success( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: @@ -18,7 +18,7 @@ def test_live_summarize_pdf_text_success( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - response = client.summarize_pdf_text( + response = client.summarize_text( uploaded, target_word_count=40, summary_format="overview", @@ -29,7 +29,7 @@ def test_live_summarize_pdf_text_success( assert response.input_id == uploaded.id -def test_live_summarize_pdf_text_to_file_success( +def test_live_summarize_text_to_file_success( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: @@ -39,7 +39,7 @@ def test_live_summarize_pdf_text_to_file_success( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - response = client.summarize_pdf_text_to_file( + response = client.summarize_text_to_file( uploaded, target_word_count=40, summary_format="overview", @@ -56,7 +56,7 @@ def test_live_summarize_pdf_text_to_file_success( @pytest.mark.asyncio -async def test_live_async_summarize_pdf_text_success( +async def test_live_async_summarize_text_success( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: @@ -66,7 +66,7 @@ async def test_live_async_summarize_pdf_text_success( base_url=pdfrest_live_base_url, ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] - response = await client.summarize_pdf_text( + response = await client.summarize_text( uploaded, target_word_count=30, summary_format="overview", @@ -77,7 +77,7 @@ async def test_live_async_summarize_pdf_text_success( assert response.input_id == uploaded.id -def test_live_summarize_pdf_text_invalid_format( +def test_live_summarize_text_invalid_format( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: @@ -88,14 +88,14 @@ def test_live_summarize_pdf_text_invalid_format( ) as client: uploaded = client.files.create_from_paths([resource])[0] with pytest.raises(PdfRestApiError, match=r"(?i)summary"): - client.summarize_pdf_text( + client.summarize_text( uploaded, extra_body={"summary_format": "invalid-style"}, ) @pytest.mark.asyncio -async def test_live_async_summarize_pdf_text_invalid_format( +async def test_live_async_summarize_text_invalid_format( pdfrest_api_key: str, pdfrest_live_base_url: str, ) -> None: @@ -106,7 +106,7 @@ async def test_live_async_summarize_pdf_text_invalid_format( ) as client: uploaded = (await client.files.create_from_paths([resource]))[0] with pytest.raises(PdfRestApiError, match=r"(?i)summary"): - await client.summarize_pdf_text( + await client.summarize_text( uploaded, extra_body={"summary_format": "invalid-style"}, ) diff --git a/tests/test_summarize_pdf_text.py b/tests/test_summarize_pdf_text.py index cf2d7cb9..4263c488 100644 --- a/tests/test_summarize_pdf_text.py +++ b/tests/test_summarize_pdf_text.py @@ -66,7 +66,7 @@ def test_summarize_payload_invalid_page_range() -> None: SummarizePdfTextPayload.model_validate({"files": [file_repr], "pages": ["5-2"]}) -def test_summarize_pdf_text_json_success(monkeypatch: pytest.MonkeyPatch) -> None: +def test_summarize_text_json_success(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = _make_text_file(str(PdfRestFileID.generate(1))) payload_dump = SummarizePdfTextPayload.model_validate( @@ -100,7 +100,7 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: - response = client.summarize_pdf_text( + response = client.summarize_text( input_file, target_word_count=120, summary_format="bullet_points", @@ -115,7 +115,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert response.input_id == input_file.id -def test_summarize_pdf_text_to_file_success( +def test_summarize_text_to_file_success( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) @@ -158,7 +158,7 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: - response = client.summarize_pdf_text_to_file( + response = client.summarize_text_to_file( input_file, target_word_count=200, summary_format="bullet_points", @@ -174,7 +174,7 @@ def handler(request: httpx.Request) -> httpx.Response: assert response.input_id == input_file.id -def test_summarize_pdf_text_to_file_request_customization( +def test_summarize_text_to_file_request_customization( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) @@ -223,7 +223,7 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: - response = client.summarize_pdf_text_to_file( + response = client.summarize_text_to_file( input_file, extra_query={"trace": "true"}, extra_headers={"X-Debug": "sync"}, @@ -246,7 +246,7 @@ def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio -async def test_async_summarize_pdf_text_success( +async def test_async_summarize_text_success( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) @@ -275,7 +275,7 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: - response = await client.summarize_pdf_text(input_file) + response = await client.summarize_text(input_file) assert seen == {"post": 1} assert isinstance(response, SummarizePdfTextResponse) @@ -284,7 +284,7 @@ def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio -async def test_async_summarize_pdf_text_to_file_success( +async def test_async_summarize_text_to_file_success( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) @@ -322,7 +322,7 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: - response = await client.summarize_pdf_text_to_file(input_file) + response = await client.summarize_text_to_file(input_file) assert seen == {"post": 1, "get": 1} assert isinstance(response, PdfRestFileBasedResponse) From 24b91544147df5ce3a72e000f9e73c2dce6a410c Mon Sep 17 00:00:00 2001 From: "Kevin A. Mitchell" Date: Thu, 8 Jan 2026 22:49:10 -0600 Subject: [PATCH 53/61] models: Add `_bool_to_on_off` converter for boolean fields - Introduced `_bool_to_on_off` function to convert boolean values to "on"/"off". - Applied the new `BeforeValidator` to `preserve_line_breaks`, `word_style`, and `word_coordinates` fields to handle boolean inputs properly. - Ensured consistency in serialization and validation of relevant model fields. Assisted-by: Codex --- src/pdfrest/models/_internal.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 3245436a..8aaa0421 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -120,6 +120,12 @@ def _serialize_file_ids(value: list[PdfRestFile]) -> str: return ",".join(str(file.id) for file in value) +def _bool_to_on_off(value: Any) -> Any: + if isinstance(value, bool): + return "on" if value else "off" + return value + + def _serialize_page_ranges(value: list[str | int | tuple[str | int, ...]]) -> str: def join_tuple(value: str | int | tuple[str | int, ...]) -> str: if isinstance(value, tuple): @@ -364,9 +370,15 @@ class ExtractTextPayload(BaseModel): PlainSerializer(_serialize_page_ranges), ] = None full_text: Literal["off", "by_page", "document"] = "document" - preserve_line_breaks: Literal["off", "on"] = "off" - word_style: Literal["off", "on"] = "off" - word_coordinates: Literal["off", "on"] = "off" + preserve_line_breaks: Annotated[ + Literal["off", "on"], BeforeValidator(_bool_to_on_off) + ] = "off" + word_style: Annotated[Literal["off", "on"], BeforeValidator(_bool_to_on_off)] = ( + "off" + ) + word_coordinates: Annotated[ + Literal["off", "on"], BeforeValidator(_bool_to_on_off) + ] = "off" output_type: Literal["json", "file"] = "json" output: Annotated[ str | None, From 9ff3a86cc07a5f60e668bd4de7a0eea53030eee6 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Fri, 9 Jan 2026 15:54:17 -0600 Subject: [PATCH 54/61] Replace on/off in external interface with `bool` Assisted-by: Codex --- src/pdfrest/client.py | 20 ++++++++++---------- src/pdfrest/models/_internal.py | 2 ++ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index a198f353..21655df4 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2222,7 +2222,7 @@ def convert_to_markdown( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, - page_break_comments: Literal["on", "off"] | None = None, + page_break_comments: bool | None = None, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -2394,9 +2394,9 @@ def extract_pdf_text_to_file( *, pages: PdfPageSelection | None = None, full_text: Literal["off", "by_page", "document"] = "document", - preserve_line_breaks: Literal["off", "on"] = "off", - word_style: Literal["off", "on"] = "off", - word_coordinates: Literal["off", "on"] = "off", + preserve_line_breaks: bool = False, + word_style: bool = False, + word_coordinates: bool = False, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -2818,7 +2818,7 @@ def convert_to_pdfa( *, output_type: PdfAType, output: str | None = None, - rasterize_if_errors_encountered: Literal["on", "off"] | None = None, + rasterize_if_errors_encountered: bool | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -3232,7 +3232,7 @@ async def convert_to_markdown( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, - page_break_comments: Literal["on", "off"] | None = None, + page_break_comments: bool | None = None, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -3404,9 +3404,9 @@ async def extract_pdf_text_to_file( *, pages: PdfPageSelection | None = None, full_text: Literal["off", "by_page", "document"] = "document", - preserve_line_breaks: Literal["off", "on"] = "off", - word_style: Literal["off", "on"] = "off", - word_coordinates: Literal["off", "on"] = "off", + preserve_line_breaks: bool = False, + word_style: bool = False, + word_coordinates: bool = False, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -3870,7 +3870,7 @@ async def convert_to_pdfa( *, output_type: PdfAType, output: str | None = None, - rasterize_if_errors_encountered: Literal["on", "off"] | None = None, + rasterize_if_errors_encountered: bool | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 8aaa0421..59e576d6 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -418,6 +418,7 @@ class ConvertToMarkdownPayload(BaseModel): page_break_comments: Annotated[ Literal["on", "off"] | None, Field(serialization_alias="page_break_comments", default=None), + BeforeValidator(_bool_to_on_off), ] = None output: Annotated[ str | None, @@ -855,6 +856,7 @@ class PdfToPdfaPayload(BaseModel): serialization_alias="rasterize_if_errors_encountered", default=None, ), + BeforeValidator(_bool_to_on_off), ] = None From 9c99f33a1f1e702f0f9ecbf108105f8272fe90e2 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Mon, 12 Jan 2026 16:30:01 -0600 Subject: [PATCH 55/61] Set default values (from pdfRest) on optional client parameters Assisted-by: Codex --- src/pdfrest/client.py | 49 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 21655df4..8f8f95e2 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2136,7 +2136,7 @@ def summarize_text( self, file: PdfRestFile | Sequence[PdfRestFile], *, - target_word_count: int | None = 400, + target_word_count: int = 400, summary_format: SummaryFormat = "overview", pages: PdfPageSelection | None = None, output_format: SummaryOutputFormat = "markdown", @@ -2183,7 +2183,7 @@ def summarize_text_to_file( self, file: PdfRestFile | Sequence[PdfRestFile], *, - target_word_count: int | None = 400, + target_word_count: int = 400, summary_format: SummaryFormat = "overview", pages: PdfPageSelection | None = None, output_format: SummaryOutputFormat = "markdown", @@ -2222,7 +2222,7 @@ def convert_to_markdown( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, - page_break_comments: bool | None = None, + page_break_comments: bool = False, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -2234,11 +2234,10 @@ def convert_to_markdown( payload: dict[str, Any] = { "files": file, "output_type": "file", + "page_break_comments": page_break_comments, } if pages is not None: payload["pages"] = pages - if page_break_comments is not None: - payload["page_break_comments"] = page_break_comments if output is not None: payload["output"] = output @@ -2818,7 +2817,7 @@ def convert_to_pdfa( *, output_type: PdfAType, output: str | None = None, - rasterize_if_errors_encountered: bool | None = None, + rasterize_if_errors_encountered: bool = False, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -2826,12 +2825,13 @@ def convert_to_pdfa( ) -> PdfRestFileBasedResponse: """Convert a PDF to a specified PDF/A version.""" - payload: dict[str, Any] = {"files": file, "output_type": output_type} + payload: dict[str, Any] = { + "files": file, + "output_type": output_type, + "rasterize_if_errors_encountered": rasterize_if_errors_encountered, + } if output is not None: payload["output"] = output - if rasterize_if_errors_encountered is not None: - payload["rasterize_if_errors_encountered"] = rasterize_if_errors_encountered - return self._post_file_operation( endpoint="/pdfa", payload=payload, @@ -3000,7 +3000,7 @@ def convert_to_jpeg( smoothing: Literal["none", "all", "text", "line", "image"] | Sequence[Literal["none", "all", "text", "line", "image"]] | None = None, - jpeg_quality: int | None = None, + jpeg_quality: int = 75, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -3012,6 +3012,7 @@ def convert_to_jpeg( "files": files, "resolution": resolution, "color_model": color_model, + "jpeg_quality": jpeg_quality, } if output_prefix is not None: payload["output_prefix"] = output_prefix @@ -3019,8 +3020,6 @@ def convert_to_jpeg( payload["page_range"] = page_range if smoothing is not None: payload["smoothing"] = smoothing - if jpeg_quality is not None: - payload["jpeg_quality"] = jpeg_quality return self._convert_to_graphic( endpoint="/jpg", @@ -3146,7 +3145,7 @@ async def summarize_text( self, file: PdfRestFile | Sequence[PdfRestFile], *, - target_word_count: int | None = 400, + target_word_count: int = 400, summary_format: SummaryFormat = "overview", pages: PdfPageSelection | None = None, output_format: SummaryOutputFormat = "markdown", @@ -3193,7 +3192,7 @@ async def summarize_text_to_file( self, file: PdfRestFile | Sequence[PdfRestFile], *, - target_word_count: int | None = 400, + target_word_count: int = 400, summary_format: SummaryFormat = "overview", pages: PdfPageSelection | None = None, output_format: SummaryOutputFormat = "markdown", @@ -3232,7 +3231,7 @@ async def convert_to_markdown( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, - page_break_comments: bool | None = None, + page_break_comments: bool = False, output: str | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -3244,11 +3243,10 @@ async def convert_to_markdown( payload: dict[str, Any] = { "files": file, "output_type": "file", + "page_break_comments": page_break_comments, } if pages is not None: payload["pages"] = pages - if page_break_comments is not None: - payload["page_break_comments"] = page_break_comments if output is not None: payload["output"] = output @@ -3870,7 +3868,7 @@ async def convert_to_pdfa( *, output_type: PdfAType, output: str | None = None, - rasterize_if_errors_encountered: bool | None = None, + rasterize_if_errors_encountered: bool = False, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -3878,11 +3876,13 @@ async def convert_to_pdfa( ) -> PdfRestFileBasedResponse: """Asynchronously convert a PDF to a specified PDF/A version.""" - payload: dict[str, Any] = {"files": file, "output_type": output_type} + payload: dict[str, Any] = { + "files": file, + "output_type": output_type, + "rasterize_if_errors_encountered": rasterize_if_errors_encountered, + } if output is not None: payload["output"] = output - if rasterize_if_errors_encountered is not None: - payload["rasterize_if_errors_encountered"] = rasterize_if_errors_encountered return await self._post_file_operation( endpoint="/pdfa", @@ -4052,7 +4052,7 @@ async def convert_to_jpeg( smoothing: Literal["none", "all", "text", "line", "image"] | Sequence[Literal["none", "all", "text", "line", "image"]] | None = None, - jpeg_quality: int | None = None, + jpeg_quality: int = 75, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -4064,6 +4064,7 @@ async def convert_to_jpeg( "files": files, "resolution": resolution, "color_model": color_model, + "jpeg_quality": jpeg_quality, } if output_prefix is not None: payload["output_prefix"] = output_prefix @@ -4071,8 +4072,6 @@ async def convert_to_jpeg( payload["page_range"] = page_range if smoothing is not None: payload["smoothing"] = smoothing - if jpeg_quality is not None: - payload["jpeg_quality"] = jpeg_quality return await self._convert_to_graphic( endpoint="/jpg", From 47953f54fe10d3715790c35e74ea5f2a6d2d51e5 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Mon, 12 Jan 2026 16:50:03 -0600 Subject: [PATCH 56/61] Create types for remaining Literal arguments in clients Assisted-by: Codex --- src/pdfrest/client.py | 81 +++++++++++++++-------------------- src/pdfrest/types/__init__.py | 18 ++++++++ src/pdfrest/types/public.py | 18 ++++++++ 3 files changed, 71 insertions(+), 46 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 8f8f95e2..593219cc 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -107,6 +107,13 @@ ) from .types import ( ALL_PDF_INFO_QUERIES, + BmpColorModel, + CompressionLevel, + ExtractTextGranularity, + FlattenQuality, + GifColorModel, + GraphicSmoothing, + JpegColorModel, PdfAType, PdfInfoQuery, PdfMergeInput, @@ -114,8 +121,10 @@ PdfRedactionInstruction, PdfRGBColor, PdfXType, + PngColorModel, SummaryFormat, SummaryOutputFormat, + TiffColorModel, TranslateOutputFormat, ) @@ -2392,7 +2401,7 @@ def extract_pdf_text_to_file( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, - full_text: Literal["off", "by_page", "document"] = "document", + full_text: ExtractTextGranularity = "document", preserve_line_breaks: bool = False, word_style: bool = False, word_coordinates: bool = False, @@ -2677,7 +2686,7 @@ def compress_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], *, - compression_level: Literal["low", "medium", "high", "custom"], + compression_level: CompressionLevel, profile: PdfRestFile | Sequence[PdfRestFile] | None = None, output: str | None = None, extra_query: Query | None = None, @@ -2711,7 +2720,7 @@ def flatten_transparencies( file: PdfRestFile | Sequence[PdfRestFile], *, output: str | None = None, - quality: Literal["low", "medium", "high"] = "medium", + quality: FlattenQuality = "medium", extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -2876,10 +2885,8 @@ def convert_to_png( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "rgba", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: PngColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -2916,10 +2923,8 @@ def convert_to_bmp( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: BmpColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -2956,10 +2961,8 @@ def convert_to_gif( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: GifColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -2996,10 +2999,8 @@ def convert_to_jpeg( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "cmyk", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: JpegColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, jpeg_quality: int = 75, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -3038,10 +3039,8 @@ def convert_to_tiff( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "rgba", "cmyk", "lab", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: TiffColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -3401,7 +3400,7 @@ async def extract_pdf_text_to_file( file: PdfRestFile | Sequence[PdfRestFile], *, pages: PdfPageSelection | None = None, - full_text: Literal["off", "by_page", "document"] = "document", + full_text: ExtractTextGranularity = "document", preserve_line_breaks: bool = False, word_style: bool = False, word_coordinates: bool = False, @@ -3728,7 +3727,7 @@ async def compress_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], *, - compression_level: Literal["low", "medium", "high", "custom"], + compression_level: CompressionLevel, profile: PdfRestFile | Sequence[PdfRestFile] | None = None, output: str | None = None, extra_query: Query | None = None, @@ -3762,7 +3761,7 @@ async def flatten_transparencies( file: PdfRestFile | Sequence[PdfRestFile], *, output: str | None = None, - quality: Literal["low", "medium", "high"] = "medium", + quality: FlattenQuality = "medium", extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -3928,10 +3927,8 @@ async def convert_to_png( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "rgba", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: PngColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -3968,10 +3965,8 @@ async def convert_to_bmp( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: BmpColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -4008,10 +4003,8 @@ async def convert_to_gif( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: GifColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, @@ -4048,10 +4041,8 @@ async def convert_to_jpeg( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "cmyk", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: JpegColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, jpeg_quality: int = 75, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, @@ -4090,10 +4081,8 @@ async def convert_to_tiff( output_prefix: str | None = None, page_range: str | Sequence[str] | None = None, resolution: int = 300, - color_model: Literal["rgb", "rgba", "cmyk", "lab", "gray"] = "rgb", - smoothing: Literal["none", "all", "text", "line", "image"] - | Sequence[Literal["none", "all", "text", "line", "image"]] - | None = None, + color_model: TiffColorModel = "rgb", + smoothing: GraphicSmoothing | Sequence[GraphicSmoothing] | None = None, extra_query: Query | None = None, extra_headers: AnyMapping | None = None, extra_body: Body | None = None, diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index 94952a99..b7c1ae7e 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -2,6 +2,13 @@ from .public import ( ALL_PDF_INFO_QUERIES, + BmpColorModel, + CompressionLevel, + ExtractTextGranularity, + FlattenQuality, + GifColorModel, + GraphicSmoothing, + JpegColorModel, PdfAType, PdfInfoQuery, PdfMergeInput, @@ -12,14 +19,23 @@ PdfRedactionType, PdfRGBColor, PdfXType, + PngColorModel, SummaryFormat, SummaryOutputFormat, SummaryOutputType, + TiffColorModel, TranslateOutputFormat, ) __all__ = [ "ALL_PDF_INFO_QUERIES", + "BmpColorModel", + "CompressionLevel", + "ExtractTextGranularity", + "FlattenQuality", + "GifColorModel", + "GraphicSmoothing", + "JpegColorModel", "PdfAType", "PdfInfoQuery", "PdfMergeInput", @@ -30,8 +46,10 @@ "PdfRedactionPreset", "PdfRedactionType", "PdfXType", + "PngColorModel", "SummaryFormat", "SummaryOutputFormat", "SummaryOutputType", + "TiffColorModel", "TranslateOutputFormat", ] diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index df753f2b..0d06c671 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -14,6 +14,13 @@ __all__ = ( "ALL_PDF_INFO_QUERIES", + "BmpColorModel", + "CompressionLevel", + "ExtractTextGranularity", + "FlattenQuality", + "GifColorModel", + "GraphicSmoothing", + "JpegColorModel", "PdfAType", "PdfInfoQuery", "PdfMergeInput", @@ -24,9 +31,11 @@ "PdfRedactionPreset", "PdfRedactionType", "PdfXType", + "PngColorModel", "SummaryFormat", "SummaryOutputFormat", "SummaryOutputType", + "TiffColorModel", "TranslateOutputFormat", ) @@ -105,6 +114,15 @@ class PdfMergeSource(TypedDict, total=False): PdfAType = Literal["PDF/A-1b", "PDF/A-2b", "PDF/A-2u", "PDF/A-3b", "PDF/A-3u"] PdfXType = Literal["PDF/X-1a", "PDF/X-3", "PDF/X-4", "PDF/X-6"] +ExtractTextGranularity = Literal["off", "by_page", "document"] +CompressionLevel = Literal["low", "medium", "high", "custom"] +FlattenQuality = Literal["low", "medium", "high"] +PngColorModel = Literal["rgb", "rgba", "gray"] +BmpColorModel = Literal["rgb", "gray"] +GifColorModel = Literal["rgb", "gray"] +JpegColorModel = Literal["rgb", "cmyk", "gray"] +TiffColorModel = Literal["rgb", "rgba", "cmyk", "lab", "gray"] +GraphicSmoothing = Literal["none", "all", "text", "line", "image"] SummaryFormat = Literal[ "overview", From cbc0ff8676dee393a1320c3eabbdc2c836cdef72 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Mon, 12 Jan 2026 16:53:16 -0600 Subject: [PATCH 57/61] OCR PDF: Fix misleading method descriptions OCR PDF does not extract text, but it makes subsequent text extraction possible. --- src/pdfrest/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 593219cc..8d51cd3c 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2271,7 +2271,7 @@ def ocr_pdf( extra_body: Body | None = None, timeout: TimeoutTypes | None = None, ) -> PdfRestFileBasedResponse: - """Perform OCR on a PDF to extract searchable text.""" + """Perform OCR on a PDF to make text searchable and extractable.""" payload: dict[str, Any] = {"files": file} if pages is not None: @@ -3270,7 +3270,7 @@ async def ocr_pdf( extra_body: Body | None = None, timeout: TimeoutTypes | None = None, ) -> PdfRestFileBasedResponse: - """Perform OCR on a PDF to extract searchable text.""" + """Perform OCR on a PDF to make text searchable and extractable.""" payload: dict[str, Any] = {"files": file} if pages is not None: From 6658fe4e5d972837cd44e7cfbd0fe32b2af4ddea Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Mon, 12 Jan 2026 17:34:04 -0600 Subject: [PATCH 58/61] OCR PDF: Add missing `languages` body parameter Assisted-by: Codex --- src/pdfrest/client.py | 7 ++++-- src/pdfrest/models/_internal.py | 13 +++++++++++ src/pdfrest/types/__init__.py | 4 ++++ src/pdfrest/types/public.py | 20 +++++++++++++++++ tests/live/test_live_ocr_pdf.py | 2 +- tests/test_ocr_pdf.py | 39 +++++++++++++++++++++++++++------ 6 files changed, 75 insertions(+), 10 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 8d51cd3c..bc640278 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -114,6 +114,7 @@ GifColorModel, GraphicSmoothing, JpegColorModel, + OcrLanguage, PdfAType, PdfInfoQuery, PdfMergeInput, @@ -2264,6 +2265,7 @@ def ocr_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], *, + languages: OcrLanguage | Sequence[OcrLanguage] = "English", pages: PdfPageSelection | None = None, output: str | None = None, extra_query: Query | None = None, @@ -2273,7 +2275,7 @@ def ocr_pdf( ) -> PdfRestFileBasedResponse: """Perform OCR on a PDF to make text searchable and extractable.""" - payload: dict[str, Any] = {"files": file} + payload: dict[str, Any] = {"files": file, "languages": languages} if pages is not None: payload["pages"] = pages if output is not None: @@ -3263,6 +3265,7 @@ async def ocr_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], *, + languages: OcrLanguage | Sequence[OcrLanguage] = "English", pages: PdfPageSelection | None = None, output: str | None = None, extra_query: Query | None = None, @@ -3272,7 +3275,7 @@ async def ocr_pdf( ) -> PdfRestFileBasedResponse: """Perform OCR on a PDF to make text searchable and extractable.""" - payload: dict[str, Any] = {"files": file} + payload: dict[str, Any] = {"files": file, "languages": languages} if pages is not None: payload["pages"] = pages if output is not None: diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 59e576d6..e22334b1 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -22,6 +22,7 @@ from pdfrest.types.public import PdfRedactionPreset from ..types import ( + OcrLanguage, PdfAType, PdfInfoQuery, PdfXType, @@ -329,6 +330,18 @@ class OcrPdfPayload(BaseModel): ), PlainSerializer(_serialize_as_first_file_id), ] + languages: Annotated[ + list[OcrLanguage], + Field( + serialization_alias="languages", + validation_alias=AliasChoices("languages", "language"), + min_length=1, + default_factory=lambda: ["English"], + ), + BeforeValidator(_ensure_list), + BeforeValidator(_split_comma_list), + PlainSerializer(_serialize_as_comma_separated_string), + ] = ["English"] pages: Annotated[ list[AscendingPageRange] | None, Field(serialization_alias="pages", min_length=1, default=None), diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index b7c1ae7e..48f78b03 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -1,6 +1,7 @@ """Public import surface for shared pdfrest types.""" from .public import ( + ALL_OCR_LANGUAGES, ALL_PDF_INFO_QUERIES, BmpColorModel, CompressionLevel, @@ -9,6 +10,7 @@ GifColorModel, GraphicSmoothing, JpegColorModel, + OcrLanguage, PdfAType, PdfInfoQuery, PdfMergeInput, @@ -28,6 +30,7 @@ ) __all__ = [ + "ALL_OCR_LANGUAGES", "ALL_PDF_INFO_QUERIES", "BmpColorModel", "CompressionLevel", @@ -36,6 +39,7 @@ "GifColorModel", "GraphicSmoothing", "JpegColorModel", + "OcrLanguage", "PdfAType", "PdfInfoQuery", "PdfMergeInput", diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 0d06c671..6472f2e7 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -13,6 +13,7 @@ PdfRestFile = Any __all__ = ( + "ALL_OCR_LANGUAGES", "ALL_PDF_INFO_QUERIES", "BmpColorModel", "CompressionLevel", @@ -21,6 +22,7 @@ "GifColorModel", "GraphicSmoothing", "JpegColorModel", + "OcrLanguage", "PdfAType", "PdfInfoQuery", "PdfMergeInput", @@ -140,3 +142,21 @@ class PdfMergeSource(TypedDict, total=False): SummaryOutputType = Literal["json", "file"] TranslateOutputFormat = Literal["plaintext", "markdown"] + +OcrLanguage = Literal[ + "ChineseSimplified", + "ChineseTraditional", + "Dutch", + "English", + "French", + "German", + "Italian", + "Japanese", + "Korean", + "Portuguese", + "Spanish", +] + +ALL_OCR_LANGUAGES: tuple[OcrLanguage, ...] = cast( + tuple[OcrLanguage, ...], get_args(OcrLanguage) +) diff --git a/tests/live/test_live_ocr_pdf.py b/tests/live/test_live_ocr_pdf.py index 5109bad5..5e9ede14 100644 --- a/tests/live/test_live_ocr_pdf.py +++ b/tests/live/test_live_ocr_pdf.py @@ -18,7 +18,7 @@ def test_live_ocr_pdf_success( base_url=pdfrest_live_base_url, ) as client: uploaded = client.files.create_from_paths([resource])[0] - response = client.ocr_pdf(uploaded) + response = client.ocr_pdf(uploaded, languages=["English", "German"]) assert isinstance(response, PdfRestFileBasedResponse) assert response.output_files diff --git a/tests/test_ocr_pdf.py b/tests/test_ocr_pdf.py index b5059e30..625f92f4 100644 --- a/tests/test_ocr_pdf.py +++ b/tests/test_ocr_pdf.py @@ -38,11 +38,36 @@ def test_ocr_payload_invalid_page_range() -> None: OcrPdfPayload.model_validate({"files": [file_repr], "pages": ["5-2"]}) +def test_ocr_payload_languages() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + payload = OcrPdfPayload.model_validate( + {"files": [file_repr], "languages": ["English", "German"]} + ) + assert payload.languages == ["English", "German"] + assert ( + payload.model_dump( + mode="json", by_alias=True, exclude_none=True, exclude_unset=True + )["languages"] + == "English,German" + ) + + +def test_ocr_payload_invalid_language() -> None: + file_repr = make_pdf_file(PdfRestFileID.generate(1)) + with pytest.raises(ValidationError, match="ChineseSimplified"): + OcrPdfPayload.model_validate({"files": [file_repr], "languages": ["Klingon"]}) + + def test_ocr_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) payload_dump = OcrPdfPayload.model_validate( - {"files": [input_file], "pages": ["1-3"], "output": "ocr"} + { + "files": [input_file], + "pages": ["1-3"], + "output": "ocr", + "languages": ["English"], + } ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) output_id = str(PdfRestFileID.generate()) @@ -91,9 +116,9 @@ def test_ocr_pdf_request_customization( ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) - payload_dump = OcrPdfPayload.model_validate({"files": [input_file]}).model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ) + payload_dump = OcrPdfPayload.model_validate( + {"files": [input_file], "languages": ["English"]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) output_id = str(PdfRestFileID.generate()) captured_timeout: dict[str, float | dict[str, float] | None] = {} @@ -152,9 +177,9 @@ async def test_async_ocr_pdf_success( ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) - payload_dump = OcrPdfPayload.model_validate({"files": [input_file]}).model_dump( - mode="json", by_alias=True, exclude_none=True, exclude_unset=True - ) + payload_dump = OcrPdfPayload.model_validate( + {"files": [input_file], "languages": ["English"]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) output_id = str(PdfRestFileID.generate()) seen: dict[str, int] = {"post": 0, "get": 0} From ed9ee65a0ec6064b13e805578dff4ba14029c764 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Mon, 12 Jan 2026 10:56:51 -0600 Subject: [PATCH 59/61] Add Flatten Layers methods Assisted-by: Codex --- src/pdfrest/client.py | 53 +++++ src/pdfrest/models/_internal.py | 24 +++ tests/live/test_live_flatten_layers.py | 112 ++++++++++ tests/test_flatten_layers.py | 269 +++++++++++++++++++++++++ 4 files changed, 458 insertions(+) create mode 100644 tests/live/test_live_flatten_layers.py create mode 100644 tests/test_flatten_layers.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index bc640278..758fa8fb 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -84,6 +84,7 @@ PdfCompressPayload, PdfFlattenAnnotationsPayload, PdfFlattenFormsPayload, + PdfFlattenLayersPayload, PdfFlattenTransparenciesPayload, PdfInfoPayload, PdfLinearizePayload, @@ -2796,6 +2797,32 @@ def flatten_annotations( timeout=timeout, ) + def flatten_layers( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Flatten all layers in a PDF into a single layer.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/flattened-layers-pdf", + payload=payload, + payload_model=PdfFlattenLayersPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def rasterize_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3838,6 +3865,32 @@ async def flatten_annotations( timeout=timeout, ) + async def flatten_layers( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously flatten all layers in a PDF.""" + + payload: dict[str, Any] = {"files": file} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/flattened-layers-pdf", + payload=payload, + payload_model=PdfFlattenLayersPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def rasterize_pdf( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index e22334b1..bb1b931b 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -1101,6 +1101,30 @@ class PdfFlattenAnnotationsPayload(BaseModel): ] = None +class PdfFlattenLayersPayload(BaseModel): + """Adapt caller options into a pdfRest-ready flatten-layers request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class BmpPdfRestPayload(BasePdfRestGraphicPayload[Literal["rgb", "gray"]]): """Adapt caller options into a pdfRest-ready BMP request payload.""" diff --git a/tests/live/test_live_flatten_layers.py b/tests/live/test_live_flatten_layers.py new file mode 100644 index 00000000..7343cab7 --- /dev/null +++ b/tests/live/test_live_flatten_layers.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_layers( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("flatten-layers", id="custom-output"), + ], +) +def test_live_flatten_layers_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_layers: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str] = {} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.flatten_layers(uploaded_pdf_for_layers, **kwargs) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_pdf_for_layers.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_flatten_layers_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_layers: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.flatten_layers(uploaded_pdf_for_layers, output="async") + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async") + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_pdf_for_layers.id) + + +def test_live_flatten_layers_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_layers: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), + ): + client.flatten_layers( + uploaded_pdf_for_layers, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_flatten_layers_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_layers: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): + await client.flatten_layers( + uploaded_pdf_for_layers, + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) diff --git a/tests/test_flatten_layers.py b/tests/test_flatten_layers.py new file mode 100644 index 00000000..963a4482 --- /dev/null +++ b/tests/test_flatten_layers.py @@ -0,0 +1,269 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfFlattenLayersPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_flatten_layers_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfFlattenLayersPayload.model_validate( + {"files": [input_file], "output": "flattened-layers"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/flattened-layers-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "flattened-layers.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.flatten_layers(input_file, output="flattened-layers") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "flattened-layers.pdf" + assert output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_flatten_layers_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/flattened-layers-pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.flatten_layers( + input_file, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.29, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.29) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.29) + + +@pytest.mark.asyncio +async def test_async_flatten_layers_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfFlattenLayersPayload.model_validate( + {"files": [input_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/flattened-layers-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.flatten_layers(input_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_flatten_layers_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/flattened-layers-pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["id"] == str(input_file.id) + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.flatten_layers( + input_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.52, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.52) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.52) + + +def test_flatten_layers_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.flatten_layers(png_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.flatten_layers([pdf_file, make_pdf_file(PdfRestFileID.generate())]) From 83656f8593b58062b801fb09ae2fbd9aff12b6ba Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Mon, 12 Jan 2026 11:45:43 -0600 Subject: [PATCH 60/61] Add Convert Colors in PDF methods Assisted-by: Codex --- src/pdfrest/client.py | 72 ++++++ src/pdfrest/models/_internal.py | 63 +++++ src/pdfrest/types/__init__.py | 2 + src/pdfrest/types/public.py | 18 ++ tests/live/test_live_convert_colors.py | 118 +++++++++ tests/test_convert_colors.py | 334 +++++++++++++++++++++++++ 6 files changed, 607 insertions(+) create mode 100644 tests/live/test_live_convert_colors.py create mode 100644 tests/test_convert_colors.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 758fa8fb..c89a9f3f 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -82,6 +82,7 @@ JpegPdfRestPayload, OcrPdfPayload, PdfCompressPayload, + PdfConvertColorsPayload, PdfFlattenAnnotationsPayload, PdfFlattenFormsPayload, PdfFlattenLayersPayload, @@ -117,6 +118,7 @@ JpegColorModel, OcrLanguage, PdfAType, + PdfColorProfile, PdfInfoQuery, PdfMergeInput, PdfPageSelection, @@ -2718,6 +2720,41 @@ def compress_pdf( timeout=timeout, ) + def convert_colors( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + color_profile: PdfColorProfile, + profile: PdfRestFile | Sequence[PdfRestFile] | None = None, + preserve_black: bool = False, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert PDF colors using preset or custom ICC profiles.""" + + payload: dict[str, Any] = { + "files": file, + "color_profile": color_profile, + "preserve_black": preserve_black, + } + if profile is not None: + payload["profile"] = profile + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/pdf-with-converted-colors", + payload=payload, + payload_model=PdfConvertColorsPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def flatten_transparencies( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3786,6 +3823,41 @@ async def compress_pdf( timeout=timeout, ) + async def convert_colors( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + color_profile: PdfColorProfile, + profile: PdfRestFile | Sequence[PdfRestFile] | None = None, + preserve_black: bool = False, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert PDF colors using preset or custom ICC profiles.""" + + payload: dict[str, Any] = { + "files": file, + "color_profile": color_profile, + "preserve_black": preserve_black, + } + if profile is not None: + payload["profile"] = profile + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/pdf-with-converted-colors", + payload=payload, + payload_model=PdfConvertColorsPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def flatten_transparencies( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index bb1b931b..84facca4 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -24,6 +24,7 @@ from ..types import ( OcrLanguage, PdfAType, + PdfColorProfile, PdfInfoQuery, PdfXType, SummaryFormat, @@ -1125,6 +1126,68 @@ class PdfFlattenLayersPayload(BaseModel): ] = None +class PdfConvertColorsPayload(BaseModel): + """Adapt caller options into a pdfRest-ready convert-colors request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + color_profile: Annotated[ + PdfColorProfile, + Field(serialization_alias="color_profile"), + ] + profile: Annotated[ + list[PdfRestFile] | None, + Field( + default=None, + min_length=1, + max_length=1, + validation_alias=AliasChoices("profile", "profiles"), + serialization_alias="profile_id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + "application/vnd.iccprofile", + "application/octet-stream", + error_msg="Profile must be an ICC file", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] = None + preserve_black: Annotated[ + bool | None, + Field(serialization_alias="preserve_black", default=None), + ] = None + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + @model_validator(mode="after") + def _validate_profile_dependency(self) -> PdfConvertColorsPayload: + if self.color_profile == "custom": + if not self.profile: + msg = "color_profile 'custom' requires a profile to be provided." + raise ValueError(msg) + elif self.profile: + msg = "A profile can only be provided when color_profile is 'custom'." + raise ValueError(msg) + return self + + class BmpPdfRestPayload(BasePdfRestGraphicPayload[Literal["rgb", "gray"]]): """Adapt caller options into a pdfRest-ready BMP request payload.""" diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index 48f78b03..4e890fe9 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -12,6 +12,7 @@ JpegColorModel, OcrLanguage, PdfAType, + PdfColorProfile, PdfInfoQuery, PdfMergeInput, PdfMergeSource, @@ -41,6 +42,7 @@ "JpegColorModel", "OcrLanguage", "PdfAType", + "PdfColorProfile", "PdfInfoQuery", "PdfMergeInput", "PdfMergeSource", diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 6472f2e7..6f83633a 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -24,6 +24,7 @@ "JpegColorModel", "OcrLanguage", "PdfAType", + "PdfColorProfile", "PdfInfoQuery", "PdfMergeInput", "PdfMergeSource", @@ -160,3 +161,20 @@ class PdfMergeSource(TypedDict, total=False): ALL_OCR_LANGUAGES: tuple[OcrLanguage, ...] = cast( tuple[OcrLanguage, ...], get_args(OcrLanguage) ) +PdfColorProfile = Literal[ + "lab-d50", + "srgb", + "apple-rgb", + "color-match-rgb", + "gamma-18", + "gamma-22", + "dot-gain-10", + "dot-gain-15", + "dot-gain-20", + "dot-gain-25", + "dot-gain-30", + "monitor-rgb", + "acrobat5-cmyk", + "acrobat9-cmyk", + "custom", +] diff --git a/tests/live/test_live_convert_colors.py b/tests/live/test_live_convert_colors.py new file mode 100644 index 00000000..20a1f480 --- /dev/null +++ b/tests/live/test_live_convert_colors.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_for_color_conversion( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("converted-colors", id="custom-output"), + ], +) +def test_live_convert_colors_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_color_conversion: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str | bool] = {"color_profile": "srgb"} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_colors(uploaded_pdf_for_color_conversion, **kwargs) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_pdf_for_color_conversion.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_convert_colors_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_color_conversion: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_colors( + uploaded_pdf_for_color_conversion, + color_profile="srgb", + output="async", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async") + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_pdf_for_color_conversion.id) + + +def test_live_convert_colors_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_color_conversion: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"), + ): + client.convert_colors( + uploaded_pdf_for_color_conversion, + color_profile="srgb", + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_convert_colors_invalid_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_for_color_conversion: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)(id|file)"): + await client.convert_colors( + uploaded_pdf_for_color_conversion, + color_profile="srgb", + extra_body={"id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) diff --git a/tests/test_convert_colors.py b/tests/test_convert_colors.py new file mode 100644 index 00000000..4dec0dff --- /dev/null +++ b/tests/test_convert_colors.py @@ -0,0 +1,334 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfConvertColorsPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def _make_icc_file() -> PdfRestFile: + return PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "profile.icc", + "application/vnd.iccprofile", + ) + ) + + +def test_convert_colors_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfConvertColorsPayload.model_validate( + {"files": [input_file], "color_profile": "srgb", "output": "converted"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-converted-colors" + ): + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "converted.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_colors( + input_file, color_profile="srgb", output="converted" + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "converted.pdf" + assert output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_convert_colors_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + profile_file = _make_icc_file() + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-converted-colors" + ): + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["color_profile"] == "custom" + assert payload["profile_id"] == str(profile_file.id) + assert payload["preserve_black"] is True + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_colors( + input_file, + color_profile="custom", + profile=profile_file, + preserve_black=True, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.29, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.29) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.29) + + +@pytest.mark.asyncio +async def test_async_convert_colors_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfConvertColorsPayload.model_validate( + {"files": [input_file], "color_profile": "srgb"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-converted-colors" + ): + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_colors(input_file, color_profile="srgb") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_colors_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + profile_file = _make_icc_file() + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-converted-colors" + ): + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] == "yes" + assert payload["color_profile"] == "custom" + assert payload["profile_id"] == str(profile_file.id) + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_colors( + input_file, + color_profile="custom", + profile=profile_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.52, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.52) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.52) + + +def test_convert_colors_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + wrong_profile_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "profile.txt", + "text/plain", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.convert_colors(png_file, color_profile="srgb") + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.convert_colors( + [pdf_file, make_pdf_file(PdfRestFileID.generate())], + color_profile="srgb", + ) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValueError, match="requires a profile"), + ): + client.convert_colors(pdf_file, color_profile="custom") + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValueError, match="only be provided when color_profile"), + ): + client.convert_colors(pdf_file, color_profile="srgb", profile=_make_icc_file()) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Profile must be an ICC file"), + ): + client.convert_colors( + pdf_file, + color_profile="custom", + profile=wrong_profile_file, + ) From 18562cabeb0617048e8f5a03533167bf8eeaa13c Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Mon, 12 Jan 2026 14:04:16 -0600 Subject: [PATCH 61/61] Add Blank PDF methods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add `blank_pdf()` sync and async methods - Ensure compatibility with Blank PDF response: - Allow PdfRestRawFileResponse.input_id to default empty so missing inputId doesn’t fail validation - When normalizing file responses, fall back to raw ids (outputId) when inputId is absent for blank-pdf - Document blank-pdf handling to keep response construction working without server-provided input ids --- src/pdfrest/client.py | 87 ++++++++- src/pdfrest/models/_internal.py | 59 +++++- src/pdfrest/types/__init__.py | 4 + src/pdfrest/types/public.py | 5 + tests/live/test_live_blank_pdf.py | 103 ++++++++++ tests/test_blank_pdf.py | 311 ++++++++++++++++++++++++++++++ 6 files changed, 566 insertions(+), 3 deletions(-) create mode 100644 tests/live/test_live_blank_pdf.py create mode 100644 tests/test_blank_pdf.py diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index c89a9f3f..5ed0ed81 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -81,6 +81,7 @@ GifPdfRestPayload, JpegPdfRestPayload, OcrPdfPayload, + PdfBlankPayload, PdfCompressPayload, PdfConvertColorsPayload, PdfFlattenAnnotationsPayload, @@ -121,7 +122,9 @@ PdfColorProfile, PdfInfoQuery, PdfMergeInput, + PdfPageOrientation, PdfPageSelection, + PdfPageSize, PdfRedactionInstruction, PdfRGBColor, PdfXType, @@ -1032,8 +1035,9 @@ def _post_file_operation( for file_id in output_ids ] + input_ids = raw_response.input_id or (raw_response.ids or []) response_payload: dict[str, Any] = { - "input_id": [str(file_id) for file_id in raw_response.input_id], + "input_id": [str(file_id) for file_id in input_ids], "output_file": [ file.model_dump(mode="json", by_alias=True) for file in output_files ], @@ -1307,8 +1311,9 @@ async def throttled_fetch_file_info(file_id: str) -> PdfRestFile: ) ) + input_ids = raw_response.input_id or (raw_response.ids or []) response_payload: dict[str, Any] = { - "input_id": [str(file_id) for file_id in raw_response.input_id], + "input_id": [str(file_id) for file_id in input_ids], "output_file": [ file.model_dump(mode="json", by_alias=True) for file in output_files ], @@ -2755,6 +2760,45 @@ def convert_colors( timeout=timeout, ) + def blank_pdf( + self, + *, + page_size: PdfPageSize, + page_count: int, + page_orientation: PdfPageOrientation | None = None, + custom_height: float | None = None, + custom_width: float | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Create a blank PDF with the specified size, count, and orientation.""" + + payload: dict[str, Any] = { + "page_size": page_size, + "page_count": page_count, + } + if page_orientation is not None: + payload["page_orientation"] = page_orientation + if custom_height is not None: + payload["custom_height"] = custom_height + if custom_width is not None: + payload["custom_width"] = custom_width + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/blank-pdf", + payload=payload, + payload_model=PdfBlankPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def flatten_transparencies( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -3858,6 +3902,45 @@ async def convert_colors( timeout=timeout, ) + async def blank_pdf( + self, + *, + page_size: PdfPageSize, + page_count: int, + page_orientation: PdfPageOrientation | None = None, + custom_height: float | None = None, + custom_width: float | None = None, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously create a blank PDF with the specified size.""" + + payload: dict[str, Any] = { + "page_size": page_size, + "page_count": page_count, + } + if page_orientation is not None: + payload["page_orientation"] = page_orientation + if custom_height is not None: + payload["custom_height"] = custom_height + if custom_width is not None: + payload["custom_width"] = custom_width + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/blank-pdf", + payload=payload, + payload_model=PdfBlankPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def flatten_transparencies( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 84facca4..6fe476ad 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -26,6 +26,8 @@ PdfAType, PdfColorProfile, PdfInfoQuery, + PdfPageOrientation, + PdfPageSize, PdfXType, SummaryFormat, SummaryOutputFormat, @@ -1126,6 +1128,57 @@ class PdfFlattenLayersPayload(BaseModel): ] = None +class PdfBlankPayload(BaseModel): + """Adapt caller options into a pdfRest-ready blank PDF request payload.""" + + page_size: Annotated[ + PdfPageSize, + Field(serialization_alias="page_size"), + ] + page_count: Annotated[ + int, + Field(serialization_alias="page_count", ge=1, le=1000), + ] + page_orientation: Annotated[ + PdfPageOrientation | None, + Field(serialization_alias="page_orientation", default=None), + ] = None + custom_height: Annotated[ + float | None, + Field(serialization_alias="custom_height", gt=0, default=None), + ] = None + custom_width: Annotated[ + float | None, + Field(serialization_alias="custom_width", gt=0, default=None), + ] = None + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + @model_validator(mode="after") + def _validate_page_configuration(self) -> PdfBlankPayload: + is_custom = self.page_size == "custom" + has_custom_height = self.custom_height is not None + has_custom_width = self.custom_width is not None + if is_custom: + if not (has_custom_height and has_custom_width): + msg = "custom_height and custom_width are required when page_size is 'custom'." + raise ValueError(msg) + if self.page_orientation is not None: + msg = "page_orientation must be omitted when page_size is 'custom'." + raise ValueError(msg) + else: + if self.page_orientation is None: + msg = "page_orientation is required when page_size is not 'custom'." + raise ValueError(msg) + if has_custom_height or has_custom_width: + msg = "custom_height and custom_width can only be provided when page_size is 'custom'." + raise ValueError(msg) + return self + + class PdfConvertColorsPayload(BaseModel): """Adapt caller options into a pdfRest-ready convert-colors request payload.""" @@ -1239,7 +1292,11 @@ class PdfRestRawFileResponse(BaseModel): input_id: Annotated[ list[PdfRestFileID], - Field(alias="inputId", description="The id of the input file"), + Field( + alias="inputId", + description="The id of the input file", + default_factory=list, + ), BeforeValidator(_ensure_list), ] output_urls: Annotated[ diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index 4e890fe9..070cac75 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -16,7 +16,9 @@ PdfInfoQuery, PdfMergeInput, PdfMergeSource, + PdfPageOrientation, PdfPageSelection, + PdfPageSize, PdfRedactionInstruction, PdfRedactionPreset, PdfRedactionType, @@ -46,7 +48,9 @@ "PdfInfoQuery", "PdfMergeInput", "PdfMergeSource", + "PdfPageOrientation", "PdfPageSelection", + "PdfPageSize", "PdfRGBColor", "PdfRedactionInstruction", "PdfRedactionPreset", diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 6f83633a..c9ddc8a1 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -28,7 +28,9 @@ "PdfInfoQuery", "PdfMergeInput", "PdfMergeSource", + "PdfPageOrientation", "PdfPageSelection", + "PdfPageSize", "PdfRGBColor", "PdfRedactionInstruction", "PdfRedactionPreset", @@ -178,3 +180,6 @@ class PdfMergeSource(TypedDict, total=False): "acrobat9-cmyk", "custom", ] + +PdfPageSize = Literal["letter", "legal", "ledger", "A3", "A4", "A5", "custom"] +PdfPageOrientation = Literal["portrait", "landscape"] diff --git a/tests/live/test_live_blank_pdf.py b/tests/live/test_live_blank_pdf.py new file mode 100644 index 00000000..07f281e1 --- /dev/null +++ b/tests/live/test_live_blank_pdf.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("blank-doc", id="custom-output"), + ], +) +def test_live_blank_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + output_name: str | None, +) -> None: + kwargs: dict[str, str | int] = { + "page_size": "letter", + "page_count": 1, + "page_orientation": "portrait", + } + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.blank_pdf(**kwargs) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_blank_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.blank_pdf( + page_size="A4", + page_count=2, + page_orientation="landscape", + output="async-blank", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-blank") + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + + +def test_live_blank_pdf_invalid_request( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)(page|size)"), + ): + client.blank_pdf( + page_size="letter", + page_count=1, + page_orientation="portrait", + extra_body={"page_size": "not-a-size"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_blank_pdf_invalid_request( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)(page|size)"): + await client.blank_pdf( + page_size="letter", + page_count=1, + page_orientation="portrait", + extra_body={"page_size": "bad-size"}, + ) diff --git a/tests/test_blank_pdf.py b/tests/test_blank_pdf.py new file mode 100644 index 00000000..b38ee0e0 --- /dev/null +++ b/tests/test_blank_pdf.py @@ -0,0 +1,311 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfBlankPayload + +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, build_file_info_payload + + +def test_blank_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfBlankPayload.model_validate( + { + "page_size": "letter", + "page_count": 2, + "page_orientation": "portrait", + "output": "blank", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/blank-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "blank.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.blank_pdf( + page_size="letter", + page_count=2, + page_orientation="portrait", + output="blank", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + output_file = response.output_file + assert output_file.name == "blank.pdf" + assert output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == output_id + + +def test_blank_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/blank-pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["page_size"] == "custom" + assert payload["custom_height"] == 792 + assert payload["custom_width"] == 612 + assert "page_orientation" not in payload + assert payload["debug"] == "yes" + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.blank_pdf( + page_size="custom", + page_count=3, + custom_height=792, + custom_width=612, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.29, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.29) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.29) + + +@pytest.mark.asyncio +async def test_async_blank_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfBlankPayload.model_validate( + { + "page_size": "A4", + "page_count": 1, + "page_orientation": "landscape", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/blank-pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.blank_pdf( + page_size="A4", + page_count=1, + page_orientation="landscape", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == output_id + + +@pytest.mark.asyncio +async def test_async_blank_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/blank-pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["page_size"] == "custom" + assert payload["custom_height"] == 100 + assert payload["custom_width"] == 50 + assert "page_orientation" not in payload + assert payload["debug"] == "yes" + return httpx.Response( + 200, + json={ + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.blank_pdf( + page_size="custom", + page_count=1, + custom_height=100, + custom_width=50, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.52, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.52) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.52) + + +def test_blank_pdf_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValueError, match="page_orientation is required"), + ): + client.blank_pdf(page_size="letter", page_count=1) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValueError, match="custom_height and custom_width are required"), + ): + client.blank_pdf(page_size="custom", page_count=1, custom_height=50) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValueError, match="custom_height and custom_width can only be provided" + ), + ): + client.blank_pdf( + page_size="A3", + page_count=1, + page_orientation="portrait", + custom_width=10, + ) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValueError, match="page_orientation must be omitted"), + ): + client.blank_pdf( + page_size="custom", + page_count=1, + page_orientation="portrait", + custom_width=10, + custom_height=10, + ) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="Input should be less than or equal to 1000" + ), + ): + client.blank_pdf( + page_size="A4", + page_count=1001, + page_orientation="portrait", + )