From cf978bec73785485575f2c3c60c2c8247d16baf0 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 14 Jan 2026 15:34:08 -0600 Subject: [PATCH 1/8] Add Import Form Data tool Assisted-by: Codex --- src/pdfrest/client.py | 55 +++++++++++++++++++++++++++++++++ src/pdfrest/models/_internal.py | 46 +++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 505f1288..c2460f66 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -99,6 +99,7 @@ PdfFlattenFormsPayload, PdfFlattenLayersPayload, PdfFlattenTransparenciesPayload, + PdfImportFormDataPayload, PdfInfoPayload, PdfLinearizePayload, PdfMergePayload, @@ -2846,6 +2847,33 @@ def convert_to_word( timeout=timeout, ) + def import_form_data( + self, + file: PdfRestFile | Sequence[PdfRestFile], + data_file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Import form data from a data file into an existing PDF with form fields.""" + + payload: dict[str, Any] = {"files": file, "data_file": data_file} + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/pdf-with-imported-form-data", + payload=payload, + payload_model=PdfImportFormDataPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def flatten_pdf_forms( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -4549,6 +4577,33 @@ async def convert_to_word( timeout=timeout, ) + async def import_form_data( + self, + file: PdfRestFile | Sequence[PdfRestFile], + data_file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously import form data from a data file into a PDF.""" + + payload: dict[str, Any] = {"files": file, "data_file": data_file} + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/pdf-with-imported-form-data", + payload=payload, + payload_model=PdfImportFormDataPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def flatten_pdf_forms( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 65f65197..55317bd7 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -1318,6 +1318,52 @@ class PdfFlattenFormsPayload(BaseModel): ] = None +class PdfImportFormDataPayload(BaseModel): + """Adapt caller options into a pdfRest-ready import-form-data request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + data_file: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("data_file", "data_files"), + serialization_alias="data_file_id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + "application/xml", + "text/xml", + "application/vnd.fdf", + "application/vnd.adobe.xfdf", + "application/vnd.adobe.xdp+xml", + "application/vnd.adobe.xfd+xml", + error_msg="Data file must be an XFDF, XDP, XFD, FDF, or XML file", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class PdfCompressPayload(BaseModel): """Adapt caller options into a pdfRest-ready compress request payload.""" From 3a5c1815c3f229a3a152a065cb39a5a34ff40e52 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 14 Jan 2026 15:34:28 -0600 Subject: [PATCH 2/8] Test Import Form Data tool Assisted-by: Codex --- tests/live/test_live_import_form_data.py | 135 ++++++++++ tests/resources/form_data.xml | 14 + tests/test_import_form_data.py | 324 +++++++++++++++++++++++ 3 files changed, 473 insertions(+) create mode 100644 tests/live/test_live_import_form_data.py create mode 100644 tests/resources/form_data.xml create mode 100644 tests/test_import_form_data.py diff --git a/tests/live/test_live_import_form_data.py b/tests/live/test_live_import_form_data.py new file mode 100644 index 00000000..8a029edd --- /dev/null +++ b/tests/live/test_live_import_form_data.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_with_forms( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("form_with_data.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.fixture(scope="module") +def uploaded_form_data_file( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("form_data.xml") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "output_name", + [ + pytest.param(None, id="default-output"), + pytest.param("imported-form", id="custom-output"), + ], +) +def test_live_import_form_data( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, + uploaded_form_data_file: PdfRestFile, + output_name: str | None, +) -> None: + kwargs: dict[str, str] = {} + if output_name is not None: + kwargs["output"] = output_name + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.import_form_data( + uploaded_pdf_with_forms, + uploaded_form_data_file, + **kwargs, + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_import_form_data_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, + uploaded_form_data_file: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.import_form_data( + uploaded_pdf_with_forms, + uploaded_form_data_file, + output="async-imported", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-imported") + assert output_file.type == "application/pdf" + assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + + +def test_live_import_form_data_invalid_data_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, + uploaded_form_data_file: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)(data|id)"), + ): + client.import_form_data( + uploaded_pdf_with_forms, + uploaded_form_data_file, + extra_body={"data_file_id": "ffffffff-ffff-ffff-ffff-ffffffffffff"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_import_form_data_invalid_data_file_id( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, + uploaded_form_data_file: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)(data|id)"): + await client.import_form_data( + uploaded_pdf_with_forms, + uploaded_form_data_file, + extra_body={"data_file_id": "00000000-0000-0000-0000-000000000000"}, + ) diff --git a/tests/resources/form_data.xml b/tests/resources/form_data.xml new file mode 100644 index 00000000..9aa268dd --- /dev/null +++ b/tests/resources/form_data.xml @@ -0,0 +1,14 @@ + + + + + Jamie + + + Appleseed + + + Green + + + diff --git a/tests/test_import_form_data.py b/tests/test_import_form_data.py new file mode 100644 index 00000000..35dfd027 --- /dev/null +++ b/tests/test_import_form_data.py @@ -0,0 +1,324 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfImportFormDataPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def _make_data_file( + file_id: PdfRestFileID, *, mime_type: str = "application/xml" +) -> PdfRestFile: + return PdfRestFile.model_validate( + build_file_info_payload(file_id, "form-data.xml", mime_type) + ) + + +def test_import_form_data_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + data_file = _make_data_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfImportFormDataPayload.model_validate( + {"files": [input_file], "data_file": [data_file], "output": "filled-form"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-imported-form-data" + ): + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "filled-form.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.import_form_data( + input_file, + data_file, + output="filled-form", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "filled-form.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + assert response.warning is None + + +def test_import_form_data_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + data_file = _make_data_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-imported-form-data" + ): + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["id"] == str(input_file.id) + assert payload["data_file_id"] == str(data_file.id) + assert payload["output"] == "custom-output" + assert payload["debug"] == "flag" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.import_form_data( + input_file, + data_file, + output="custom-output", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "flag"}, + timeout=0.41, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.41) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.41) + + +@pytest.mark.asyncio +async def test_async_import_form_data_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + data_file = _make_data_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfImportFormDataPayload.model_validate( + {"files": [input_file], "data_file": [data_file]} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-imported-form-data" + ): + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.import_form_data(input_file, data_file) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_import_form_data_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + data_file = _make_data_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if ( + request.method == "POST" + and request.url.path == "/pdf-with-imported-form-data" + ): + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["id"] == str(input_file.id) + assert payload["data_file_id"] == str(data_file.id) + assert payload["note"] == "details" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.pdf", + "application/pdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.import_form_data( + input_file, + data_file, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"note": "details"}, + timeout=0.73, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.73) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.73) + + +def test_import_form_data_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + data_file = _make_data_file(PdfRestFileID.generate(2)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.import_form_data(png_file, data_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, + match="Data file must be an XFDF, XDP, XFD, FDF, or XML file", + ), + ): + client.import_form_data(pdf_file, png_file) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.import_form_data( + [pdf_file, make_pdf_file(PdfRestFileID.generate())], + data_file, + ) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.import_form_data( + pdf_file, + [data_file, _make_data_file(PdfRestFileID.generate())], + ) From 4f88ee089aedc0203a93418c621b006993f4a863 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 14 Jan 2026 15:58:43 -0600 Subject: [PATCH 3/8] Add Export Form Data tool Assisted-by: Codex --- src/pdfrest/client.py | 62 +++++++++++++++++++++++++++++++++ src/pdfrest/models/_internal.py | 29 +++++++++++++++ src/pdfrest/types/__init__.py | 2 ++ src/pdfrest/types/public.py | 2 ++ 4 files changed, 95 insertions(+) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index c2460f66..a00c33a3 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -95,6 +95,7 @@ PdfCompressPayload, PdfDecryptPayload, PdfEncryptPayload, + PdfExportFormDataPayload, PdfFlattenAnnotationsPayload, PdfFlattenFormsPayload, PdfFlattenLayersPayload, @@ -128,6 +129,7 @@ ALL_PDF_INFO_QUERIES, BmpColorModel, CompressionLevel, + ExportDataFormat, ExtractTextGranularity, FlattenQuality, GifColorModel, @@ -2874,6 +2876,36 @@ def import_form_data( timeout=timeout, ) + def export_form_data( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + data_format: ExportDataFormat, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Export form data from a PDF with form fields to an external data file.""" + + payload: dict[str, Any] = { + "files": file, + "data_format": data_format, + } + if output is not None: + payload["output"] = output + + return self._post_file_operation( + endpoint="/exported-form-data", + payload=payload, + payload_model=PdfExportFormDataPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def flatten_pdf_forms( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -4604,6 +4636,36 @@ async def import_form_data( timeout=timeout, ) + async def export_form_data( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + data_format: ExportDataFormat, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously export form data from a PDF into a data file.""" + + payload: dict[str, Any] = { + "files": file, + "data_format": data_format, + } + if output is not None: + payload["output"] = output + + return await self._post_file_operation( + endpoint="/exported-form-data", + payload=payload, + payload_model=PdfExportFormDataPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def flatten_pdf_forms( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 55317bd7..78d229bd 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -26,6 +26,7 @@ HtmlPageOrientation, HtmlPageSize, HtmlWebLayout, + ExportDataFormat, OcrLanguage, PdfAType, PdfConversionCompression, @@ -1364,6 +1365,34 @@ class PdfImportFormDataPayload(BaseModel): ] = None +class PdfExportFormDataPayload(BaseModel): + """Adapt caller options into a pdfRest-ready export-form-data request payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types("application/pdf", error_msg="Must be a PDF file") + ), + PlainSerializer(_serialize_as_first_file_id), + ] + data_format: Annotated[ + ExportDataFormat, + Field(serialization_alias="data_format"), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + class PdfCompressPayload(BaseModel): """Adapt caller options into a pdfRest-ready compress request payload.""" diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index b7db11ae..61360937 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -6,6 +6,7 @@ ALL_PDF_RESTRICTIONS, BmpColorModel, CompressionLevel, + ExportDataFormat, ExtractTextGranularity, FlattenQuality, GifColorModel, @@ -48,6 +49,7 @@ "ALL_PDF_RESTRICTIONS", "BmpColorModel", "CompressionLevel", + "ExportDataFormat", "ExtractTextGranularity", "FlattenQuality", "GifColorModel", diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 20c73d97..9796b484 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -18,6 +18,7 @@ "ALL_PDF_RESTRICTIONS", "BmpColorModel", "CompressionLevel", + "ExportDataFormat", "ExtractTextGranularity", "FlattenQuality", "GifColorModel", @@ -167,6 +168,7 @@ class PdfMergeSource(TypedDict, total=False): JpegColorModel = Literal["rgb", "cmyk", "gray"] TiffColorModel = Literal["rgb", "rgba", "cmyk", "lab", "gray"] GraphicSmoothing = Literal["none", "all", "text", "line", "image"] +ExportDataFormat = Literal["fdf", "xfdf", "xml", "xdp", "xfd"] SummaryFormat = Literal[ "overview", From e8ae2f7b12f760c61f1b7c2314a298849981a2d5 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 14 Jan 2026 15:59:04 -0600 Subject: [PATCH 4/8] Test Export Form Data Assisted-by: Codex --- tests/live/test_live_export_form_data.py | 107 +++++++++ tests/test_export_form_data.py | 284 +++++++++++++++++++++++ 2 files changed, 391 insertions(+) create mode 100644 tests/live/test_live_export_form_data.py create mode 100644 tests/test_export_form_data.py diff --git a/tests/live/test_live_export_form_data.py b/tests/live/test_live_export_form_data.py new file mode 100644 index 00000000..b24c2fac --- /dev/null +++ b/tests/live/test_live_export_form_data.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_pdf_with_forms( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("form_with_data.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "data_format", + [ + pytest.param("xml", id="xml"), + pytest.param("xfdf", id="xfdf"), + ], +) +def test_live_export_form_data( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, + data_format: str, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.export_form_data( + uploaded_pdf_with_forms, + data_format=data_format, # type: ignore[arg-type] + output=f"exported-{data_format}", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith(f"exported-{data_format}") + assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + + +@pytest.mark.asyncio +async def test_live_async_export_form_data_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.export_form_data( + uploaded_pdf_with_forms, + data_format="xml", + output="async-exported", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-exported") + assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + + +def test_live_export_form_data_invalid_format_for_pdf_type( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)data_format"), + ): + client.export_form_data( + uploaded_pdf_with_forms, + data_format="xdp", # type: ignore[arg-type] + ) + + +@pytest.mark.asyncio +async def test_live_async_export_form_data_invalid_format_for_pdf_type( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_pdf_with_forms: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)data_format"): + await client.export_form_data( + uploaded_pdf_with_forms, + data_format="xdp", # type: ignore[arg-type] + ) diff --git a/tests/test_export_form_data.py b/tests/test_export_form_data.py new file mode 100644 index 00000000..ea52b8c4 --- /dev/null +++ b/tests/test_export_form_data.py @@ -0,0 +1,284 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import PdfExportFormDataPayload + +from .graphics_test_helpers import ( + ASYNC_API_KEY, + VALID_API_KEY, + build_file_info_payload, + make_pdf_file, +) + + +def test_export_form_data_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfExportFormDataPayload.model_validate( + {"files": [input_file], "data_format": "xml", "output": "exported-data"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/exported-form-data": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "exported-data.xml", + "application/xml", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.export_form_data( + input_file, + data_format="xml", + output="exported-data", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "exported-data.xml" + assert response.output_file.type == "application/xml" + assert str(response.input_id) == str(input_file.id) + assert response.warning is None + + +def test_export_form_data_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(1)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/exported-form-data": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["id"] == str(input_file.id) + assert payload["data_format"] == "fdf" + assert payload["output"] == "custom-data" + assert payload["flag"] == "yes" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "custom-data.fdf", + "application/vnd.fdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.export_form_data( + input_file, + data_format="fdf", + output="custom-data", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"flag": "yes"}, + timeout=0.37, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom-data.fdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.37) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.37) + + +@pytest.mark.asyncio +async def test_async_export_form_data_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + + payload_dump = PdfExportFormDataPayload.model_validate( + {"files": [input_file], "data_format": "xfdf"} + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/exported-form-data": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-data.xfdf", + "application/vnd.adobe.xfdf", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.export_form_data(input_file, data_format="xfdf") + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-data.xfdf" + assert response.output_file.type == "application/vnd.adobe.xfdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_export_form_data_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_pdf_file(PdfRestFileID.generate(2)) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/exported-form-data": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["id"] == str(input_file.id) + assert payload["data_format"] == "xml" + assert payload["note"] == "details" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["format"] == "info" + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, + "async-custom.xml", + "application/xml", + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.export_form_data( + input_file, + data_format="xml", + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"note": "details"}, + timeout=0.62, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.xml" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.62) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.62) + + +def test_export_form_data_validation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Must be a PDF file"), + ): + client.export_form_data(png_file, data_format="xml") + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises(ValidationError, match="Input should be 'fdf'"), + ): + client.export_form_data(pdf_file, data_format="yaml") # type: ignore[arg-type] + + with ( + PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client, + pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ), + ): + client.export_form_data( + [pdf_file, make_pdf_file(PdfRestFileID.generate())], data_format="xml" + ) From 222f14e2eaaec0963543b3f3f532b87cbb572cc6 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 11 Feb 2026 18:01:47 -0600 Subject: [PATCH 5/8] tests: treat import-form-data responses as multi-input (input_ids) --- tests/live/test_live_import_form_data.py | 14 ++++++++++++-- tests/test_import_form_data.py | 12 +++++++----- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/tests/live/test_live_import_form_data.py b/tests/live/test_live_import_form_data.py index 8a029edd..a1d9d932 100644 --- a/tests/live/test_live_import_form_data.py +++ b/tests/live/test_live_import_form_data.py @@ -65,7 +65,12 @@ def test_live_import_form_data( assert response.output_files output_file = response.output_file assert output_file.type == "application/pdf" - assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + assert str(uploaded_pdf_with_forms.id) in { + str(file_id) for file_id in response.input_ids + } + assert str(uploaded_form_data_file.id) in { + str(file_id) for file_id in response.input_ids + } if output_name is not None: assert output_file.name.startswith(output_name) else: @@ -93,7 +98,12 @@ async def test_live_async_import_form_data_success( output_file = response.output_file assert output_file.name.startswith("async-imported") assert output_file.type == "application/pdf" - assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + assert str(uploaded_pdf_with_forms.id) in { + str(file_id) for file_id in response.input_ids + } + assert str(uploaded_form_data_file.id) in { + str(file_id) for file_id in response.input_ids + } def test_live_import_form_data_invalid_data_file_id( diff --git a/tests/test_import_form_data.py b/tests/test_import_form_data.py index 35dfd027..127421a3 100644 --- a/tests/test_import_form_data.py +++ b/tests/test_import_form_data.py @@ -49,7 +49,7 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "inputId": [input_file.id], + "inputId": [input_file.id, data_file.id], "outputId": [output_id], }, ) @@ -79,7 +79,8 @@ def handler(request: httpx.Request) -> httpx.Response: assert isinstance(response, PdfRestFileBasedResponse) assert response.output_file.name == "filled-form.pdf" assert response.output_file.type == "application/pdf" - assert str(response.input_id) == str(input_file.id) + assert str(input_file.id) in {str(file_id) for file_id in response.input_ids} + assert str(data_file.id) in {str(file_id) for file_id in response.input_ids} assert response.warning is None @@ -108,7 +109,7 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "inputId": [input_file.id], + "inputId": [input_file.id, data_file.id], "outputId": [output_id], }, ) @@ -177,7 +178,7 @@ def handler(request: httpx.Request) -> httpx.Response: return httpx.Response( 200, json={ - "inputId": [input_file.id], + "inputId": [input_file.id, data_file.id], "outputId": [output_id], }, ) @@ -203,7 +204,8 @@ def handler(request: httpx.Request) -> httpx.Response: assert isinstance(response, PdfRestFileBasedResponse) assert response.output_file.name == "async.pdf" assert response.output_file.type == "application/pdf" - assert str(response.input_id) == str(input_file.id) + assert str(input_file.id) in {str(file_id) for file_id in response.input_ids} + assert str(data_file.id) in {str(file_id) for file_id in response.input_ids} @pytest.mark.asyncio From e03c1a2e5789144ebae23d0ec5066a42322e722f Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Tue, 17 Feb 2026 16:02:04 -0600 Subject: [PATCH 6/8] export-form-data: Enumerate export formats, shore up test coverage Assisted-by: Codex --- src/pdfrest/client.py | 14 +- src/pdfrest/types/public.py | 2 + tests/live/test_live_export_form_data.py | 196 +++++++++++++++++++++-- tests/test_export_form_data.py | 121 ++++++++++++-- 4 files changed, 304 insertions(+), 29 deletions(-) diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index a00c33a3..7fd45ac6 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -2887,7 +2887,12 @@ def export_form_data( extra_body: Body | None = None, timeout: TimeoutTypes | None = None, ) -> PdfRestFileBasedResponse: - """Export form data from a PDF with form fields to an external data file.""" + """Export form data from a PDF into an external data file. + + `data_format` support depends on detected form type: + - AcroForm PDFs: `xfdf`, `fdf`, `xml` + - XFA PDFs: `xfd`, `xdp`, `xml` + """ payload: dict[str, Any] = { "files": file, @@ -4647,7 +4652,12 @@ async def export_form_data( extra_body: Body | None = None, timeout: TimeoutTypes | None = None, ) -> PdfRestFileBasedResponse: - """Asynchronously export form data from a PDF into a data file.""" + """Asynchronously export form data from a PDF into a data file. + + `data_format` support depends on detected form type: + - AcroForm PDFs: `xfdf`, `fdf`, `xml` + - XFA PDFs: `xfd`, `xdp`, `xml` + """ payload: dict[str, Any] = { "files": file, diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 9796b484..55497645 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -168,6 +168,8 @@ class PdfMergeSource(TypedDict, total=False): JpegColorModel = Literal["rgb", "cmyk", "gray"] TiffColorModel = Literal["rgb", "rgba", "cmyk", "lab", "gray"] GraphicSmoothing = Literal["none", "all", "text", "line", "image"] +# Server accepts all values here, but enforces form-type subsets at runtime: +# AcroForm -> xfdf/fdf/xml, XFA -> xfd/xdp/xml. ExportDataFormat = Literal["fdf", "xfdf", "xml", "xdp", "xfd"] SummaryFormat = Literal[ diff --git a/tests/live/test_live_export_form_data.py b/tests/live/test_live_export_form_data.py index b24c2fac..881c2771 100644 --- a/tests/live/test_live_export_form_data.py +++ b/tests/live/test_live_export_form_data.py @@ -4,6 +4,7 @@ from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient from pdfrest.models import PdfRestFile +from pdfrest.types import ExportDataFormat from ..resources import get_test_resource_path @@ -24,15 +25,16 @@ def uploaded_pdf_with_forms( @pytest.mark.parametrize( "data_format", [ - pytest.param("xml", id="xml"), + pytest.param("fdf", id="fdf"), pytest.param("xfdf", id="xfdf"), + pytest.param("xml", id="xml"), ], ) -def test_live_export_form_data( +def test_live_export_form_data_acroform( pdfrest_api_key: str, pdfrest_live_base_url: str, uploaded_pdf_with_forms: PdfRestFile, - data_format: str, + data_format: ExportDataFormat, ) -> None: with PdfRestClient( api_key=pdfrest_api_key, @@ -40,21 +42,81 @@ def test_live_export_form_data( ) as client: response = client.export_form_data( uploaded_pdf_with_forms, - data_format=data_format, # type: ignore[arg-type] + data_format=data_format, output=f"exported-{data_format}", ) assert response.output_files output_file = response.output_file assert output_file.name.startswith(f"exported-{data_format}") - assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + assert output_file.type + assert output_file.size > 0 + assert response.warning is None + assert str(uploaded_pdf_with_forms.id) in { + str(file_id) for file_id in response.input_ids + } + + +@pytest.fixture(scope="module") +def uploaded_xfa_pdf( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("xfa.pdf") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + "data_format", + [ + pytest.param("xfd", id="xfd"), + pytest.param("xdp", id="xdp"), + pytest.param("xml", id="xml"), + ], +) +def test_live_export_form_data_xfa( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_xfa_pdf: PdfRestFile, + data_format: ExportDataFormat, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.export_form_data( + uploaded_xfa_pdf, + data_format=data_format, + output=f"exported-xfa-{data_format}", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith(f"exported-xfa-{data_format}") + assert output_file.type + assert output_file.size > 0 + assert response.warning is None + assert str(uploaded_xfa_pdf.id) in {str(file_id) for file_id in response.input_ids} @pytest.mark.asyncio -async def test_live_async_export_form_data_success( +@pytest.mark.parametrize( + "data_format", + [ + pytest.param("fdf", id="fdf"), + pytest.param("xfdf", id="xfdf"), + pytest.param("xml", id="xml"), + ], +) +async def test_live_async_export_form_data_acroform( pdfrest_api_key: str, pdfrest_live_base_url: str, uploaded_pdf_with_forms: PdfRestFile, + data_format: ExportDataFormat, ) -> None: async with AsyncPdfRestClient( api_key=pdfrest_api_key, @@ -62,46 +124,152 @@ async def test_live_async_export_form_data_success( ) as client: response = await client.export_form_data( uploaded_pdf_with_forms, - data_format="xml", - output="async-exported", + data_format=data_format, + output=f"async-acro-{data_format}", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith(f"async-acro-{data_format}") + assert output_file.type + assert output_file.size > 0 + assert response.warning is None + assert str(uploaded_pdf_with_forms.id) in { + str(file_id) for file_id in response.input_ids + } + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "data_format", + [ + pytest.param("xfd", id="xfd"), + pytest.param("xdp", id="xdp"), + pytest.param("xml", id="xml"), + ], +) +async def test_live_async_export_form_data_xfa( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_xfa_pdf: PdfRestFile, + data_format: ExportDataFormat, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.export_form_data( + uploaded_xfa_pdf, + data_format=data_format, + output=f"async-xfa-{data_format}", ) assert response.output_files output_file = response.output_file - assert output_file.name.startswith("async-exported") - assert str(response.input_id) == str(uploaded_pdf_with_forms.id) + assert output_file.name.startswith(f"async-xfa-{data_format}") + assert output_file.type + assert output_file.size > 0 + assert response.warning is None + assert str(uploaded_xfa_pdf.id) in {str(file_id) for file_id in response.input_ids} +@pytest.mark.parametrize( + "invalid_format", + [ + pytest.param("xdp", id="xdp"), + pytest.param("xfd", id="xfd"), + ], +) def test_live_export_form_data_invalid_format_for_pdf_type( pdfrest_api_key: str, pdfrest_live_base_url: str, uploaded_pdf_with_forms: PdfRestFile, + invalid_format: ExportDataFormat, ) -> None: with ( PdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client, - pytest.raises(PdfRestApiError, match=r"(?i)data_format"), + pytest.raises(PdfRestApiError, match=r"(?i)(acroform|data_format)"), ): client.export_form_data( uploaded_pdf_with_forms, - data_format="xdp", # type: ignore[arg-type] + data_format=invalid_format, ) @pytest.mark.asyncio +@pytest.mark.parametrize( + "invalid_format", + [ + pytest.param("xdp", id="xdp"), + pytest.param("xfd", id="xfd"), + ], +) async def test_live_async_export_form_data_invalid_format_for_pdf_type( pdfrest_api_key: str, pdfrest_live_base_url: str, uploaded_pdf_with_forms: PdfRestFile, + invalid_format: ExportDataFormat, ) -> None: async with AsyncPdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - with pytest.raises(PdfRestApiError, match=r"(?i)data_format"): + with pytest.raises(PdfRestApiError, match=r"(?i)(acroform|data_format)"): await client.export_form_data( uploaded_pdf_with_forms, - data_format="xdp", # type: ignore[arg-type] + data_format=invalid_format, + ) + + +@pytest.mark.parametrize( + "invalid_format", + [ + pytest.param("xfdf", id="xfdf"), + pytest.param("fdf", id="fdf"), + ], +) +def test_live_export_form_data_invalid_format_for_xfa( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_xfa_pdf: PdfRestFile, + invalid_format: ExportDataFormat, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)(xfa|data_format)"), + ): + client.export_form_data( + uploaded_xfa_pdf, + data_format=invalid_format, + ) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "invalid_format", + [ + pytest.param("xfdf", id="xfdf"), + pytest.param("fdf", id="fdf"), + ], +) +async def test_live_async_export_form_data_invalid_format_for_xfa( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_xfa_pdf: PdfRestFile, + invalid_format: ExportDataFormat, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)(xfa|data_format)"): + await client.export_form_data( + uploaded_xfa_pdf, + data_format=invalid_format, ) diff --git a/tests/test_export_form_data.py b/tests/test_export_form_data.py index ea52b8c4..f590e504 100644 --- a/tests/test_export_form_data.py +++ b/tests/test_export_form_data.py @@ -9,6 +9,7 @@ from pdfrest import AsyncPdfRestClient, PdfRestClient from pdfrest.models import PdfRestFile, PdfRestFileBasedResponse, PdfRestFileID from pdfrest.models._internal import PdfExportFormDataPayload +from pdfrest.types import ExportDataFormat from .graphics_test_helpers import ( ASYNC_API_KEY, @@ -18,13 +19,43 @@ ) -def test_export_form_data_success(monkeypatch: pytest.MonkeyPatch) -> None: +@pytest.mark.parametrize( + ("data_format", "output_name", "mime_type"), + [ + pytest.param("fdf", "exported-data.fdf", "application/vnd.fdf", id="fdf"), + pytest.param( + "xfdf", + "exported-data.xfdf", + "application/vnd.adobe.xfdf", + id="xfdf", + ), + pytest.param("xml", "exported-data.xml", "application/xml", id="xml"), + pytest.param( + "xdp", + "exported-data.xdp", + "application/vnd.adobe.xdp+xml", + id="xdp", + ), + pytest.param( + "xfd", + "exported-data.xfd", + "application/vnd.adobe.xfd+xml", + id="xfd", + ), + ], +) +def test_export_form_data_success( + monkeypatch: pytest.MonkeyPatch, + data_format: ExportDataFormat, + output_name: str, + mime_type: str, +) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) output_id = str(PdfRestFileID.generate()) payload_dump = PdfExportFormDataPayload.model_validate( - {"files": [input_file], "data_format": "xml", "output": "exported-data"} + {"files": [input_file], "data_format": data_format, "output": "exported-data"} ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) seen: dict[str, int] = {"post": 0, "get": 0} @@ -48,8 +79,8 @@ def handler(request: httpx.Request) -> httpx.Response: 200, json=build_file_info_payload( output_id, - "exported-data.xml", - "application/xml", + output_name, + mime_type, ), ) msg = f"Unexpected request {request.method} {request.url}" @@ -59,14 +90,14 @@ def handler(request: httpx.Request) -> httpx.Response: with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: response = client.export_form_data( input_file, - data_format="xml", + data_format=data_format, output="exported-data", ) assert seen == {"post": 1, "get": 1} assert isinstance(response, PdfRestFileBasedResponse) - assert response.output_file.name == "exported-data.xml" - assert response.output_file.type == "application/xml" + assert response.output_file.name == output_name + assert response.output_file.type == mime_type assert str(response.input_id) == str(input_file.id) assert response.warning is None @@ -136,15 +167,43 @@ def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio +@pytest.mark.parametrize( + ("data_format", "output_name", "mime_type"), + [ + pytest.param("fdf", "async-data.fdf", "application/vnd.fdf", id="fdf"), + pytest.param( + "xfdf", + "async-data.xfdf", + "application/vnd.adobe.xfdf", + id="xfdf", + ), + pytest.param("xml", "async-data.xml", "application/xml", id="xml"), + pytest.param( + "xdp", + "async-data.xdp", + "application/vnd.adobe.xdp+xml", + id="xdp", + ), + pytest.param( + "xfd", + "async-data.xfd", + "application/vnd.adobe.xfd+xml", + id="xfd", + ), + ], +) async def test_async_export_form_data_success( monkeypatch: pytest.MonkeyPatch, + data_format: ExportDataFormat, + output_name: str, + mime_type: str, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) output_id = str(PdfRestFileID.generate()) payload_dump = PdfExportFormDataPayload.model_validate( - {"files": [input_file], "data_format": "xfdf"} + {"files": [input_file], "data_format": data_format} ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) seen: dict[str, int] = {"post": 0, "get": 0} @@ -168,8 +227,8 @@ def handler(request: httpx.Request) -> httpx.Response: 200, json=build_file_info_payload( output_id, - "async-data.xfdf", - "application/vnd.adobe.xfdf", + output_name, + mime_type, ), ) msg = f"Unexpected request {request.method} {request.url}" @@ -177,12 +236,12 @@ def handler(request: httpx.Request) -> httpx.Response: transport = httpx.MockTransport(handler) async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: - response = await client.export_form_data(input_file, data_format="xfdf") + response = await client.export_form_data(input_file, data_format=data_format) assert seen == {"post": 1, "get": 1} assert isinstance(response, PdfRestFileBasedResponse) - assert response.output_file.name == "async-data.xfdf" - assert response.output_file.type == "application/vnd.adobe.xfdf" + assert response.output_file.name == output_name + assert response.output_file.type == mime_type assert str(response.input_id) == str(input_file.id) @@ -282,3 +341,39 @@ def test_export_form_data_validation(monkeypatch: pytest.MonkeyPatch) -> None: client.export_form_data( [pdf_file, make_pdf_file(PdfRestFileID.generate())], data_format="xml" ) + + +@pytest.mark.asyncio +async def test_async_export_form_data_validation( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises(ValidationError, match="Must be a PDF file"): + await client.export_form_data(png_file, data_format="xml") + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises(ValidationError, match="Input should be 'fdf'"): + await client.export_form_data( + pdf_file, + data_format="yaml", # type: ignore[arg-type] + ) + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ): + await client.export_form_data( + [pdf_file, make_pdf_file(PdfRestFileID.generate())], + data_format="xml", + ) From 0e861aaf5e6ff6a13b2a2e02e467f8d429102fb8 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 18 Feb 2026 13:54:51 -0600 Subject: [PATCH 7/8] import-form-data: Test all data file types Assisted-by: Codex --- tests/live/test_live_import_form_data.py | 147 +++++++++-- tests/resources/acroform.pdf | Bin 0 -> 9971 bytes tests/resources/test-data-acro.fdf | 19 ++ tests/resources/test-data-acro.xfdf | 11 + tests/resources/test-data-acro.xml | 9 + tests/resources/test-data-xfa.xdp | 292 ++++++++++++++++++++++ tests/resources/test-data-xfa.xfd | 300 +++++++++++++++++++++++ tests/resources/test-data-xfa.xml | 284 +++++++++++++++++++++ tests/test_import_form_data.py | 88 ++++++- 9 files changed, 1130 insertions(+), 20 deletions(-) create mode 100644 tests/resources/acroform.pdf create mode 100644 tests/resources/test-data-acro.fdf create mode 100644 tests/resources/test-data-acro.xfdf create mode 100644 tests/resources/test-data-acro.xml create mode 100644 tests/resources/test-data-xfa.xdp create mode 100644 tests/resources/test-data-xfa.xfd create mode 100644 tests/resources/test-data-xfa.xml diff --git a/tests/live/test_live_import_form_data.py b/tests/live/test_live_import_form_data.py index a1d9d932..f20ca1b2 100644 --- a/tests/live/test_live_import_form_data.py +++ b/tests/live/test_live_import_form_data.py @@ -7,31 +7,93 @@ from ..resources import get_test_resource_path +IMPORT_FORM_DATA_SUCCESS_CASES = ( + pytest.param( + ("acroform.pdf", "test-data-acro.xml", None), + id="acro-xml", + ), + pytest.param( + ("acroform.pdf", "test-data-acro.xfdf", None), + id="acro-xfdf", + ), + pytest.param( + ("xfa.pdf", "test-data-xfa.xml", None), + id="xfa-xml", + ), + pytest.param( + ("xfa.pdf", "test-data-xfa.xdp", None), + id="xfa-xdp", + ), + pytest.param( + ( + "xfa.pdf", + "test-data-xfa.xfd", + "application/vnd.adobe.xfd+xml", + ), + id="xfa-xfd", + ), +) + + +@pytest.fixture(scope="module", params=IMPORT_FORM_DATA_SUCCESS_CASES) +def uploaded_success_import_form_data_case( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + request: pytest.FixtureRequest, +) -> tuple[PdfRestFile, PdfRestFile]: + pdf_resource_name, data_resource_name, forced_data_mime = request.param + pdf_resource = get_test_resource_path(pdf_resource_name) + data_resource = get_test_resource_path(data_resource_name) + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + input_file = client.files.create_from_paths([pdf_resource])[0] + data_file = client.files.create_from_paths([data_resource])[0] + + # pdfRest currently reports .xfd uploads as application/octet-stream. + # Override the local MIME metadata so the request can exercise xfd imports. + if forced_data_mime is not None: + data_file = data_file.model_copy(update={"type": forced_data_mime}) + + return input_file, data_file + @pytest.fixture(scope="module") -def uploaded_pdf_with_forms( +def uploaded_acro_import_pair( pdfrest_api_key: str, pdfrest_live_base_url: str, -) -> PdfRestFile: - resource = get_test_resource_path("form_with_data.pdf") +) -> tuple[PdfRestFile, PdfRestFile]: + acro_pdf = get_test_resource_path("acroform.pdf") + acro_data = get_test_resource_path("test-data-acro.xml") + with PdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - return client.files.create_from_paths([resource])[0] + input_file = client.files.create_from_paths([acro_pdf])[0] + data_file = client.files.create_from_paths([acro_data])[0] + + return input_file, data_file @pytest.fixture(scope="module") -def uploaded_form_data_file( +def uploaded_acro_fdf_import_pair( pdfrest_api_key: str, pdfrest_live_base_url: str, -) -> PdfRestFile: - resource = get_test_resource_path("form_data.xml") +) -> tuple[PdfRestFile, PdfRestFile]: + acro_pdf = get_test_resource_path("acroform.pdf") + acro_fdf = get_test_resource_path("test-data-acro.fdf") + with PdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, ) as client: - return client.files.create_from_paths([resource])[0] + input_file = client.files.create_from_paths([acro_pdf])[0] + data_file = client.files.create_from_paths([acro_fdf])[0] + + return input_file, data_file @pytest.mark.parametrize( @@ -44,14 +106,17 @@ def uploaded_form_data_file( def test_live_import_form_data( pdfrest_api_key: str, pdfrest_live_base_url: str, - uploaded_pdf_with_forms: PdfRestFile, - uploaded_form_data_file: PdfRestFile, + uploaded_success_import_form_data_case: tuple[PdfRestFile, PdfRestFile], output_name: str | None, ) -> None: kwargs: dict[str, str] = {} if output_name is not None: kwargs["output"] = output_name + uploaded_pdf_with_forms, uploaded_form_data_file = ( + uploaded_success_import_form_data_case + ) + with PdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, @@ -65,6 +130,8 @@ def test_live_import_form_data( assert response.output_files output_file = response.output_file assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(uploaded_pdf_with_forms.id) in { str(file_id) for file_id in response.input_ids } @@ -81,9 +148,12 @@ def test_live_import_form_data( async def test_live_async_import_form_data_success( pdfrest_api_key: str, pdfrest_live_base_url: str, - uploaded_pdf_with_forms: PdfRestFile, - uploaded_form_data_file: PdfRestFile, + uploaded_success_import_form_data_case: tuple[PdfRestFile, PdfRestFile], ) -> None: + uploaded_pdf_with_forms, uploaded_form_data_file = ( + uploaded_success_import_form_data_case + ) + async with AsyncPdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, @@ -98,6 +168,8 @@ async def test_live_async_import_form_data_success( output_file = response.output_file assert output_file.name.startswith("async-imported") assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None assert str(uploaded_pdf_with_forms.id) in { str(file_id) for file_id in response.input_ids } @@ -106,12 +178,54 @@ async def test_live_async_import_form_data_success( } +def test_live_import_form_data_fdf_server_error( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_acro_fdf_import_pair: tuple[PdfRestFile, PdfRestFile], +) -> None: + uploaded_pdf_with_forms, uploaded_form_data_file = uploaded_acro_fdf_import_pair + + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises( + PdfRestApiError, + match=r"(?i)(issue processing|filled correctly|corrupted)", + ), + ): + client.import_form_data(uploaded_pdf_with_forms, uploaded_form_data_file) + + +@pytest.mark.asyncio +async def test_live_async_import_form_data_fdf_server_error( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_acro_fdf_import_pair: tuple[PdfRestFile, PdfRestFile], +) -> None: + uploaded_pdf_with_forms, uploaded_form_data_file = uploaded_acro_fdf_import_pair + + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises( + PdfRestApiError, + match=r"(?i)(issue processing|filled correctly|corrupted)", + ): + await client.import_form_data( + uploaded_pdf_with_forms, uploaded_form_data_file + ) + + def test_live_import_form_data_invalid_data_file_id( pdfrest_api_key: str, pdfrest_live_base_url: str, - uploaded_pdf_with_forms: PdfRestFile, - uploaded_form_data_file: PdfRestFile, + uploaded_acro_import_pair: tuple[PdfRestFile, PdfRestFile], ) -> None: + uploaded_pdf_with_forms, uploaded_form_data_file = uploaded_acro_import_pair + with ( PdfRestClient( api_key=pdfrest_api_key, @@ -130,9 +244,10 @@ def test_live_import_form_data_invalid_data_file_id( async def test_live_async_import_form_data_invalid_data_file_id( pdfrest_api_key: str, pdfrest_live_base_url: str, - uploaded_pdf_with_forms: PdfRestFile, - uploaded_form_data_file: PdfRestFile, + uploaded_acro_import_pair: tuple[PdfRestFile, PdfRestFile], ) -> None: + uploaded_pdf_with_forms, uploaded_form_data_file = uploaded_acro_import_pair + async with AsyncPdfRestClient( api_key=pdfrest_api_key, base_url=pdfrest_live_base_url, diff --git a/tests/resources/acroform.pdf b/tests/resources/acroform.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f8a37c6b0a638804f394feee213c551b3335de64 GIT binary patch literal 9971 zcmdry33wCL)~t#FDvJ*gd@`jibj@TYJ6+Q@Y14fVr5lrEl1w|936rFy@Pmq!rJw?$ zA|fD&xKWpah#;WiF9^SYED9B#S}ZCEPq9?N2mhJrv`JfV!R7ONg?8rNIp^Ga&pG#; zbI-XUImw17OsbHCJiKV(DkP6|v79G^Fe51>LvnKvPM&n)FZ>0zvhRg|$+Q9YIYLqhs8ZuOo|al0Y9r(0s4iQ|p380sMtF|9~$* z%Wo)t7C$poR3b}Tf6aLKuN;qGYWsNNe0(!crffEnBN+?nMoNGusF@?HVa67MV;Oi* z9PrvklhY8?%6bTkh37l!@G^FS^Ezq53oX{pGGrBkasqQa6m2CDxfVgEdRY(YhEAG9 zv?>I(a|Dpaa1D&&rD+mSt%Ti9ave{rnMP2OrYV=3a>JX`O1K^HWZIuLnuQ?{)W#7O z55-T&@f`QB+VqdErzvG*TE;RkV z_;(AVw$t!%gq3s>9B{x!!7r>K!%Yh_&(?#4L{=7CDPXj)u*qTb=z^DH5lp23iFi3)T(!^wmYO-Xie$_L2TfqBZwL$PsukzUa#kA&te_ZR zz8Z9fX6>NRG{XXmr0~~D+CX@uopO6%hQK2yVG*QnCG8vuGjh>hx5!IR4eRy-Ln#)d z=5fGhUz4zSJpk?WBA7-lu$^M9yu!3owUAbdrePz2w)l`>c22@=@zTQdG_VNxJk?8Z zfF-;;2pXm*3?+8E5ljoq(6#FmsuRc6`75igT@6v4k0rWx{nUX>k~-(27m4UaB6@!! zdLOb;gfoh8Mt?Y?4^Hm^h0PKN&lZugMWk$hr0fnLAF|U+dngxOi=f$}J`{=+3PlQq z{uBy*n8o7v0td_S`-0@0pzUTFw4V@35F!ZSPl6C9LHI}!?Q9}IlCaxFk%1!cdlw}| zBBVbN0P4_eN`#|CILaT6^1+cjn;3C$RzzY&B-S5^?Evy2Td8UaObp73O!A8OUJ>8x zkMH&2)r#N281PK0?f6pWa{|gvONpS@bpqY(d%NZ5y@bE5_cPvtn?Zads5(cGpoxj> zG^7MNWQbgWBe+b8sgN>o7ZlBFe+j(N;PsM8a6F{&f#sZnM*+{0K{9sWBBq2blSz0u ziYLW8KAHFz2<4G(*30p>JthW0b2!$L4_3DX<-I6cKu+`UgYkYYCPwgd2*3WkxHyDTivb4iSm2~=b0ujJ{F&Rsk%Ut+ZRO0xu>o((2GzCINLC{s{}}d7THQo%ufVB& zPSlOdy+spO9UdG!df2;3r)FHW4qR}mw%_pl`KPej>b!41TA-+q?FxBxeE-qJlD(A| zo41ckJ~Y1ng3RYl!4;P;O@8Ff%pI8Ll`G#bIf8DL95msR2T@VOrV}gOapNum<%v-5L}P~FFgwg3oN@$2(4CCK5xKt@f^Xn)1mqbTne!yY@!>ezTQbt zw5a>SJ3?5k5O=xxzMs*5%y(w^vo-+M<%sdOv;6`G{`J1Qc@K$7$u#n}~nZe_;HB z3E|rB4S+QFY$|^)XF~uIoRzRGCwR)e&6bhDLlnXF^I~?k?Y(rmwfq(twmww4IC;zI zht_@9JGRt@YCr7dUNkx6f7Ev`F*}Dgrjhm63e$G#Uf^E1Yxm3Rzx;On^1X+fF87-Z_IVAN(i%kvyptJ=AnmMnwS#$<@2LsdcD5Z zaK1yZ|Od9e;2Hq>%9k|34rIf^(}FZ<1*H~Fyf!8>$=^ljdHjc(6HSD$15_m8xLk@b4@ zz=i`g2fvP-gXgEk-cH;TeTvy$KJq2>hnCuV%I2heaOJ9V=FUUCE8hF8ZS6N_npcgy zRI%l;rl!khFAk0VK;3fkj)3f@qLW)^Nsm2Q8M)B1!Et2zqT^AUMw|(_tpTg7eDCuU z%P7@&Hh0^enUf~&U63Adc<&d{E1S;TdY*a5vBh-p>&y@J0dog@{e1Zw+tSy+W!`dd z_xl0z_|5N1)1rFI8&tg)+_AP75g{9r^y6`4cixN7Mya;ttvOW|YI6VhS~7cWNWkpF z`tt+RsIYI}rPG(pjTt@y%U#S2v+w?@;lVk(R~PEO%-f`TWL|FYfFmJKT*bFNG`GI= zpF0l#p`yOB62RG+vNuk`=gaH4lkQGs$`-2Fb$H z3R9c$UIY8xcgM1bQC~hj`<16c2K4@7;nqIA_wN~exaZ1ma2U9$l`G`@LF>04fp2QB zF=P0>Hc|iNy`RRc+*Nf(lK6S#vbrbr4XZ22xAKN~ln1BXRsWN9XNC36sA6c4XYAO#^?r8u^UorTuX)U)!<0{M1Jq4#ZYRS0p<$eOG0jl5d_Bg@wBh zg&5aA`XBDbpN4)i>q+=gvFI7Z#%M~;9#G}E-q=#s$W(Z_(SXC`jiJQ zRE6)Kk^aPU6B=HX54LiVEuYML|DThUcU}m;)a&%9C%E1C!3tMK{F06Pk^{egYxeR3 zeP)ugS5+7FZToR(vt-0f*lgz%4sXc}Js!QQJ~uYuy=4d1k&7OxI=A55k`=|)m>IgP zcXo__^swyd%oaN9{Lz&sre3>t%a+#R@kh^ATrs{9)yE7b7xYaB_Qehi+{vI@+FA!B)uq4xK&`qi z=+tY4H4Up5dN+*T_xZm(IzPmZ>F#Xzg{Wr$*?|f*?D(9AGMg8lu zPvXW7s2O9}_T~x`yHv`_tU0FE4^Nhb4<50tZoH{_`HzQ2Yzy7oZ~yBj`UU6B?=xrP zu7p9~HR|TCpY#;(-V!cI_Z%(V5p}33e$M?vz7KBezhTp|ANKO@t@O5q%R^Ju3*_H< zOet@FJ}-P!S^Y6X?I`{1U1hI+g*IN9_UyAqhhqIT>CfCdX@hL?8pXuwpF+0QB~SU` zKksjkX=+*S**v-`==5o_V9spMrs=CMuKDECLg)HiY~(I`W3{46bJ_h}!@xT$P7Hr8 zyXA>52OV5D?5890(V7Df-dQ7iNk0AcJ(4;TTV)7ddspObU-y1LdgJP-=F6casI{S> zF!ra>{iFYJarCIHcS8n$CD&r>j%#aS!?9;?z1r|t{PG#EN_*Ev7naM&bFxzJ1<4H)lAZRgU zW-(v-C94wv6Vg#403VmOpH1^R0dTRV$y31jz0Cs%!MhF;m;4xhc}(c($YXY9DHA=A z0*4$WF60bTp=bg8{iS3jR8Alds^Ig6*GW`DjINNwDGnY={CdGTpPGcUjoZReW{Ay& zWF@31{kpoRX!w=z3j_Z0$w{GT+mX7xPJTWr+e}kay-*wQRXA@{tc7r?GlleX#aba7 z+cBn)JB3OqBFF?1wt^h7_Oh~YuF6C3_7)sYk|4DXUj`2H3*8~pSXt-HlLywgzT5Zhu4lQK4hw$aN0=}pGo4=C}x-Qpl}O?Iaxcy zIfCKK0TMxm?HREgK=$Wym-8W%0)^~91X;q=c;HOF9)T9R$t*}T@RPUmYADXVon{i@ zA)wIcPuBl@rStMt7$lTnSfR<2ZNB4n}& z@V^6&NA_J1;8~GDnK^>1MWT=%7Xkn&ovf{aTBBRY)x-RMX%HxuSiKgg!u*y|#F`0T z9zjCAjN1iuG8;u&`NRtawO+o{!@meU9N}{D-2PnlkF0%#Bwc=z(Wp_1rKIk4C9#$ySE_hQVznfN@DwttprUv*ln%)(&MY=e zh*AZ{I;17sgvzwI;;f3`plFrNUWUsG371YiHQb~xj!DYaW{1YbJE9Vc%9Ao>sLQ}w zGR6fbs)EL8OG;CMjA1zy5#<#b(UGZfc1LDlA{Ct$o|`y1Rv!_JXKN=Bxw3KNvPCO^1UyAklv+8Jh%O9_MUCV{N1U!SHz=z}ZmLp6g<%T0J-(Qkl${!v77>%0rC_y* zQ*&I>S#dr5hOo;ztc|beB-WgSaue?ad?rNE-(qR6 z?eUdJ-gp}*k{0yoR~Rxs?V84sHVITKF^R162T>`NawTFzI{VaM0r?+<>FmRJGuo{W z2WQc}Pp*LJcZGwC4q%YF^{L@5M7KU1Q-Y1`3WxJ~nC^W_yazp8%iHj-xN;0~SKa$? zZI5-xRhk}ik!w}J*sk;xvYzWuC?Kxs4yTs)pr@3>Es$<-N)_*Zx{jg3nX>=@ literal 0 HcmV?d00001 diff --git a/tests/resources/test-data-acro.fdf b/tests/resources/test-data-acro.fdf new file mode 100644 index 00000000..68a80087 --- /dev/null +++ b/tests/resources/test-data-acro.fdf @@ -0,0 +1,19 @@ +%FDF-1.2 +%âãÏÓ +1 0 obj +<< +/F (minimal_contact_form_acroform.pdf) +/Fields [ + << /T (Name) /V (Alice Example) >> + << /T (Email) /V (alice@example.com) >> + << /T (Phone) /V (+1 312 555 0100) >> + << /T (Message) /V (Hello!\nThis is a sample message populated via FDF.) >> + << /T (Subscribe) /V /Yes >> +] +>> +endobj +trailer +<< +/Root 1 0 R +>> +%%EOF diff --git a/tests/resources/test-data-acro.xfdf b/tests/resources/test-data-acro.xfdf new file mode 100644 index 00000000..8d2a7d76 --- /dev/null +++ b/tests/resources/test-data-acro.xfdf @@ -0,0 +1,11 @@ + + + + Alice Example + alice@example.com + +1 312 555 0100 + Hello! +This is a sample message populated via XFDF. + Yes + + diff --git a/tests/resources/test-data-acro.xml b/tests/resources/test-data-acro.xml new file mode 100644 index 00000000..32ebb856 --- /dev/null +++ b/tests/resources/test-data-acro.xml @@ -0,0 +1,9 @@ + + + Alice Example + alice@example.com + +1 312 555 0100 + Hello! +This is a sample message populated via XML (convert to FDF/XFDF as needed). + true + diff --git a/tests/resources/test-data-xfa.xdp b/tests/resources/test-data-xfa.xdp new file mode 100644 index 00000000..39dcfbcb --- /dev/null +++ b/tests/resources/test-data-xfa.xdp @@ -0,0 +1,292 @@ + + + +JohnDoexxxxxxxxxJaneDoexxxxxxxxx
123 Oak StAnytown, USA 00000
HoH
+
diff --git a/tests/resources/test-data-xfa.xfd b/tests/resources/test-data-xfa.xfd new file mode 100644 index 00000000..5a3f984f --- /dev/null +++ b/tests/resources/test-data-xfa.xfd @@ -0,0 +1,300 @@ + + +JohnDoexxxxxxxxxJaneDoexxxxxxxxx
123 Oak StAnytown, USA 00000
HoH
diff --git a/tests/resources/test-data-xfa.xml b/tests/resources/test-data-xfa.xml new file mode 100644 index 00000000..3319f551 --- /dev/null +++ b/tests/resources/test-data-xfa.xml @@ -0,0 +1,284 @@ + +JohnDoexxxxxxxxxJaneDoexxxxxxxxx
123 Oak StAnytown, USA 00000
HoH
diff --git a/tests/test_import_form_data.py b/tests/test_import_form_data.py index 127421a3..10519bb3 100644 --- a/tests/test_import_form_data.py +++ b/tests/test_import_form_data.py @@ -17,19 +17,51 @@ make_pdf_file, ) +ACCEPTED_IMPORT_DATA_FILE_MIME_TYPES = ( + pytest.param("application/xml", id="application-xml"), + pytest.param("text/xml", id="text-xml"), + pytest.param("application/vnd.fdf", id="application-vnd-fdf"), + pytest.param( + "application/vnd.adobe.xfdf", + id="application-vnd-adobe-xfdf", + ), + pytest.param( + "application/vnd.adobe.xdp+xml", + id="application-vnd-adobe-xdp+xml", + ), + pytest.param( + "application/vnd.adobe.xfd+xml", + id="application-vnd-adobe-xfd+xml", + ), +) + def _make_data_file( file_id: PdfRestFileID, *, mime_type: str = "application/xml" ) -> PdfRestFile: + file_name = "form-data.xml" + if mime_type == "application/vnd.fdf": + file_name = "form-data.fdf" + elif mime_type == "application/vnd.adobe.xfdf": + file_name = "form-data.xfdf" + elif mime_type == "application/vnd.adobe.xdp+xml": + file_name = "form-data.xdp" + elif mime_type == "application/vnd.adobe.xfd+xml": + file_name = "form-data.xfd" + return PdfRestFile.model_validate( - build_file_info_payload(file_id, "form-data.xml", mime_type) + build_file_info_payload(file_id, file_name, mime_type) ) -def test_import_form_data_success(monkeypatch: pytest.MonkeyPatch) -> None: +@pytest.mark.parametrize("data_file_mime", ACCEPTED_IMPORT_DATA_FILE_MIME_TYPES) +def test_import_form_data_success( + monkeypatch: pytest.MonkeyPatch, + data_file_mime: str, +) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(1)) - data_file = _make_data_file(PdfRestFileID.generate(2)) + data_file = _make_data_file(PdfRestFileID.generate(2), mime_type=data_file_mime) output_id = str(PdfRestFileID.generate()) payload_dump = PdfImportFormDataPayload.model_validate( @@ -153,12 +185,14 @@ def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio +@pytest.mark.parametrize("data_file_mime", ACCEPTED_IMPORT_DATA_FILE_MIME_TYPES) async def test_async_import_form_data_success( monkeypatch: pytest.MonkeyPatch, + data_file_mime: str, ) -> None: monkeypatch.delenv("PDFREST_API_KEY", raising=False) input_file = make_pdf_file(PdfRestFileID.generate(2)) - data_file = _make_data_file(PdfRestFileID.generate(1)) + data_file = _make_data_file(PdfRestFileID.generate(1), mime_type=data_file_mime) output_id = str(PdfRestFileID.generate()) payload_dump = PdfImportFormDataPayload.model_validate( @@ -324,3 +358,49 @@ def test_import_form_data_validation(monkeypatch: pytest.MonkeyPatch) -> None: pdf_file, [data_file, _make_data_file(PdfRestFileID.generate())], ) + + +@pytest.mark.asyncio +async def test_async_import_form_data_validation( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + pdf_file = make_pdf_file(PdfRestFileID.generate(1)) + data_file = _make_data_file(PdfRestFileID.generate(2)) + png_file = PdfRestFile.model_validate( + build_file_info_payload( + PdfRestFileID.generate(), + "example.png", + "image/png", + ) + ) + transport = httpx.MockTransport(lambda request: (_ for _ in ()).throw(RuntimeError)) + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises(ValidationError, match="Must be a PDF file"): + await client.import_form_data(png_file, data_file) + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises( + ValidationError, + match="Data file must be an XFDF, XDP, XFD, FDF, or XML file", + ): + await client.import_form_data(pdf_file, png_file) + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ): + await client.import_form_data( + [pdf_file, make_pdf_file(PdfRestFileID.generate())], + data_file, + ) + + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + with pytest.raises( + ValidationError, match="List should have at most 1 item after validation" + ): + await client.import_form_data( + pdf_file, + [data_file, _make_data_file(PdfRestFileID.generate())], + ) From 67cc64e172d817454d3539e04c8bfa0e48621189 Mon Sep 17 00:00:00 2001 From: Christopher Green Date: Wed, 18 Feb 2026 14:03:00 -0600 Subject: [PATCH 8/8] export-form-data: Fix `ruff` error --- src/pdfrest/models/_internal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 78d229bd..8e31dba5 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -23,10 +23,10 @@ from pdfrest.types.public import PdfRedactionPreset from ..types import ( + ExportDataFormat, HtmlPageOrientation, HtmlPageSize, HtmlWebLayout, - ExportDataFormat, OcrLanguage, PdfAType, PdfConversionCompression,