diff --git a/src/pdfrest/client.py b/src/pdfrest/client.py index 8a7f1bb7..505f1288 100644 --- a/src/pdfrest/client.py +++ b/src/pdfrest/client.py @@ -75,7 +75,13 @@ from .models._internal import ( BasePdfRestGraphicPayload, BmpPdfRestPayload, + ConvertEmailToPdfPayload, + ConvertHtmlToPdfPayload, + ConvertImageToPdfPayload, + ConvertOfficeToPdfPayload, + ConvertPostscriptToPdfPayload, ConvertToMarkdownPayload, + ConvertUrlToPdfPayload, DeletePayload, ExtractImagesPayload, ExtractTextPayload, @@ -125,10 +131,16 @@ FlattenQuality, GifColorModel, GraphicSmoothing, + HtmlPageOrientation, + HtmlPageSize, + HtmlWebLayout, JpegColorModel, OcrLanguage, PdfAddTextObject, PdfAType, + PdfConversionCompression, + PdfConversionDownsample, + PdfConversionLocale, PdfInfoQuery, PdfMergeInput, PdfPageOrientation, @@ -3292,6 +3304,218 @@ def rasterize_pdf( timeout=timeout, ) + def convert_office_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + compression: PdfConversionCompression = "lossy", + downsample: PdfConversionDownsample = 300, + tagged_pdf: bool = False, + locale: PdfConversionLocale | None = None, + page_size: HtmlPageSize | None = None, + page_margin: str | None = None, + page_orientation: HtmlPageOrientation | None = None, + web_layout: HtmlWebLayout | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert a Microsoft Office file (Word, Excel, PowerPoint) to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + "compression": compression, + "downsample": downsample, + "tagged_pdf": tagged_pdf, + "locale": locale, + "page_size": page_size, + "page_margin": page_margin, + "page_orientation": page_orientation, + "web_layout": web_layout, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertOfficeToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + def convert_postscript_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + compression: PdfConversionCompression = "lossy", + downsample: PdfConversionDownsample | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert a PostScript or EPS file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + "compression": compression, + "downsample": downsample, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertPostscriptToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + def convert_email_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert an RFC822 email file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertEmailToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + def convert_image_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert a supported image file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertImageToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + def convert_html_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + compression: PdfConversionCompression = "lossy", + downsample: PdfConversionDownsample = 300, + page_size: HtmlPageSize = "letter", + page_margin: str = "1.0in", + page_orientation: HtmlPageOrientation = "portrait", + web_layout: HtmlWebLayout = "desktop", + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert an uploaded HTML file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + "compression": compression, + "downsample": downsample, + "page_size": page_size, + "page_margin": page_margin, + "page_orientation": page_orientation, + "web_layout": web_layout, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertHtmlToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + def convert_url_to_pdf( + self, + url: UrlValue, + *, + output: str | None = None, + compression: PdfConversionCompression = "lossy", + downsample: PdfConversionDownsample = 300, + page_size: HtmlPageSize = "letter", + page_margin: str = "1.0in", + page_orientation: HtmlPageOrientation = "portrait", + web_layout: HtmlWebLayout = "desktop", + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Convert HTML content from one URL to PDF.""" + + payload: dict[str, Any] = { + "url": url, + "output": output, + "compression": compression, + "downsample": downsample, + "page_size": page_size, + "page_margin": page_margin, + "page_orientation": page_orientation, + "web_layout": web_layout, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertUrlToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + def convert_to_pdfa( self, file: PdfRestFile | Sequence[PdfRestFile], @@ -4783,6 +5007,218 @@ async def rasterize_pdf( timeout=timeout, ) + async def convert_office_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + compression: PdfConversionCompression = "lossy", + downsample: PdfConversionDownsample = 300, + tagged_pdf: bool = False, + locale: PdfConversionLocale | None = None, + page_size: HtmlPageSize | None = None, + page_margin: str | None = None, + page_orientation: HtmlPageOrientation | None = None, + web_layout: HtmlWebLayout | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert a Microsoft Office file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + "compression": compression, + "downsample": downsample, + "tagged_pdf": tagged_pdf, + "locale": locale, + "page_size": page_size, + "page_margin": page_margin, + "page_orientation": page_orientation, + "web_layout": web_layout, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return await self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertOfficeToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + async def convert_postscript_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + compression: PdfConversionCompression = "lossy", + downsample: PdfConversionDownsample | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert a PostScript or EPS file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + "compression": compression, + "downsample": downsample, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return await self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertPostscriptToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + async def convert_email_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert an RFC822 email file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return await self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertEmailToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + async def convert_image_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert a supported image file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return await self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertImageToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + async def convert_html_to_pdf( + self, + file: PdfRestFile | Sequence[PdfRestFile], + *, + output: str | None = None, + compression: PdfConversionCompression = "lossy", + downsample: PdfConversionDownsample = 300, + page_size: HtmlPageSize = "letter", + page_margin: str = "1.0in", + page_orientation: HtmlPageOrientation = "portrait", + web_layout: HtmlWebLayout = "desktop", + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert an uploaded HTML file to PDF.""" + + payload: dict[str, Any] = { + "files": file, + "output": output, + "compression": compression, + "downsample": downsample, + "page_size": page_size, + "page_margin": page_margin, + "page_orientation": page_orientation, + "web_layout": web_layout, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return await self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertHtmlToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + + async def convert_url_to_pdf( + self, + url: UrlValue, + *, + output: str | None = None, + compression: PdfConversionCompression = "lossy", + downsample: PdfConversionDownsample = 300, + page_size: HtmlPageSize = "letter", + page_margin: str = "1.0in", + page_orientation: HtmlPageOrientation = "portrait", + web_layout: HtmlWebLayout = "desktop", + extra_query: Query | None = None, + extra_headers: AnyMapping | None = None, + extra_body: Body | None = None, + timeout: TimeoutTypes | None = None, + ) -> PdfRestFileBasedResponse: + """Asynchronously convert HTML content from one URL to PDF.""" + + payload: dict[str, Any] = { + "url": url, + "output": output, + "compression": compression, + "downsample": downsample, + "page_size": page_size, + "page_margin": page_margin, + "page_orientation": page_orientation, + "web_layout": web_layout, + } + payload = {key: value for key, value in payload.items() if value is not None} + + return await self._post_file_operation( + endpoint="/pdf", + payload=payload, + payload_model=ConvertUrlToPdfPayload, + extra_query=extra_query, + extra_headers=extra_headers, + extra_body=extra_body, + timeout=timeout, + ) + async def convert_to_pdfa( self, file: PdfRestFile | Sequence[PdfRestFile], diff --git a/src/pdfrest/models/_internal.py b/src/pdfrest/models/_internal.py index 2b7691b2..65f65197 100644 --- a/src/pdfrest/models/_internal.py +++ b/src/pdfrest/models/_internal.py @@ -23,8 +23,14 @@ from pdfrest.types.public import PdfRedactionPreset from ..types import ( + HtmlPageOrientation, + HtmlPageSize, + HtmlWebLayout, OcrLanguage, PdfAType, + PdfConversionCompression, + PdfConversionDownsample, + PdfConversionLocale, PdfInfoQuery, PdfPageOrientation, PdfPageSize, @@ -114,6 +120,10 @@ def _serialize_as_first_file_id(value: list[PdfRestFile]) -> str: return str(value[0].id) +def _serialize_as_first_url(value: list[HttpUrl]) -> str: + return str(value[0]) + + def _serialize_as_comma_separated_string(value: list[Any] | None) -> str | None: if value is None: return None @@ -243,6 +253,9 @@ def _validate_output_language(value: str) -> str: return trimmed +_PAGE_MARGIN_REGEX = r"^(?:\d+(?:\.\d+)?)(?:mm|in)$" + + class UploadURLs(BaseModel): url: Annotated[ list[HttpUrl] | HttpUrl, @@ -551,6 +564,276 @@ class ConvertToMarkdownPayload(BaseModel): ] = None +_PDF_WORD_MIME_TYPES = { + "application/msword", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", +} +_PDF_EXCEL_MIME_TYPES = { + "application/vnd.ms-excel", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", +} +_PDF_POWERPOINT_MIME_TYPES = { + "application/vnd.ms-powerpoint", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", +} +_PDF_OFFICE_MIME_TYPES = ( + _PDF_WORD_MIME_TYPES | _PDF_EXCEL_MIME_TYPES | _PDF_POWERPOINT_MIME_TYPES +) +_PDF_POSTSCRIPT_MIME_TYPES = { + "application/postscript", + "application/eps", + "application/x-eps", +} +_PDF_EMAIL_MIME_TYPES = {"message/rfc822"} +_PDF_IMAGE_MIME_TYPES = { + "image/jpeg", + "image/tiff", + "image/bmp", + "image/png", +} +_PDF_HTML_MIME_TYPES = {"text/html"} + + +class ConvertOfficeToPdfPayload(BaseModel): + """Adapt caller options into a pdfRest-ready office-to-pdf payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + *_PDF_OFFICE_MIME_TYPES, + error_msg="Must be a Microsoft Office file.", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + compression: Annotated[ + PdfConversionCompression | None, + Field(serialization_alias="compression", default=None), + ] = None + downsample: Annotated[ + PdfConversionDownsample | None, + Field(serialization_alias="downsample", default=None), + ] = None + tagged_pdf: Annotated[ + Literal["on", "off"] | None, + Field(serialization_alias="tagged_pdf", default=None), + BeforeValidator(_bool_to_on_off), + ] = None + locale: Annotated[ + PdfConversionLocale | None, + Field(serialization_alias="locale", default=None), + ] = None + + @model_validator(mode="after") + def _validate_option_compatibility(self) -> ConvertOfficeToPdfPayload: + mime_type = self.files[0].type + if self.locale is not None and mime_type not in _PDF_EXCEL_MIME_TYPES: + msg = "locale is only supported for Excel inputs." + raise ValueError(msg) + return self + + +class ConvertPostscriptToPdfPayload(BaseModel): + """Adapt caller options into a pdfRest-ready postscript-to-pdf payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + *_PDF_POSTSCRIPT_MIME_TYPES, + error_msg="Must be a PostScript or EPS file.", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + compression: Annotated[ + PdfConversionCompression | None, + Field(serialization_alias="compression", default=None), + ] = None + downsample: Annotated[ + PdfConversionDownsample | None, + Field(serialization_alias="downsample", default=None), + ] = None + + +class ConvertEmailToPdfPayload(BaseModel): + """Adapt caller options into a pdfRest-ready email-to-pdf payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + *_PDF_EMAIL_MIME_TYPES, + error_msg="Must be an RFC822 email file.", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + +class ConvertImageToPdfPayload(BaseModel): + """Adapt caller options into a pdfRest-ready image-to-pdf payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + *_PDF_IMAGE_MIME_TYPES, + error_msg="Must be a supported image file type.", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + + +class ConvertHtmlToPdfPayload(BaseModel): + """Adapt caller options into a pdfRest-ready html-to-pdf payload.""" + + files: Annotated[ + list[PdfRestFile], + Field( + min_length=1, + max_length=1, + validation_alias=AliasChoices("file", "files"), + serialization_alias="id", + ), + BeforeValidator(_ensure_list), + AfterValidator( + _allowed_mime_types( + *_PDF_HTML_MIME_TYPES, + error_msg="Must be an HTML file.", + ) + ), + PlainSerializer(_serialize_as_first_file_id), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + compression: Annotated[ + PdfConversionCompression | None, + Field(serialization_alias="compression", default=None), + ] = None + downsample: Annotated[ + PdfConversionDownsample | None, + Field(serialization_alias="downsample", default=None), + ] = None + page_size: Annotated[ + HtmlPageSize | None, + Field(serialization_alias="page_size", default=None), + ] = None + page_margin: Annotated[ + str | None, + Field( + serialization_alias="page_margin", + pattern=_PAGE_MARGIN_REGEX, + default=None, + ), + ] = None + page_orientation: Annotated[ + HtmlPageOrientation | None, + Field(serialization_alias="page_orientation", default=None), + ] = None + web_layout: Annotated[ + HtmlWebLayout | None, + Field(serialization_alias="web_layout", default=None), + ] = None + + +class ConvertUrlToPdfPayload(BaseModel): + """Adapt caller options into a pdfRest-ready convert-to-pdf payload for one URL.""" + + url: Annotated[ + list[HttpUrl], + Field(serialization_alias="url", min_length=1, max_length=1), + BeforeValidator(_ensure_list), + PlainSerializer(_serialize_as_first_url), + ] + output: Annotated[ + str | None, + Field(serialization_alias="output", min_length=1, default=None), + AfterValidator(_validate_output_prefix), + ] = None + compression: Annotated[ + PdfConversionCompression | None, + Field(serialization_alias="compression", default=None), + ] = None + downsample: Annotated[ + PdfConversionDownsample | None, + Field(serialization_alias="downsample", default=None), + ] = None + page_size: Annotated[ + HtmlPageSize | None, + Field(serialization_alias="page_size", default=None), + ] = None + page_margin: Annotated[ + str | None, + Field( + serialization_alias="page_margin", + pattern=_PAGE_MARGIN_REGEX, + default=None, + ), + ] = None + page_orientation: Annotated[ + HtmlPageOrientation | None, + Field(serialization_alias="page_orientation", default=None), + ] = None + web_layout: Annotated[ + HtmlWebLayout | None, + Field(serialization_alias="web_layout", default=None), + ] = None + + class TranslatePdfTextPayload(BaseModel): """Adapt caller options into a pdfRest-ready translate request payload.""" diff --git a/src/pdfrest/types/__init__.py b/src/pdfrest/types/__init__.py index 474356f0..b7db11ae 100644 --- a/src/pdfrest/types/__init__.py +++ b/src/pdfrest/types/__init__.py @@ -10,11 +10,17 @@ FlattenQuality, GifColorModel, GraphicSmoothing, + HtmlPageOrientation, + HtmlPageSize, + HtmlWebLayout, JpegColorModel, OcrLanguage, PdfAddTextObject, PdfAType, PdfCmykColor, + PdfConversionCompression, + PdfConversionDownsample, + PdfConversionLocale, PdfCustomPageSize, PdfInfoQuery, PdfMergeInput, @@ -46,11 +52,17 @@ "FlattenQuality", "GifColorModel", "GraphicSmoothing", + "HtmlPageOrientation", + "HtmlPageSize", + "HtmlWebLayout", "JpegColorModel", "OcrLanguage", "PdfAType", "PdfAddTextObject", "PdfCmykColor", + "PdfConversionCompression", + "PdfConversionDownsample", + "PdfConversionLocale", "PdfCustomPageSize", "PdfInfoQuery", "PdfMergeInput", diff --git a/src/pdfrest/types/public.py b/src/pdfrest/types/public.py index 43391f21..20c73d97 100644 --- a/src/pdfrest/types/public.py +++ b/src/pdfrest/types/public.py @@ -22,11 +22,17 @@ "FlattenQuality", "GifColorModel", "GraphicSmoothing", + "HtmlPageOrientation", + "HtmlPageSize", + "HtmlWebLayout", "JpegColorModel", "OcrLanguage", "PdfAType", "PdfAddTextObject", "PdfCmykColor", + "PdfConversionCompression", + "PdfConversionDownsample", + "PdfConversionLocale", "PdfCustomPageSize", "PdfInfoQuery", "PdfMergeInput", @@ -144,6 +150,12 @@ class PdfMergeSource(TypedDict, total=False): PdfMergeInput = PdfRestFile | PdfMergeSource | tuple[PdfRestFile, PdfPageSelection] +PdfConversionCompression = Literal["lossy", "lossless"] +PdfConversionDownsample = Literal["off", 75, 150, 300, 600, 1200] +PdfConversionLocale = Literal["US", "Germany"] +HtmlPageSize = Literal["letter", "legal", "ledger", "A3", "A4", "A5"] +HtmlPageOrientation = Literal["portrait", "landscape"] +HtmlWebLayout = Literal["desktop", "tablet", "mobile"] PdfAType = Literal["PDF/A-1b", "PDF/A-2b", "PDF/A-2u", "PDF/A-3b", "PDF/A-3u"] PdfXType = Literal["PDF/X-1a", "PDF/X-3", "PDF/X-4", "PDF/X-6"] ExtractTextGranularity = Literal["off", "by_page", "document"] diff --git a/tests/convert_to_pdf_test_helpers.py b/tests/convert_to_pdf_test_helpers.py new file mode 100644 index 00000000..86eaf32c --- /dev/null +++ b/tests/convert_to_pdf_test_helpers.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from pdfrest.models import PdfRestFile + + +def make_source_file(file_id: str, mime_type: str, name: str) -> PdfRestFile: + return PdfRestFile.model_validate( + { + "id": file_id, + "name": name, + "url": f"https://api.pdfrest.com/resource/{file_id}", + "type": mime_type, + "size": 512, + "modified": "2024-01-01T00:00:00Z", + "scheduledDeletionTimeUtc": None, + } + ) diff --git a/tests/live/test_live_convert_email_to_pdf.py b/tests/live/test_live_convert_email_to_pdf.py new file mode 100644 index 00000000..2507aeaf --- /dev/null +++ b/tests/live/test_live_convert_email_to_pdf.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_email_for_pdf( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("test.eml") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +def test_live_convert_email_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_email_for_pdf: PdfRestFile, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_email_to_pdf( + uploaded_email_for_pdf, + output="live-email", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_email_for_pdf.id) + assert output_file.name.startswith("live-email") + + +@pytest.mark.asyncio +async def test_live_async_convert_email_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_email_for_pdf: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_email_to_pdf( + uploaded_email_for_pdf, + output="live-email-async", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_email_for_pdf.id) + assert output_file.name.startswith("live-email-async") + + +def test_live_convert_email_to_pdf_invalid_id_override( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_email_for_pdf: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)id|file|resource|not found"), + ): + client.convert_email_to_pdf( + uploaded_email_for_pdf, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) diff --git a/tests/live/test_live_convert_html_to_pdf.py b/tests/live/test_live_convert_html_to_pdf.py new file mode 100644 index 00000000..507a09c9 --- /dev/null +++ b/tests/live/test_live_convert_html_to_pdf.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_html_for_pdf( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("sample.html") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +def test_live_convert_html_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_html_for_pdf: PdfRestFile, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_html_to_pdf( + uploaded_html_for_pdf, + output="live-html-file", + page_size="letter", + page_margin="8mm", + page_orientation="portrait", + web_layout="desktop", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_html_for_pdf.id) + assert output_file.name.startswith("live-html-file") + + +@pytest.mark.asyncio +async def test_live_async_convert_html_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_html_for_pdf: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_html_to_pdf( + uploaded_html_for_pdf, + output="live-html-file-async", + page_orientation="landscape", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_html_for_pdf.id) + assert output_file.name.startswith("live-html-file-async") + + +def test_live_convert_html_to_pdf_invalid_page_size( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_html_for_pdf: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)page_size|page size"), + ): + client.convert_html_to_pdf( + uploaded_html_for_pdf, + extra_body={"page_size": "poster"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_convert_html_to_pdf_invalid_page_size( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_html_for_pdf: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)page_size|page size"): + await client.convert_html_to_pdf( + uploaded_html_for_pdf, + extra_body={"page_size": "poster"}, + ) diff --git a/tests/live/test_live_convert_image_to_pdf.py b/tests/live/test_live_convert_image_to_pdf.py new file mode 100644 index 00000000..9a13364b --- /dev/null +++ b/tests/live/test_live_convert_image_to_pdf.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_image_for_pdf( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("ducky.png") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +def test_live_convert_image_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_image_for_pdf: PdfRestFile, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_image_to_pdf( + uploaded_image_for_pdf, + output="live-image", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_image_for_pdf.id) + assert output_file.name.startswith("live-image") + + +@pytest.mark.asyncio +async def test_live_async_convert_image_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_image_for_pdf: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_image_to_pdf( + uploaded_image_for_pdf, + output="live-image-async", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_image_for_pdf.id) + assert output_file.name.startswith("live-image-async") + + +def test_live_convert_image_to_pdf_invalid_id_override( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_image_for_pdf: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)id|file|resource|not found"), + ): + client.convert_image_to_pdf( + uploaded_image_for_pdf, + extra_body={"id": "00000000-0000-0000-0000-000000000000"}, + ) diff --git a/tests/live/test_live_convert_office_to_pdf.py b/tests/live/test_live_convert_office_to_pdf.py new file mode 100644 index 00000000..c213a194 --- /dev/null +++ b/tests/live/test_live_convert_office_to_pdf.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_docx_for_pdf( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("report.docx") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + ("output_name", "compression", "downsample"), + [ + pytest.param(None, None, None, id="defaults"), + pytest.param("live-docx", "lossless", 600, id="customized"), + ], +) +def test_live_convert_office_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_docx_for_pdf: PdfRestFile, + output_name: str | None, + compression: str | None, + downsample: int | None, +) -> None: + kwargs: dict[str, object] = {} + if output_name is not None: + kwargs["output"] = output_name + if compression is not None: + kwargs["compression"] = compression + if downsample is not None: + kwargs["downsample"] = downsample + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_office_to_pdf(uploaded_docx_for_pdf, **kwargs) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_docx_for_pdf.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_convert_office_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_docx_for_pdf: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_office_to_pdf( + uploaded_docx_for_pdf, + output="async-docx", + tagged_pdf=True, + ) + + assert response.output_files + output_file = response.output_file + assert output_file.name.startswith("async-docx") + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_docx_for_pdf.id) + + +def test_live_convert_office_to_pdf_invalid_downsample( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_docx_for_pdf: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)downsample"), + ): + client.convert_office_to_pdf( + uploaded_docx_for_pdf, + extra_body={"downsample": 0}, + ) diff --git a/tests/live/test_live_convert_postscript_to_pdf.py b/tests/live/test_live_convert_postscript_to_pdf.py new file mode 100644 index 00000000..9b02145d --- /dev/null +++ b/tests/live/test_live_convert_postscript_to_pdf.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient +from pdfrest.models import PdfRestFile + +from ..resources import get_test_resource_path + + +@pytest.fixture(scope="module") +def uploaded_postscript_for_pdf( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> PdfRestFile: + resource = get_test_resource_path("sample.eps") + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + return client.files.create_from_paths([resource])[0] + + +@pytest.mark.parametrize( + ("output_name", "compression", "downsample"), + [ + pytest.param(None, None, None, id="defaults"), + pytest.param("live-postscript", "lossy", 300, id="customized"), + ], +) +def test_live_convert_postscript_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_postscript_for_pdf: PdfRestFile, + output_name: str | None, + compression: str | None, + downsample: int | None, +) -> None: + kwargs: dict[str, object] = {} + if output_name is not None: + kwargs["output"] = output_name + if compression is not None: + kwargs["compression"] = compression + if downsample is not None: + kwargs["downsample"] = downsample + + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_postscript_to_pdf( + uploaded_postscript_for_pdf, + **kwargs, + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_postscript_for_pdf.id) + if output_name is not None: + assert output_file.name.startswith(output_name) + else: + assert output_file.name.endswith(".pdf") + + +@pytest.mark.asyncio +async def test_live_async_convert_postscript_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_postscript_for_pdf: PdfRestFile, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_postscript_to_pdf( + uploaded_postscript_for_pdf, + output="live-postscript-async", + compression="lossless", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) == str(uploaded_postscript_for_pdf.id) + assert output_file.name.startswith("live-postscript-async") + + +def test_live_convert_postscript_to_pdf_invalid_downsample( + pdfrest_api_key: str, + pdfrest_live_base_url: str, + uploaded_postscript_for_pdf: PdfRestFile, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)downsample"), + ): + client.convert_postscript_to_pdf( + uploaded_postscript_for_pdf, + extra_body={"downsample": 0}, + ) diff --git a/tests/live/test_live_convert_url_to_pdf.py b/tests/live/test_live_convert_url_to_pdf.py new file mode 100644 index 00000000..897f81cc --- /dev/null +++ b/tests/live/test_live_convert_url_to_pdf.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +import pytest + +from pdfrest import AsyncPdfRestClient, PdfRestApiError, PdfRestClient + +LIVE_HTML_URL = "https://example.com" + + +def test_live_convert_url_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + with PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = client.convert_url_to_pdf( + LIVE_HTML_URL, + output="live-html-url", + page_size="letter", + page_margin="8mm", + page_orientation="portrait", + web_layout="desktop", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) + assert output_file.name.startswith("live-html-url") + + +@pytest.mark.asyncio +async def test_live_async_convert_url_to_pdf_success( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + response = await client.convert_url_to_pdf( + LIVE_HTML_URL, + output="live-html-url-async", + page_orientation="portrait", + ) + + assert response.output_files + output_file = response.output_file + assert output_file.type == "application/pdf" + assert output_file.size > 0 + assert response.warning is None + assert str(response.input_id) + assert output_file.name.startswith("live-html-url-async") + + +def test_live_convert_url_to_pdf_invalid_page_size( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + with ( + PdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client, + pytest.raises(PdfRestApiError, match=r"(?i)page_size|page size"), + ): + client.convert_url_to_pdf( + LIVE_HTML_URL, + extra_body={"page_size": "poster"}, + ) + + +@pytest.mark.asyncio +async def test_live_async_convert_url_to_pdf_invalid_page_size( + pdfrest_api_key: str, + pdfrest_live_base_url: str, +) -> None: + async with AsyncPdfRestClient( + api_key=pdfrest_api_key, + base_url=pdfrest_live_base_url, + ) as client: + with pytest.raises(PdfRestApiError, match=r"(?i)page_size|page size"): + await client.convert_url_to_pdf( + LIVE_HTML_URL, + extra_body={"page_size": "poster"}, + ) diff --git a/tests/resources/ducky.png b/tests/resources/ducky.png new file mode 100644 index 00000000..5e40cc7b Binary files /dev/null and b/tests/resources/ducky.png differ diff --git a/tests/resources/sample.eps b/tests/resources/sample.eps new file mode 100644 index 00000000..4c534abd Binary files /dev/null and b/tests/resources/sample.eps differ diff --git a/tests/resources/sample.html b/tests/resources/sample.html new file mode 100644 index 00000000..ef114f96 --- /dev/null +++ b/tests/resources/sample.html @@ -0,0 +1,10 @@ + + + + pdfRest HTML Sample + + +

Hello from pdfRest

+

This is a sample HTML page used for live convert-to-pdf testing.

+ + diff --git a/tests/resources/test.eml b/tests/resources/test.eml new file mode 100644 index 00000000..35652b1e --- /dev/null +++ b/tests/resources/test.eml @@ -0,0 +1,15 @@ +From: sender@example.com +To: recipient@example.com +Subject: Test Email +Date: Fri, 7 Jun 2024 12:00:00 +0000 +Message-ID: <1234567890@example.com> +MIME-Version: 1.0 +Content-Type: text/plain; charset="UTF-8" +Content-Transfer-Encoding: 7bit + +Hello, + +This is a simple test email message sent as a sample .eml file. + +Best regards, +Sender diff --git a/tests/test_convert_email_to_pdf.py b/tests/test_convert_email_to_pdf.py new file mode 100644 index 00000000..754a2f45 --- /dev/null +++ b/tests/test_convert_email_to_pdf.py @@ -0,0 +1,276 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import ConvertEmailToPdfPayload + +from .convert_to_pdf_test_helpers import make_source_file +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, build_file_info_payload + + +def test_convert_email_to_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "message/rfc822", + "example.eml", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertEmailToPdfPayload.model_validate( + { + "files": [input_file], + "output": "converted", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_email_to_pdf( + input_file, + output="converted", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "converted.pdf" + assert response.output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_convert_email_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "message/rfc822", + "message.eml", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_email_to_pdf( + input_file, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.5, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.5) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.5) + + +def test_convert_email_to_pdf_validation_errors() -> None: + email_file = make_source_file( + str(PdfRestFileID.generate(1)), + "message/rfc822", + "message.eml", + ) + image_file = make_source_file( + str(PdfRestFileID.generate(2)), + "image/jpeg", + "photo.jpg", + ) + + with pytest.raises(ValidationError, match="Must be an RFC822 email file"): + ConvertEmailToPdfPayload.model_validate({"files": [image_file]}) + + with pytest.raises( + ValidationError, + match="List should have at most 1 item after validation", + ): + ConvertEmailToPdfPayload.model_validate({"files": [email_file, image_file]}) + + +@pytest.mark.asyncio +async def test_async_convert_email_to_pdf_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "message/rfc822", + "async-message.eml", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertEmailToPdfPayload.model_validate( + { + "files": [input_file], + "output": "async-converted", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_email_to_pdf( + input_file, + output="async-converted", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-converted.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_email_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "message/rfc822", + "async-message.eml", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_email_to_pdf( + input_file, + output="async-custom", + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": True}, + timeout=0.6, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.6) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.6) diff --git a/tests/test_convert_html_to_pdf.py b/tests/test_convert_html_to_pdf.py new file mode 100644 index 00000000..294ab2ad --- /dev/null +++ b/tests/test_convert_html_to_pdf.py @@ -0,0 +1,356 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import ConvertHtmlToPdfPayload + +from .convert_to_pdf_test_helpers import make_source_file +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, build_file_info_payload + + +def test_convert_html_to_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "text/html", + "example.html", + ) + output_id = str(PdfRestFileID.generate()) + + payload_dump = ConvertHtmlToPdfPayload.model_validate( + { + "files": [input_file], + "output": "html-out", + "page_size": "A4", + "page_margin": "10mm", + "page_orientation": "landscape", + "web_layout": "tablet", + "compression": "lossy", + "downsample": 300, + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + "warning": "html warning", + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "html-out.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_html_to_pdf( + input_file, + output="html-out", + page_size="A4", + page_margin="10mm", + page_orientation="landscape", + web_layout="tablet", + compression="lossy", + ) + + assert response.output_file.name == "html-out.pdf" + assert response.output_file.type == "application/pdf" + assert response.warning == "html warning" + + +def test_convert_html_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "text/html", + "example.html", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["downsample"] == 300 + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_html_to_pdf( + input_file, + output="custom", + downsample=300, + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.5, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.5) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.5) + + +def test_convert_html_to_pdf_validation_errors() -> None: + with pytest.raises(ValidationError, match="Must be an HTML file"): + ConvertHtmlToPdfPayload.model_validate( + { + "files": [ + make_source_file( + str(PdfRestFileID.generate(2)), + "application/msword", + "example.doc", + ) + ] + } + ) + + with pytest.raises(ValidationError, match="String should match pattern"): + ConvertHtmlToPdfPayload.model_validate( + { + "files": [ + make_source_file( + str(PdfRestFileID.generate()), + "text/html", + "example.html", + ) + ], + "page_margin": "bad-margin", + } + ) + + +@pytest.mark.parametrize( + "page_margin", + [ + pytest.param("8mm", id="whole-millimeters"), + pytest.param("2.5in", id="decimal-inches"), + pytest.param("10.25mm", id="long-decimal-millimeters"), + pytest.param("0in", id="zero-inches"), + ], +) +def test_convert_html_to_pdf_page_margin_accepts_documented_values( + page_margin: str, +) -> None: + payload = ConvertHtmlToPdfPayload.model_validate( + { + "files": [ + make_source_file( + str(PdfRestFileID.generate()), + "text/html", + "example.html", + ) + ], + "page_margin": page_margin, + } + ) + + assert payload.page_margin == page_margin + + +@pytest.mark.parametrize( + "page_margin", + [ + pytest.param("8", id="missing-unit"), + pytest.param("mm", id="missing-number"), + pytest.param("2.5 in", id="embedded-space"), + pytest.param("8MM", id="uppercase-unit"), + pytest.param(" 8mm", id="leading-space"), + pytest.param("8mm ", id="trailing-space"), + ], +) +def test_convert_html_to_pdf_page_margin_rejects_invalid_values( + page_margin: str, +) -> None: + with pytest.raises(ValidationError, match="String should match pattern"): + ConvertHtmlToPdfPayload.model_validate( + { + "files": [ + make_source_file( + str(PdfRestFileID.generate()), + "text/html", + "example.html", + ) + ], + "page_margin": page_margin, + } + ) + + +@pytest.mark.asyncio +async def test_async_convert_html_to_pdf_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "text/html", + "example.html", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertHtmlToPdfPayload.model_validate( + { + "files": [input_file], + "output": "async-converted", + "compression": "lossy", + "downsample": 300, + "page_size": "letter", + "page_margin": "1.0in", + "page_orientation": "portrait", + "web_layout": "desktop", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_html_to_pdf( + input_file, + output="async-converted", + page_orientation="portrait", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-converted.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_html_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "text/html", + "example.html", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" + assert payload["downsample"] == 300 + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_html_to_pdf( + input_file, + output="async-custom", + downsample=300, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": True}, + timeout=0.6, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.6) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.6) diff --git a/tests/test_convert_image_to_pdf.py b/tests/test_convert_image_to_pdf.py new file mode 100644 index 00000000..5c569a06 --- /dev/null +++ b/tests/test_convert_image_to_pdf.py @@ -0,0 +1,269 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import ConvertImageToPdfPayload + +from .convert_to_pdf_test_helpers import make_source_file +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, build_file_info_payload + + +def test_convert_image_to_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "image/png", + "example.png", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertImageToPdfPayload.model_validate( + { + "files": [input_file], + "output": "converted", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_image_to_pdf( + input_file, + output="converted", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "converted.pdf" + assert response.output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_convert_image_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "image/jpeg", + "photo.jpg", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_image_to_pdf( + input_file, + output="custom", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.5, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.5) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.5) + + +def test_convert_image_to_pdf_validation_errors() -> None: + with pytest.raises(ValidationError, match="Must be a supported image file type"): + ConvertImageToPdfPayload.model_validate( + { + "files": [ + make_source_file( + str(PdfRestFileID.generate(2)), + "text/html", + "example.html", + ) + ] + } + ) + + +@pytest.mark.asyncio +async def test_async_convert_image_to_pdf_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "image/tiff", + "scan.tiff", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertImageToPdfPayload.model_validate( + { + "files": [input_file], + "output": "async-converted", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_image_to_pdf( + input_file, + output="async-converted", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-converted.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_image_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "image/png", + "async.png", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_image_to_pdf( + input_file, + output="async-custom", + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": True}, + timeout=0.6, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.6) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.6) diff --git a/tests/test_convert_office_to_pdf.py b/tests/test_convert_office_to_pdf.py new file mode 100644 index 00000000..7a77f29b --- /dev/null +++ b/tests/test_convert_office_to_pdf.py @@ -0,0 +1,306 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import ConvertOfficeToPdfPayload + +from .convert_to_pdf_test_helpers import make_source_file +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, build_file_info_payload + + +def test_convert_office_to_pdf_word_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "example.docx", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertOfficeToPdfPayload.model_validate( + { + "files": [input_file], + "output": "converted", + "compression": "lossless", + "downsample": 150, + "tagged_pdf": True, + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_office_to_pdf( + input_file, + output="converted", + compression="lossless", + downsample=150, + tagged_pdf=True, + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "converted.pdf" + assert response.output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_convert_office_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "application/vnd.ms-excel", + "example.xls", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["compression"] == "lossy" + assert payload["downsample"] == 600 + assert payload["locale"] == "US" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_office_to_pdf( + input_file, + output="custom", + compression="lossy", + downsample=600, + locale="US", + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.5, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.5) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.5) + + +def test_convert_office_to_pdf_validation_errors() -> None: + word_file = make_source_file( + str(PdfRestFileID.generate(1)), + "application/msword", + "example.doc", + ) + image_file = make_source_file( + str(PdfRestFileID.generate(2)), + "image/jpeg", + "photo.jpg", + ) + + with pytest.raises(ValidationError, match="Must be a Microsoft Office file"): + ConvertOfficeToPdfPayload.model_validate({"files": [image_file]}) + + with pytest.raises( + ValidationError, match="locale is only supported for Excel inputs" + ): + ConvertOfficeToPdfPayload.model_validate( + {"files": [word_file], "locale": "Germany"} + ) + + with pytest.raises( + ValidationError, + match="List should have at most 1 item after validation", + ): + ConvertOfficeToPdfPayload.model_validate({"files": [word_file, image_file]}) + + +@pytest.mark.asyncio +async def test_async_convert_office_to_pdf_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "application/vnd.ms-excel", + "sheet.xls", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertOfficeToPdfPayload.model_validate( + { + "files": [input_file], + "output": "async-converted", + "compression": "lossy", + "downsample": 300, + "tagged_pdf": False, + "locale": "US", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_office_to_pdf( + input_file, + output="async-converted", + locale="US", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-converted.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_office_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "application/vnd.ms-excel", + "sheet.xls", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["compression"] == "lossy" + assert payload["downsample"] == 600 + assert payload["locale"] == "US" + assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_office_to_pdf( + input_file, + output="async-custom", + compression="lossy", + downsample=600, + locale="US", + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": True}, + timeout=0.6, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.6) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.6) diff --git a/tests/test_convert_postscript_to_pdf.py b/tests/test_convert_postscript_to_pdf.py new file mode 100644 index 00000000..20ae98ac --- /dev/null +++ b/tests/test_convert_postscript_to_pdf.py @@ -0,0 +1,283 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import ConvertPostscriptToPdfPayload + +from .convert_to_pdf_test_helpers import make_source_file +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, build_file_info_payload + + +def test_convert_postscript_to_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "application/postscript", + "example.ps", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertPostscriptToPdfPayload.model_validate( + { + "files": [input_file], + "output": "converted", + "compression": "lossless", + "downsample": 150, + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + assert request.url.params["format"] == "info" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_postscript_to_pdf( + input_file, + output="converted", + compression="lossless", + downsample=150, + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "converted.pdf" + assert response.output_file.type == "application/pdf" + assert response.warning is None + assert str(response.input_id) == str(input_file.id) + + +def test_convert_postscript_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(1)), + "application/eps", + "example.eps", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["compression"] == "lossy" + assert payload["downsample"] == 600 + assert payload["id"] == str(input_file.id) + assert payload["output"] == "custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "true" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_postscript_to_pdf( + input_file, + output="custom", + compression="lossy", + downsample=600, + extra_query={"trace": "true"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": True}, + timeout=0.5, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.5) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.5) + + +def test_convert_postscript_to_pdf_validation_errors() -> None: + with pytest.raises(ValidationError, match="Must be a PostScript or EPS file"): + ConvertPostscriptToPdfPayload.model_validate( + { + "files": [ + make_source_file( + str(PdfRestFileID.generate(2)), + "message/rfc822", + "message.eml", + ) + ] + } + ) + + +@pytest.mark.asyncio +async def test_async_convert_postscript_to_pdf_success( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "application/x-eps", + "graphic.eps", + ) + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertPostscriptToPdfPayload.model_validate( + { + "files": [input_file], + "output": "async-converted", + "compression": "lossless", + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + seen: dict[str, int] = {"post": 0, "get": 0} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + seen["post"] += 1 + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + seen["get"] += 1 + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-converted.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_postscript_to_pdf( + input_file, + output="async-converted", + compression="lossless", + ) + + assert seen == {"post": 1, "get": 1} + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-converted.pdf" + assert response.output_file.type == "application/pdf" + assert str(response.input_id) == str(input_file.id) + + +@pytest.mark.asyncio +async def test_async_convert_postscript_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + input_file = make_source_file( + str(PdfRestFileID.generate(2)), + "application/postscript", + "graphic.ps", + ) + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["debug"] is True + assert payload["compression"] == "lossy" + assert payload["downsample"] == 600 + assert payload["id"] == str(input_file.id) + assert payload["output"] == "async-custom" + return httpx.Response( + 200, + json={ + "inputId": [input_file.id], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-custom.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_postscript_to_pdf( + input_file, + output="async-custom", + compression="lossy", + downsample=600, + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": True}, + timeout=0.6, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-custom.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.6) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.6) diff --git a/tests/test_convert_url_to_pdf.py b/tests/test_convert_url_to_pdf.py new file mode 100644 index 00000000..7f78bdbd --- /dev/null +++ b/tests/test_convert_url_to_pdf.py @@ -0,0 +1,247 @@ +from __future__ import annotations + +import json + +import httpx +import pytest +from pydantic import ValidationError + +from pdfrest import AsyncPdfRestClient, PdfRestClient +from pdfrest.models import PdfRestFileBasedResponse, PdfRestFileID +from pdfrest.models._internal import ConvertUrlToPdfPayload + +from .graphics_test_helpers import ASYNC_API_KEY, VALID_API_KEY, build_file_info_payload + + +def test_convert_url_to_pdf_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + url = "https://example.com/page" + output_id = str(PdfRestFileID.generate()) + payload_dump = ConvertUrlToPdfPayload.model_validate( + { + "url": url, + "output": "url-out", + "compression": "lossy", + "page_size": "letter", + "page_margin": "2.5in", + "page_orientation": "portrait", + "web_layout": "desktop", + "downsample": 300, + } + ).model_dump(mode="json", by_alias=True, exclude_none=True, exclude_unset=True) + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + payload = json.loads(request.content.decode("utf-8")) + assert payload == payload_dump + return httpx.Response( + 200, + json={ + "inputId": [PdfRestFileID.generate()], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "url-out.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_url_to_pdf( + url, + output="url-out", + page_size="letter", + page_margin="2.5in", + page_orientation="portrait", + web_layout="desktop", + downsample=300, + ) + + assert response.output_file.name == "url-out.pdf" + assert response.output_file.type == "application/pdf" + + +def test_convert_url_to_pdf_validation_errors() -> None: + with pytest.raises(ValidationError, match="Input should be a valid URL"): + ConvertUrlToPdfPayload.model_validate({"url": "not-a-url"}) + + with pytest.raises(ValidationError, match="at least 1 item"): + ConvertUrlToPdfPayload.model_validate({"url": []}) + + with pytest.raises(ValidationError, match="at most 1 item"): + ConvertUrlToPdfPayload.model_validate( + {"url": ["https://example.com/one", "https://example.com/two"]} + ) + + with pytest.raises(ValidationError, match="String should match pattern"): + ConvertUrlToPdfPayload.model_validate( + {"url": "https://example.com", "page_margin": "mm"} + ) + + +@pytest.mark.parametrize( + "page_margin", + [ + pytest.param("8mm", id="whole-millimeters"), + pytest.param("2.5in", id="decimal-inches"), + pytest.param("10.25mm", id="long-decimal-millimeters"), + pytest.param("0in", id="zero-inches"), + ], +) +def test_convert_url_to_pdf_page_margin_accepts_documented_values( + page_margin: str, +) -> None: + payload = ConvertUrlToPdfPayload.model_validate( + {"url": "https://example.com/page", "page_margin": page_margin} + ) + + assert payload.page_margin == page_margin + + +@pytest.mark.parametrize( + "page_margin", + [ + pytest.param("8", id="missing-unit"), + pytest.param("mm", id="missing-number"), + pytest.param("2.5 in", id="embedded-space"), + pytest.param("8MM", id="uppercase-unit"), + pytest.param(" 8mm", id="leading-space"), + pytest.param("8mm ", id="trailing-space"), + ], +) +def test_convert_url_to_pdf_page_margin_rejects_invalid_values( + page_margin: str, +) -> None: + with pytest.raises(ValidationError, match="String should match pattern"): + ConvertUrlToPdfPayload.model_validate( + {"url": "https://example.com/page", "page_margin": page_margin} + ) + + +def test_convert_url_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + url = "https://example.com/page" + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "sync" + assert request.headers["X-Debug"] == "sync" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["url"] == url + assert payload["output"] == "sync-url" + assert payload["page_orientation"] == "portrait" + assert payload["debug"] == "yes" + return httpx.Response( + 200, + json={ + "inputId": [PdfRestFileID.generate()], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "sync" + assert request.headers["X-Debug"] == "sync" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "sync-url.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + with PdfRestClient(api_key=VALID_API_KEY, transport=transport) as client: + response = client.convert_url_to_pdf( + url, + output="sync-url", + page_orientation="portrait", + extra_query={"trace": "sync"}, + extra_headers={"X-Debug": "sync"}, + extra_body={"debug": "yes"}, + timeout=0.5, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "sync-url.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.5) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.5) + + +@pytest.mark.asyncio +async def test_async_convert_url_to_pdf_request_customization( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.delenv("PDFREST_API_KEY", raising=False) + url = "https://example.com/page" + output_id = str(PdfRestFileID.generate()) + captured_timeout: dict[str, float | dict[str, float] | None] = {} + + def handler(request: httpx.Request) -> httpx.Response: + if request.method == "POST" and request.url.path == "/pdf": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + captured_timeout["value"] = request.extensions.get("timeout") + payload = json.loads(request.content.decode("utf-8")) + assert payload["url"] == url + assert payload["output"] == "async-url" + assert payload["page_orientation"] == "portrait" + assert payload["debug"] == "yes" + return httpx.Response( + 200, + json={ + "inputId": [PdfRestFileID.generate()], + "outputId": [output_id], + }, + ) + if request.method == "GET" and request.url.path == f"/resource/{output_id}": + assert request.url.params["trace"] == "async" + assert request.headers["X-Debug"] == "async" + return httpx.Response( + 200, + json=build_file_info_payload( + output_id, "async-url.pdf", "application/pdf" + ), + ) + msg = f"Unexpected request {request.method} {request.url}" + raise AssertionError(msg) + + transport = httpx.MockTransport(handler) + async with AsyncPdfRestClient(api_key=ASYNC_API_KEY, transport=transport) as client: + response = await client.convert_url_to_pdf( + url, + output="async-url", + page_orientation="portrait", + extra_query={"trace": "async"}, + extra_headers={"X-Debug": "async"}, + extra_body={"debug": "yes"}, + timeout=0.6, + ) + + assert isinstance(response, PdfRestFileBasedResponse) + assert response.output_file.name == "async-url.pdf" + timeout_value = captured_timeout["value"] + assert timeout_value is not None + if isinstance(timeout_value, dict): + assert all( + component == pytest.approx(0.6) for component in timeout_value.values() + ) + else: + assert timeout_value == pytest.approx(0.6)