Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
436 changes: 436 additions & 0 deletions src/pdfrest/client.py

Large diffs are not rendered by default.

283 changes: 283 additions & 0 deletions src/pdfrest/models/_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,14 @@
from pdfrest.types.public import PdfRedactionPreset

from ..types import (
HtmlPageOrientation,
HtmlPageSize,
HtmlWebLayout,
OcrLanguage,
PdfAType,
PdfConversionCompression,
PdfConversionDownsample,
PdfConversionLocale,
PdfInfoQuery,
PdfPageOrientation,
PdfPageSize,
Expand Down Expand Up @@ -114,6 +120,10 @@ def _serialize_as_first_file_id(value: list[PdfRestFile]) -> str:
return str(value[0].id)


def _serialize_as_first_url(value: list[HttpUrl]) -> str:
    """Return the string form of the first URL in ``value``.

    Used as a serializer for fields that hold a one-element URL list but are
    sent as a single string.
    """
    first_url = value[0]
    return str(first_url)


def _serialize_as_comma_separated_string(value: list[Any] | None) -> str | None:
if value is None:
return None
Expand Down Expand Up @@ -243,6 +253,9 @@ def _validate_output_language(value: str) -> str:
return trimmed


# Shape required of a `page_margin` value: one or more digits, optionally
# followed by a decimal point and more digits, then the unit `mm` or `in`
# with nothing before or after (e.g. "8mm", "0.5in").
_PAGE_MARGIN_REGEX = r"^(?:\d+(?:\.\d+)?)(?:mm|in)$"


class UploadURLs(BaseModel):
url: Annotated[
list[HttpUrl] | HttpUrl,
Expand Down Expand Up @@ -551,6 +564,276 @@ class ConvertToMarkdownPayload(BaseModel):
] = None


# MIME types used to gate the input file of each convert-to-PDF payload model.

# Microsoft Office families, kept separate so a single family can be tested.
_PDF_WORD_MIME_TYPES = {
    "application/msword",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
}
_PDF_EXCEL_MIME_TYPES = {
    "application/vnd.ms-excel",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
}
_PDF_POWERPOINT_MIME_TYPES = {
    "application/vnd.ms-powerpoint",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
}
# Every Office type: the union of the three families above.
_PDF_OFFICE_MIME_TYPES = {
    *_PDF_WORD_MIME_TYPES,
    *_PDF_EXCEL_MIME_TYPES,
    *_PDF_POWERPOINT_MIME_TYPES,
}

# Non-Office inputs.
_PDF_POSTSCRIPT_MIME_TYPES = {"application/postscript", "application/eps", "application/x-eps"}
_PDF_EMAIL_MIME_TYPES = {"message/rfc822"}
_PDF_IMAGE_MIME_TYPES = {"image/jpeg", "image/tiff", "image/bmp", "image/png"}
_PDF_HTML_MIME_TYPES = {"text/html"}


class ConvertOfficeToPdfPayload(BaseModel):
    """Request payload model for converting one Office document to PDF.

    Holds a single Word, Excel, or PowerPoint file together with the optional
    ``output``, ``compression``, ``downsample``, ``tagged_pdf``, and ``locale``
    settings. ``locale`` is rejected unless the file is an Excel type.
    """

    # Exactly one source file. Input may arrive under ``file`` or ``files``;
    # the serialization alias is ``id`` and the value is the first file's id.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        # Runs before list validation (presumably wraps a lone file in a list).
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_OFFICE_MIME_TYPES,
                error_msg="Must be a Microsoft Office file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional, non-empty output value checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None

    # Optional conversion settings shared with the other convert-to-PDF models.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(serialization_alias="compression", default=None),
    ] = None
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(serialization_alias="downsample", default=None),
    ] = None

    # Stored as "on"/"off"; ``_bool_to_on_off`` sees the raw input first
    # (presumably mapping booleans onto those strings -- helper not shown here).
    tagged_pdf: Annotated[
        Literal["on", "off"] | None,
        Field(serialization_alias="tagged_pdf", default=None),
        BeforeValidator(_bool_to_on_off),
    ] = None

    # Only valid for Excel inputs; enforced by the model validator below.
    locale: Annotated[
        PdfConversionLocale | None,
        Field(serialization_alias="locale", default=None),
    ] = None

    @model_validator(mode="after")
    def _validate_option_compatibility(self) -> ConvertOfficeToPdfPayload:
        """Reject ``locale`` when the source file is not an Excel MIME type.

        Raises:
            ValueError: ``locale`` is set and the file type is not in
                ``_PDF_EXCEL_MIME_TYPES``.
        """
        source_mime = self.files[0].type
        if self.locale is None:
            return self
        if source_mime in _PDF_EXCEL_MIME_TYPES:
            return self
        error_message = "locale is only supported for Excel inputs."
        raise ValueError(error_message)


class ConvertPostscriptToPdfPayload(BaseModel):
    """Request payload model for converting one PostScript/EPS file to PDF.

    Holds a single source file plus the optional ``output``, ``compression``,
    and ``downsample`` settings.
    """

    # Exactly one source file. Input may arrive under ``file`` or ``files``;
    # the serialization alias is ``id`` and the value is the first file's id.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        # Runs before list validation (presumably wraps a lone file in a list).
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_POSTSCRIPT_MIME_TYPES,
                error_msg="Must be a PostScript or EPS file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional, non-empty output value checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None

    # Optional conversion settings.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(serialization_alias="compression", default=None),
    ] = None
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(serialization_alias="downsample", default=None),
    ] = None


class ConvertEmailToPdfPayload(BaseModel):
    """Request payload model for converting one RFC822 email file to PDF.

    Holds a single source file and an optional ``output`` setting.
    """

    # Exactly one source file. Input may arrive under ``file`` or ``files``;
    # the serialization alias is ``id`` and the value is the first file's id.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        # Runs before list validation (presumably wraps a lone file in a list).
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_EMAIL_MIME_TYPES,
                error_msg="Must be an RFC822 email file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional, non-empty output value checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None


class ConvertImageToPdfPayload(BaseModel):
    """Request payload model for converting one image file to PDF.

    Holds a single source file whose MIME type must be one of
    ``_PDF_IMAGE_MIME_TYPES``, and an optional ``output`` setting.
    """

    # Exactly one source file. Input may arrive under ``file`` or ``files``;
    # the serialization alias is ``id`` and the value is the first file's id.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        # Runs before list validation (presumably wraps a lone file in a list).
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_IMAGE_MIME_TYPES,
                error_msg="Must be a supported image file type.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional, non-empty output value checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None


class ConvertHtmlToPdfPayload(BaseModel):
    """Request payload model for converting one uploaded HTML file to PDF.

    Holds a single ``text/html`` source file plus optional output, image
    (``compression``, ``downsample``) and page-layout (``page_size``,
    ``page_margin``, ``page_orientation``, ``web_layout``) settings.
    """

    # Exactly one source file. Input may arrive under ``file`` or ``files``;
    # the serialization alias is ``id`` and the value is the first file's id.
    files: Annotated[
        list[PdfRestFile],
        Field(
            min_length=1,
            max_length=1,
            validation_alias=AliasChoices("file", "files"),
            serialization_alias="id",
        ),
        # Runs before list validation (presumably wraps a lone file in a list).
        BeforeValidator(_ensure_list),
        AfterValidator(
            _allowed_mime_types(
                *_PDF_HTML_MIME_TYPES,
                error_msg="Must be an HTML file.",
            )
        ),
        PlainSerializer(_serialize_as_first_file_id),
    ]

    # Optional, non-empty output value checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None

    # Optional conversion settings.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(serialization_alias="compression", default=None),
    ] = None
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(serialization_alias="downsample", default=None),
    ] = None

    # Optional page-layout settings.
    page_size: Annotated[
        HtmlPageSize | None,
        Field(serialization_alias="page_size", default=None),
    ] = None
    # Must match ``_PAGE_MARGIN_REGEX``: a number followed by ``mm`` or ``in``.
    page_margin: Annotated[
        str | None,
        Field(
            serialization_alias="page_margin",
            pattern=_PAGE_MARGIN_REGEX,
            default=None,
        ),
    ] = None
    page_orientation: Annotated[
        HtmlPageOrientation | None,
        Field(serialization_alias="page_orientation", default=None),
    ] = None
    web_layout: Annotated[
        HtmlWebLayout | None,
        Field(serialization_alias="web_layout", default=None),
    ] = None


class ConvertUrlToPdfPayload(BaseModel):
    """Request payload model for converting the page at one URL to PDF.

    Holds a single HTTP(S) URL plus optional output, image (``compression``,
    ``downsample``) and page-layout (``page_size``, ``page_margin``,
    ``page_orientation``, ``web_layout``) settings.
    """

    # Exactly one URL, kept as a one-element list and serialized as the string
    # form of that element.
    url: Annotated[
        list[HttpUrl],
        Field(serialization_alias="url", min_length=1, max_length=1),
        # Runs before list validation (presumably wraps a lone URL in a list).
        BeforeValidator(_ensure_list),
        PlainSerializer(_serialize_as_first_url),
    ]

    # Optional, non-empty output value checked by ``_validate_output_prefix``.
    output: Annotated[
        str | None,
        Field(serialization_alias="output", min_length=1, default=None),
        AfterValidator(_validate_output_prefix),
    ] = None

    # Optional conversion settings.
    compression: Annotated[
        PdfConversionCompression | None,
        Field(serialization_alias="compression", default=None),
    ] = None
    downsample: Annotated[
        PdfConversionDownsample | None,
        Field(serialization_alias="downsample", default=None),
    ] = None

    # Optional page-layout settings.
    page_size: Annotated[
        HtmlPageSize | None,
        Field(serialization_alias="page_size", default=None),
    ] = None
    # Must match ``_PAGE_MARGIN_REGEX``: a number followed by ``mm`` or ``in``.
    page_margin: Annotated[
        str | None,
        Field(
            serialization_alias="page_margin",
            pattern=_PAGE_MARGIN_REGEX,
            default=None,
        ),
    ] = None
    page_orientation: Annotated[
        HtmlPageOrientation | None,
        Field(serialization_alias="page_orientation", default=None),
    ] = None
    web_layout: Annotated[
        HtmlWebLayout | None,
        Field(serialization_alias="web_layout", default=None),
    ] = None


class TranslatePdfTextPayload(BaseModel):
"""Adapt caller options into a pdfRest-ready translate request payload."""

Expand Down
12 changes: 12 additions & 0 deletions src/pdfrest/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,17 @@
FlattenQuality,
GifColorModel,
GraphicSmoothing,
HtmlPageOrientation,
HtmlPageSize,
HtmlWebLayout,
JpegColorModel,
OcrLanguage,
PdfAddTextObject,
PdfAType,
PdfCmykColor,
PdfConversionCompression,
PdfConversionDownsample,
PdfConversionLocale,
PdfCustomPageSize,
PdfInfoQuery,
PdfMergeInput,
Expand Down Expand Up @@ -46,11 +52,17 @@
"FlattenQuality",
"GifColorModel",
"GraphicSmoothing",
"HtmlPageOrientation",
"HtmlPageSize",
"HtmlWebLayout",
"JpegColorModel",
"OcrLanguage",
"PdfAType",
"PdfAddTextObject",
"PdfCmykColor",
"PdfConversionCompression",
"PdfConversionDownsample",
"PdfConversionLocale",
"PdfCustomPageSize",
"PdfInfoQuery",
"PdfMergeInput",
Expand Down
12 changes: 12 additions & 0 deletions src/pdfrest/types/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,17 @@
"FlattenQuality",
"GifColorModel",
"GraphicSmoothing",
"HtmlPageOrientation",
"HtmlPageSize",
"HtmlWebLayout",
"JpegColorModel",
"OcrLanguage",
"PdfAType",
"PdfAddTextObject",
"PdfCmykColor",
"PdfConversionCompression",
"PdfConversionDownsample",
"PdfConversionLocale",
"PdfCustomPageSize",
"PdfInfoQuery",
"PdfMergeInput",
Expand Down Expand Up @@ -144,6 +150,12 @@ class PdfMergeSource(TypedDict, total=False):

# A merge input is one of: a PdfRestFile, a PdfMergeSource mapping, or a
# (PdfRestFile, PdfPageSelection) tuple.
PdfMergeInput = PdfRestFile | PdfMergeSource | tuple[PdfRestFile, PdfPageSelection]

# Values accepted by the `compression` option of the convert-to-PDF payloads.
PdfConversionCompression = Literal["lossy", "lossless"]
# Values accepted by the `downsample` option: the string "off" or one of the
# listed integers (presumably a target resolution -- confirm against the
# pdfRest API documentation).
PdfConversionDownsample = Literal["off", 75, 150, 300, 600, 1200]
# Values accepted by the `locale` option of the Office-to-PDF payload.
PdfConversionLocale = Literal["US", "Germany"]
# Values accepted by the `page_size`, `page_orientation`, and `web_layout`
# options of the HTML-to-PDF and URL-to-PDF payloads.
HtmlPageSize = Literal["letter", "legal", "ledger", "A3", "A4", "A5"]
HtmlPageOrientation = Literal["portrait", "landscape"]
HtmlWebLayout = Literal["desktop", "tablet", "mobile"]
PdfAType = Literal["PDF/A-1b", "PDF/A-2b", "PDF/A-2u", "PDF/A-3b", "PDF/A-3u"]
PdfXType = Literal["PDF/X-1a", "PDF/X-3", "PDF/X-4", "PDF/X-6"]
ExtractTextGranularity = Literal["off", "by_page", "document"]
Expand Down
17 changes: 17 additions & 0 deletions tests/convert_to_pdf_test_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from __future__ import annotations

from pdfrest.models import PdfRestFile


def make_source_file(file_id: str, mime_type: str, name: str) -> PdfRestFile:
    """Build a ``PdfRestFile`` test fixture for the given id, MIME type, and name.

    The remaining fields are fixed: a resource URL derived from ``file_id``,
    a size of 512, a 2024-01-01 modification timestamp, and no scheduled
    deletion time.
    """
    file_payload = {
        "id": file_id,
        "name": name,
        "url": f"https://api.pdfrest.com/resource/{file_id}",
        "type": mime_type,
        "size": 512,
        "modified": "2024-01-01T00:00:00Z",
        "scheduledDeletionTimeUtc": None,
    }
    return PdfRestFile.model_validate(file_payload)
Loading