From 6369d76e9543b8d41efbfebc448e715f3cd37eac Mon Sep 17 00:00:00 2001 From: William Lima Date: Sun, 31 May 2026 18:10:06 -0100 Subject: [PATCH 1/6] chore(repo): ignore Python artifacts and remove tracked bytecode --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..185d1ab --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*.egg-info/ +.pytest_cache/ +.mypy_cache/ From d5e559f92385f8a38d5182a615bcf5452df177d1 Mon Sep 17 00:00:00 2001 From: William Lima Date: Sun, 31 May 2026 18:10:20 -0100 Subject: [PATCH 2/6] feat(dwg-preview): add DWG to SVG preview endpoint --- ckanext/malmo/dwg_preview.py | 565 ++++++++++++++++++++++++++++++++++ ckanext/malmo/logic/action.py | 18 ++ ckanext/malmo/plugin.py | 7 + ckanext/malmo/views.py | 103 +++++++ requirements.txt | 3 +- setup.py | 3 +- 6 files changed, 697 insertions(+), 2 deletions(-) create mode 100644 ckanext/malmo/dwg_preview.py create mode 100644 ckanext/malmo/logic/action.py create mode 100644 ckanext/malmo/views.py diff --git a/ckanext/malmo/dwg_preview.py b/ckanext/malmo/dwg_preview.py new file mode 100644 index 0000000..5cbf7e9 --- /dev/null +++ b/ckanext/malmo/dwg_preview.py @@ -0,0 +1,565 @@ +from __future__ import annotations + +import logging +import os +import re +import shutil +import subprocess +from tempfile import TemporaryDirectory +from typing import Any +from urllib.parse import urlparse + +import requests + +import ckan.lib.uploader as uploader +import ckan.logic as logic +from ckan.plugins import toolkit + +log = logging.getLogger(__name__) + +ValidationError = logic.ValidationError +NotAuthorized = logic.NotAuthorized +NotFound = logic.NotFound + +DWG_MIME_TYPES = { + "application/acad", + "application/autocad_dwg", + "application/dwg", + "application/x-acad", + "application/x-autocad", + "application/x-dwg", + "image/vnd.dwg", + "image/x-dwg", +} +DEFAULT_TIMEOUT_SECONDS = 
30 +DEFAULT_DOWNLOAD_TIMEOUT_SECONDS = 30 +DEFAULT_MAX_DOWNLOAD_BYTES = 100 * 1024 * 1024 +DOWNLOAD_CHUNK_SIZE = 64 * 1024 +SVG_VIEWBOX_RE = re.compile( + r'viewBox="(?P-?\d+(?:\.\d+)?)\s+' + r'(?P-?\d+(?:\.\d+)?)\s+' + r'(?P\d+(?:\.\d+)?)\s+' + r'(?P\d+(?:\.\d+)?)"' +) +SVG_ROOT_TAG_RE = re.compile(r"]*>", re.IGNORECASE | re.DOTALL) +SVG_WIDTH_ATTR_RE = re.compile(r'width="[^"]*"', re.IGNORECASE) +SVG_HEIGHT_ATTR_RE = re.compile(r'height="[^"]*"', re.IGNORECASE) +SVG_DRAWABLE_TAG_RE = re.compile( + r"<(?:use|path|line|polyline|polygon|circle|ellipse|text)\b", + re.IGNORECASE, +) + + +def build_preview_payload(context: dict[str, Any], data_dict: dict[str, Any]) -> dict[str, Any]: + """ + Build a binary preview payload from a DWG resource. + + The returned dictionary is meant for internal Python callers. The Flask + route turns this payload into an HTTP response with the correct mimetype. + """ + resource_id = (data_dict or {}).get("resource_id") + if not resource_id: + raise ValidationError({"resource_id": ["Missing value"]}) + + resource = _get_resource_for_preview(context, resource_id) + if not _is_dwg_resource(resource): + raise ValidationError( + {"resource_id": ["Resource must be a DWG file to generate a preview"]} + ) + + conversion_timeout = _get_int_config( + "ckanext.malmo.dwg_preview_timeout", + DEFAULT_TIMEOUT_SECONDS, + ) + download_timeout = _get_int_config( + "ckanext.malmo.dwg_preview_download_timeout", + DEFAULT_DOWNLOAD_TIMEOUT_SECONDS, + ) + max_download_bytes = _get_int_config( + "ckanext.malmo.dwg_preview_max_download_bytes", + DEFAULT_MAX_DOWNLOAD_BYTES, + ) + + with TemporaryDirectory(prefix="ckan-dwg-preview-") as tmp_dir: + source_path = _stage_resource_dwg( + resource, + tmp_dir, + max_download_bytes=max_download_bytes, + download_timeout=download_timeout, + ) + output_path = _convert_dwg_to_best_svg( + source_path, + tmp_dir, + timeout=conversion_timeout, + ) + with open(output_path, "rb") as output_file: + content = output_file.read() + + 
return { + "content": content, + "filename": _build_output_filename(resource), + "mimetype": "image/svg+xml", + "resource_id": resource_id, + } + + +def _get_resource_for_preview(context: dict[str, Any], resource_id: str) -> dict[str, Any]: + try: + return toolkit.get_action("resource_show")(context, {"id": resource_id}) + except NotFound: + raise ValidationError({"resource_id": ["Resource does not exist"]}) + except NotAuthorized: + raise ValidationError({"resource_id": ["User cannot view this resource"]}) + + +def _is_dwg_resource(resource: dict[str, Any]) -> bool: + resource_format = str(resource.get("format") or "").strip().lower() + if resource_format: + normalized_format = resource_format.lstrip(".") + if normalized_format == "dwg" or "dwg" in normalized_format: + return True + + for mime_field in ("mimetype", "mimetype_inner"): + mimetype_value = str(resource.get(mime_field) or "").strip().lower() + if mimetype_value in DWG_MIME_TYPES or mimetype_value.endswith("/dwg"): + return True + + for path_field in ("url", "name"): + raw_value = str(resource.get(path_field) or "") + extension = os.path.splitext(urlparse(raw_value).path)[1].lower() + if extension == ".dwg": + return True + + return False + + +def _stage_resource_dwg( + resource: dict[str, Any], + tmp_dir: str, + max_download_bytes: int, + download_timeout: int, +) -> str: + source_path = os.path.join(tmp_dir, "source.dwg") + + if resource.get("url_type") == "upload": + _copy_uploaded_resource( + resource, + source_path, + max_download_bytes=max_download_bytes, + download_timeout=download_timeout, + ) + else: + resource_url = str(resource.get("url") or "").strip() + if not resource_url: + raise ValidationError({"resource_id": ["Resource does not have a downloadable URL"]}) + _download_to_path( + resource_url, + source_path, + max_download_bytes=max_download_bytes, + download_timeout=download_timeout, + source_label="external DWG resource", + ) + + if not os.path.exists(source_path) or 
os.path.getsize(source_path) == 0: + raise ValidationError({"resource_id": ["DWG source file could not be prepared"]}) + + return source_path + + +def _copy_uploaded_resource( + resource: dict[str, Any], + destination_path: str, + max_download_bytes: int, + download_timeout: int, +) -> None: + """ + Resolve a CKAN-uploaded file into a temp file. + + The filesystem branch matches the default CKAN storage backend. The signed + URL branch is an adaptation point for storage backends such as + ckanext-s3filestore, which this repository currently enables. + """ + resource_upload = uploader.get_resource_uploader(dict(resource)) + resource_id = resource["id"] + resource_name = os.path.basename(str(resource.get("url") or "")) or f"{resource_id}.dwg" + + local_path = None + try: + local_path = resource_upload.get_path(resource_id) + except TypeError: + # Some backends, such as s3filestore, require the stored filename. + local_path = None + except Exception as err: + log.debug("Failed to resolve local upload path for %s: %s", resource_id, err) + + if local_path and os.path.exists(local_path): + _copy_local_file( + local_path, + destination_path, + max_download_bytes=max_download_bytes, + ) + return + + if all( + hasattr(resource_upload, attribute) + for attribute in ("bucket_name", "get_path", "get_signed_url_to_key") + ): + use_readonly_credentials = bool( + getattr(resource_upload, "p_key_readonly", None) + and getattr(resource_upload, "s_key_readonly", None) + ) + try: + remote_key = resource_upload.get_path(resource_id, resource_name) + signed_url = resource_upload.get_signed_url_to_key( + remote_key, + read_only=use_readonly_credentials, + ) + except Exception as err: + if use_readonly_credentials: + try: + signed_url = resource_upload.get_signed_url_to_key( + remote_key, + read_only=False, + ) + except Exception: + log.exception( + "Failed to resolve uploaded resource %s from remote storage", + resource_id, + ) + raise ValidationError( + {"resource_id": [f"Could not 
resolve uploaded resource: {err}"]} + ) + else: + log.exception( + "Failed to resolve uploaded resource %s from remote storage", + resource_id, + ) + raise ValidationError( + {"resource_id": [f"Could not resolve uploaded resource: {err}"]} + ) + + _download_to_path( + signed_url, + destination_path, + max_download_bytes=max_download_bytes, + download_timeout=download_timeout, + source_label="uploaded DWG resource", + ) + return + + raise ValidationError( + { + "resource_id": [ + "Uploaded resource storage backend is not supported by dwg_preview_convert" + ] + } + ) + + +def _copy_local_file(source_path: str, destination_path: str, max_download_bytes: int) -> None: + file_size = os.path.getsize(source_path) + if file_size > max_download_bytes: + raise ValidationError( + { + "resource_id": [ + f"DWG source file exceeds the maximum allowed size of {max_download_bytes} bytes" + ] + } + ) + shutil.copyfile(source_path, destination_path) + + +def _download_to_path( + url: str, + destination_path: str, + max_download_bytes: int, + download_timeout: int, + source_label: str, +) -> None: + parsed_url = urlparse(url) + if parsed_url.scheme.lower() not in {"http", "https"}: + raise ValidationError({"resource_id": [f"Unsupported URL scheme for {source_label}"]}) + + bytes_downloaded = 0 + try: + with requests.get( + url, + stream=True, + timeout=(10, download_timeout), + allow_redirects=True, + ) as response: + response.raise_for_status() + with open(destination_path, "wb") as destination_file: + for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE): + if not chunk: + continue + bytes_downloaded += len(chunk) + if bytes_downloaded > max_download_bytes: + raise ValidationError( + { + "resource_id": [ + f"{source_label.capitalize()} exceeds the maximum allowed size of {max_download_bytes} bytes" + ] + } + ) + destination_file.write(chunk) + except ValidationError: + if os.path.exists(destination_path): + os.remove(destination_path) + raise + except 
requests.RequestException as err: + if os.path.exists(destination_path): + os.remove(destination_path) + raise ValidationError({"resource_id": [f"Could not download {source_label}: {err}"]}) + + +def _convert_dwg_to_best_svg( + source_path: str, + tmp_dir: str, + timeout: int, +) -> str: + default_variant = _attempt_svg_conversion( + source_path, + os.path.join(tmp_dir, "preview.default.svg"), + timeout=timeout, + mspace_only=False, + mode_label="default", + ) + mspace_variant = _attempt_svg_conversion( + source_path, + os.path.join(tmp_dir, "preview.mspace.svg"), + timeout=timeout, + mspace_only=True, + mode_label="mspace", + ) + selected_variant = _select_best_svg_variant(default_variant, mspace_variant) + log.debug( + "DWG preview chose %s conversion (score=%s, bytes=%s, drawables=%s)", + selected_variant["mode"], + selected_variant["score"], + selected_variant["size_bytes"], + selected_variant["drawable_count"], + ) + return selected_variant["path"] + + +def _attempt_svg_conversion( + source_path: str, + svg_path: str, + timeout: int, + mspace_only: bool, + mode_label: str, +) -> dict[str, Any]: + try: + _run_dwg_to_svg(source_path, svg_path, timeout, mspace_only=mspace_only) + _normalize_svg_viewbox(svg_path) + drawable_count, size_bytes = _measure_svg_preview(svg_path) + return { + "mode": mode_label, + "path": svg_path, + "score": drawable_count * 1000 + size_bytes, + "drawable_count": drawable_count, + "size_bytes": size_bytes, + } + except ValidationError as err: + log.warning("DWG preview %s conversion failed: %s", mode_label, err.error_dict) + return {"mode": mode_label, "error": err} + + +def _select_best_svg_variant(*variants: dict[str, Any]) -> dict[str, Any]: + successful_variants = [variant for variant in variants if "error" not in variant] + if successful_variants: + return max( + successful_variants, + key=lambda variant: ( + variant["score"], + variant["drawable_count"], + variant["size_bytes"], + ), + ) + + error_messages = [] + for variant in 
variants: + error = variant.get("error") + if not error: + continue + error_messages.append(f'{variant["mode"]}: {error.error_dict}') + + raise ValidationError( + { + "conversion": [ + "DWG conversion failed for all modes" + + (f" ({'; '.join(error_messages)})" if error_messages else "") + ] + } + ) + + +def _measure_svg_preview(svg_path: str) -> tuple[int, int]: + try: + with open(svg_path, "r", encoding="utf-8") as svg_file: + svg_text = svg_file.read() + except OSError as err: + raise ValidationError({"conversion": [f"Could not read generated SVG: {err}"]}) + + drawable_count = len(SVG_DRAWABLE_TAG_RE.findall(svg_text)) + size_bytes = len(svg_text.encode("utf-8")) + return drawable_count, size_bytes + + +def _run_dwg_to_svg( + source_path: str, + svg_path: str, + timeout: int, + mspace_only: bool = False, +) -> None: + _require_command("dwg2SVG", "libredwg-tools") + command = ["dwg2SVG"] + if mspace_only: + command.append("--mspace") + command.append(source_path) + with open(svg_path, "wb") as svg_file: + result = _run_subprocess( + command, + stdout=svg_file, + timeout=timeout, + ) + + if result.returncode != 0 or not os.path.exists(svg_path) or os.path.getsize(svg_path) == 0: + stderr = _decode_subprocess_output(result.stderr) + raise ValidationError( + { + "conversion": [ + "DWG to SVG conversion failed" + + (f": {stderr}" if stderr else "") + ] + } + ) + +def _normalize_svg_viewbox(svg_path: str) -> None: + """ + Rebase libredwg SVG output when the viewBox origin is left in world coords. + + Some DWG files are emitted with a large absolute viewBox origin while the + visible geometry is already shifted near 0,0. That mismatch causes + rasterization to render an empty transparent image. 
+ """ + try: + with open(svg_path, "r", encoding="utf-8") as svg_file: + svg_text = svg_file.read() + except OSError as err: + raise ValidationError({"conversion": [f"Could not read generated SVG: {err}"]}) + + match = SVG_VIEWBOX_RE.search(svg_text) + if not match: + return + + min_x = float(match.group("min_x")) + min_y = float(match.group("min_y")) + width = match.group("width") + height = match.group("height") + normalized_svg = svg_text + + if min_x != 0 or min_y != 0: + normalized_viewbox = f'viewBox="0 0 {width} {height}"' + normalized_svg = SVG_VIEWBOX_RE.sub(normalized_viewbox, normalized_svg, count=1) + + normalized_svg = _normalize_svg_root_size(normalized_svg, width=width, height=height) + + if normalized_svg == svg_text: + return + + try: + with open(svg_path, "w", encoding="utf-8") as svg_file: + svg_file.write(normalized_svg) + except OSError as err: + raise ValidationError({"conversion": [f"Could not normalize generated SVG: {err}"]}) + + +def _normalize_svg_root_size(svg_text: str, width: str, height: str) -> str: + """ + Ensure generated SVGs have intrinsic dimensions when embedded as images. + + libredwg emits root SVG tags with width/height set to 100%, which renders + fine in a browser tab but can collapse or scale unpredictably when the SVG + is used as an source. Replacing those root dimensions with concrete + values derived from the viewBox gives the browser a stable intrinsic size. 
+ """ + root_tag_match = SVG_ROOT_TAG_RE.search(svg_text) + if not root_tag_match: + return svg_text + + root_tag = root_tag_match.group(0) + normalized_root_tag = SVG_WIDTH_ATTR_RE.sub(f'width="{width}"', root_tag, count=1) + normalized_root_tag = SVG_HEIGHT_ATTR_RE.sub( + f'height="{height}"', + normalized_root_tag, + count=1, + ) + + if normalized_root_tag == root_tag: + return svg_text + + start, end = root_tag_match.span() + return svg_text[:start] + normalized_root_tag + svg_text[end:] + + +def _run_subprocess( + command: list[str], + timeout: int, + stdout: Any | None = None, +) -> subprocess.CompletedProcess[bytes]: + try: + return subprocess.run( + command, + stdin=subprocess.DEVNULL, + stdout=stdout if stdout is not None else subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + timeout=timeout, + ) + except subprocess.TimeoutExpired: + raise ValidationError( + {"conversion": [f"Conversion exceeded the timeout of {timeout} seconds"]} + ) + except OSError as err: + raise ValidationError({"conversion": [f"Conversion process failed to start: {err}"]}) + + +def _require_command(command_name: str, package_name: str) -> None: + if shutil.which(command_name): + return + raise ValidationError( + { + "converter": [ + f'{command_name} is not installed. Install the "{package_name}" package.' 
+ ] + } + ) + + +def _decode_subprocess_output(output: bytes | None) -> str: + if not output: + return "" + return output.decode("utf-8", errors="replace").strip().splitlines()[0][:400] + + +def _build_output_filename(resource: dict[str, Any]) -> str: + raw_name = ( + str(resource.get("name") or "").strip() + or os.path.basename(str(resource.get("url") or "").strip()) + or resource["id"] + ) + base_name = os.path.splitext(raw_name)[0] or resource["id"] + return f"{base_name}.svg" + + +def _get_int_config(config_key: str, default_value: int) -> int: + raw_value = toolkit.config.get(config_key) + if raw_value in (None, ""): + return default_value + try: + return int(raw_value) + except (TypeError, ValueError): + log.warning("Invalid integer config for %s=%r, using default %s", config_key, raw_value, default_value) + return default_value diff --git a/ckanext/malmo/logic/action.py b/ckanext/malmo/logic/action.py new file mode 100644 index 0000000..c0b50d9 --- /dev/null +++ b/ckanext/malmo/logic/action.py @@ -0,0 +1,18 @@ +from __future__ import annotations + +from ckan.plugins import toolkit + +from ckanext.malmo import dwg_preview + + +@toolkit.side_effect_free +def dwg_preview_convert(context, data_dict): + """ + Convert a DWG resource into a previewable image payload. + + This action returns a Python dictionary containing binary bytes for + internal callers. The public HTTP endpoint is exposed via a Flask + blueprint at /api/3/action/dwg_preview_convert so CKAN can return the + image directly instead of JSON-wrapping the response. 
+ """ + return dwg_preview.build_preview_payload(context, data_dict) diff --git a/ckanext/malmo/plugin.py b/ckanext/malmo/plugin.py index 25ba943..05a4375 100644 --- a/ckanext/malmo/plugin.py +++ b/ckanext/malmo/plugin.py @@ -3,6 +3,8 @@ from ckan.plugins import toolkit from ckanext.malmo import actions as malmo_actions +from ckanext.malmo.logic import action as malmo_logic_actions +from ckanext.malmo import views as malmo_views log = logging.getLogger(__name__) @@ -11,6 +13,7 @@ class MalmoPlugin(plugins.SingletonPlugin): plugins.implements(plugins.IPackageController, inherit=True) plugins.implements(plugins.IConfigurer) plugins.implements(plugins.IActions) + plugins.implements(plugins.IBlueprint) def update_config(self, config): """ @@ -21,6 +24,7 @@ def update_config(self, config): def get_actions(self): return { + 'dwg_preview_convert': malmo_logic_actions.dwg_preview_convert, 'package_update': malmo_actions.package_update, 'package_create': malmo_actions.package_create, 'package_patch': malmo_actions.package_patch, @@ -38,3 +42,6 @@ def get_actions(self): 'group_patch': malmo_actions.group_patch, 'group_show': malmo_actions.group_show, } + + def get_blueprint(self): + return malmo_views.get_blueprints() diff --git a/ckanext/malmo/views.py b/ckanext/malmo/views.py new file mode 100644 index 0000000..345892f --- /dev/null +++ b/ckanext/malmo/views.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +import logging +from typing import Any + +import flask +from flask_login import current_user + +import ckan.logic as logic +import ckan.model as model +from ckan.plugins import toolkit + +log = logging.getLogger(__name__) + +dwg_preview_blueprint = flask.Blueprint("malmo_dwg_preview", __name__) + +ValidationError = logic.ValidationError + + +@dwg_preview_blueprint.route("/api/3/action/dwg_preview_convert", methods=["GET", "POST"]) +def dwg_preview_convert() -> flask.Response: + """ + Binary endpoint that mirrors an action URL. 
+ + CKAN 2.11 wraps normal action responses in JSON, so this blueprint exposes + the same action name as a concrete Flask route and returns the image bytes + directly. + """ + data_dict = _get_request_data() + context = _build_context() + + try: + payload = toolkit.get_action("dwg_preview_convert")(context, data_dict) + except ValidationError as err: + return _validation_error_response(err) + except Exception: + log.exception("Unexpected error while generating DWG preview") + return flask.jsonify( + { + "help": _help_url(), + "success": False, + "error": { + "__type": "Internal Server Error", + "message": "Internal Server Error", + }, + } + ), 500 + + response = flask.Response(payload["content"], mimetype=payload["mimetype"]) + response.headers["Content-Disposition"] = f'inline; filename="{payload["filename"]}"' + response.headers["Cache-Control"] = "private, no-store, max-age=0" + return response + + +def get_blueprints(): + return [dwg_preview_blueprint] + + +def _build_context() -> dict[str, Any]: + is_authenticated = bool(getattr(current_user, "is_authenticated", False)) + return { + "model": model, + "session": model.Session, + "user": current_user.name if is_authenticated else "", + "auth_user_obj": current_user if is_authenticated else None, + } + + +def _get_request_data() -> dict[str, Any]: + if flask.request.method == "GET": + return flask.request.args.to_dict(flat=True) + + if flask.request.is_json: + payload = flask.request.get_json(silent=True) + if isinstance(payload, dict): + return payload + + return flask.request.form.to_dict(flat=True) + + +def _validation_error_response(error: ValidationError) -> tuple[flask.Response, int]: + error_dict = dict(error.error_dict) + error_dict["__type"] = "Validation Error" + return ( + flask.jsonify( + { + "help": _help_url(), + "success": False, + "error": error_dict, + } + ), + 409, + ) + + +def _help_url() -> str: + return toolkit.url_for( + "api.action", + logic_function="help_show", + ver=3, + 
name="dwg_preview_convert", + _external=True, + ) diff --git a/requirements.txt b/requirements.txt index 44b9834..a7f2249 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -html2text \ No newline at end of file +html2text +requests diff --git a/setup.py b/setup.py index 6069b46..eed77dc 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,8 @@ include_package_data=True, zip_safe=False, install_requires=[ - # list of dependencies + 'html2text', + 'requests', ], entry_points=''' [ckan.plugins] From 3d3d91238606ad9bb2df519715b8723ae57bfc22 Mon Sep 17 00:00:00 2001 From: William Lima Date: Sun, 31 May 2026 18:10:27 -0100 Subject: [PATCH 3/6] docs(readme): add DWG runtime and configuration notes --- README.md | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 124475f..7232ff9 100644 --- a/README.md +++ b/README.md @@ -6,16 +6,48 @@ Customizations for the City of Malmö CKAN instance. - CKAN 2.10+ (tested on 2.11) +## DWG Preview Requirements + +This extension includes a DWG preview endpoint that converts DWG resources to SVG for browser preview. + +Important: +- `dwg2SVG` is required at runtime +- `dwg2SVG` is provided by LibreDWG +- this is a system dependency, not a Python package dependency + +That means installing the extension with `pip` is not enough by itself. The CKAN environment that runs this extension must also have LibreDWG installed and available on `PATH`. + +In the local Docker-based development setup, LibreDWG is installed in the CKAN image build. + +Tested runtime dependency: +- LibreDWG / `dwg2SVG` 0.13.x + +## DWG Preview Configuration + +The DWG preview code supports these CKAN config settings: + +- `ckanext.malmo.dwg_preview_timeout` + Conversion timeout in seconds. + +- `ckanext.malmo.dwg_preview_download_timeout` + Download timeout in seconds for remote DWG resources. 
+ +- `ckanext.malmo.dwg_preview_max_download_bytes` + Maximum DWG download size in bytes. + +If these settings are not provided, the extension uses built-in defaults. + ## Installation To install `ckanext-malmo`: -1. Clone this repository (or copy the extension files). -2. Install the extension in your environment: - ```bash - pip install -e ckan/extensions/ckanext-malmo - ``` -3. Add `malmo` to the `ckan.plugins` setting in your CKAN configuration file (`ckan.ini`): - ```ini - ckan.plugins = ... malmo - ``` +1. Clone this repository (or copy the extension files). +2. Install the extension in your environment: + ```bash + pip install -e ckan/extensions/ckanext-malmo + ``` +3. Make sure LibreDWG / `dwg2SVG` is installed in the runtime environment. +4. Add `malmo` to the `ckan.plugins` setting in your CKAN configuration file (`ckan.ini`): + ```ini + ckan.plugins = ... malmo + ``` From eded0e33a2fa0d20ba44906a4cb7ab79b08711a5 Mon Sep 17 00:00:00 2001 From: William Lima Date: Sun, 31 May 2026 20:01:20 -0100 Subject: [PATCH 4/6] fix(svg): improve svg width, color and opacity --- README.md | 9 +++ ckanext/malmo/dwg_preview.py | 103 +++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/README.md b/README.md index 7232ff9..e01ee22 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,15 @@ The DWG preview code supports these CKAN config settings: - `ckanext.malmo.dwg_preview_max_download_bytes` Maximum DWG download size in bytes. +- `ckanext.malmo.dwg_preview_stroke_min_width` + Minimum stroke width (in px) enforced on generated SVG previews. Default: `1.4`. + +- `ckanext.malmo.dwg_preview_stroke_color` + Stroke color enforced on generated SVG previews. Default: `#111111`. + +- `ckanext.malmo.dwg_preview_stroke_opacity` + Stroke opacity enforced on generated SVG previews. Range: `0.0` to `1.0`. Default: `1.0`. + If these settings are not provided, the extension uses built-in defaults. 
## Installation diff --git a/ckanext/malmo/dwg_preview.py b/ckanext/malmo/dwg_preview.py index 5cbf7e9..1e1355d 100644 --- a/ckanext/malmo/dwg_preview.py +++ b/ckanext/malmo/dwg_preview.py @@ -34,6 +34,9 @@ DEFAULT_TIMEOUT_SECONDS = 30 DEFAULT_DOWNLOAD_TIMEOUT_SECONDS = 30 DEFAULT_MAX_DOWNLOAD_BYTES = 100 * 1024 * 1024 +DEFAULT_STROKE_MIN_WIDTH = 1.2 +DEFAULT_STROKE_COLOR = "#111111" +DEFAULT_STROKE_OPACITY = 1.0 DOWNLOAD_CHUNK_SIZE = 64 * 1024 SVG_VIEWBOX_RE = re.compile( r'viewBox="(?P-?\d+(?:\.\d+)?)\s+' @@ -44,6 +47,7 @@ SVG_ROOT_TAG_RE = re.compile(r"]*>", re.IGNORECASE | re.DOTALL) SVG_WIDTH_ATTR_RE = re.compile(r'width="[^"]*"', re.IGNORECASE) SVG_HEIGHT_ATTR_RE = re.compile(r'height="[^"]*"', re.IGNORECASE) +SVG_DEFS_CLOSE_RE = re.compile(r"", re.IGNORECASE) SVG_DRAWABLE_TAG_RE = re.compile( r"<(?:use|path|line|polyline|polygon|circle|ellipse|text)\b", re.IGNORECASE, @@ -353,6 +357,7 @@ def _attempt_svg_conversion( try: _run_dwg_to_svg(source_path, svg_path, timeout, mspace_only=mspace_only) _normalize_svg_viewbox(svg_path) + _enhance_svg_strokes(svg_path) drawable_count, size_bytes = _measure_svg_preview(svg_path) return { "mode": mode_label, @@ -436,6 +441,88 @@ def _run_dwg_to_svg( } ) + +def _enhance_svg_strokes(svg_path: str) -> None: + min_stroke_width = _get_float_config( + "ckanext.malmo.dwg_preview_stroke_min_width", + DEFAULT_STROKE_MIN_WIDTH, + ) + stroke_color = str( + toolkit.config.get("ckanext.malmo.dwg_preview_stroke_color") or DEFAULT_STROKE_COLOR + ).strip() or DEFAULT_STROKE_COLOR + stroke_opacity = _get_float_config( + "ckanext.malmo.dwg_preview_stroke_opacity", + DEFAULT_STROKE_OPACITY, + ) + + min_stroke_width = max(0.1, min_stroke_width) + stroke_opacity = min(max(0.0, stroke_opacity), 1.0) + + try: + with open(svg_path, "r", encoding="utf-8") as svg_file: + svg_text = svg_file.read() + except OSError as err: + raise ValidationError({"conversion": [f"Could not read generated SVG: {err}"]}) + + style_block = ( + "" + ) + + 
normalized_svg = svg_text + + if "ckan-dwg-preview-scope" not in normalized_svg: + root_tag_match = SVG_ROOT_TAG_RE.search(normalized_svg) + if root_tag_match: + root_tag = root_tag_match.group(0) + if "class=" in root_tag: + scoped_root = re.sub( + r'class="([^"]*)"', + r'class="\1 ckan-dwg-preview-scope"', + root_tag, + count=1, + flags=re.IGNORECASE, + ) + else: + scoped_root = root_tag[:-1] + ' class="ckan-dwg-preview-scope">' + start, end = root_tag_match.span() + normalized_svg = normalized_svg[:start] + scoped_root + normalized_svg[end:] + + if "ckan-dwg-preview-stroke-style" not in normalized_svg: + defs_close_match = SVG_DEFS_CLOSE_RE.search(normalized_svg) + if defs_close_match: + insert_at = defs_close_match.end() + normalized_svg = normalized_svg[:insert_at] + style_block + normalized_svg[insert_at:] + else: + root_tag_match = SVG_ROOT_TAG_RE.search(normalized_svg) + if root_tag_match: + insert_at = root_tag_match.end() + normalized_svg = normalized_svg[:insert_at] + style_block + normalized_svg[insert_at:] + + if normalized_svg == svg_text: + return + + try: + with open(svg_path, "w", encoding="utf-8") as svg_file: + svg_file.write(normalized_svg) + except OSError as err: + raise ValidationError({"conversion": [f"Could not normalize generated SVG: {err}"]}) + + def _normalize_svg_viewbox(svg_path: str) -> None: """ Rebase libredwg SVG output when the viewBox origin is left in world coords. 
@@ -563,3 +650,19 @@ def _get_int_config(config_key: str, default_value: int) -> int: except (TypeError, ValueError): log.warning("Invalid integer config for %s=%r, using default %s", config_key, raw_value, default_value) return default_value + + +def _get_float_config(config_key: str, default_value: float) -> float: + raw_value = toolkit.config.get(config_key) + if raw_value in (None, ""): + return default_value + try: + return float(raw_value) + except (TypeError, ValueError): + log.warning( + "Invalid float config for %s=%r, using default %s", + config_key, + raw_value, + default_value, + ) + return default_value From 94fdcf21f0ffd10690e526029fcf964ed29af846 Mon Sep 17 00:00:00 2001 From: William Lima Date: Thu, 4 Jun 2026 21:16:48 -0100 Subject: [PATCH 5/6] Refactor DWG preview to PDF pipeline --- README.md | 82 ++++- ckanext/malmo/dwg_preview.py | 604 +++++++++++++++++----------------- ckanext/malmo/logic/action.py | 4 +- ckanext/malmo/views.py | 2 +- requirements.txt | 2 + setup.py | 2 + 6 files changed, 381 insertions(+), 315 deletions(-) diff --git a/README.md b/README.md index e01ee22..74ca75e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ckanext-malmo -Customizations for the City of Malmö CKAN instance. +Customizations for the City of Malmö CKAN instance. ## Requirements @@ -8,26 +8,37 @@ Customizations for the City of Malmö CKAN instance. ## DWG Preview Requirements -This extension includes a DWG preview endpoint that converts DWG resources to SVG for browser preview. +This extension includes a DWG preview endpoint that converts DWG resources into browser-previewable PDFs. 
Important: -- `dwg2SVG` is required at runtime -- `dwg2SVG` is provided by LibreDWG -- this is a system dependency, not a Python package dependency +- ODA File Converter is required at runtime for DWG -> DXF conversion +- Python rendering dependencies are required at runtime for DXF -> PDF rendering +- these are runtime dependencies, not just Python package dependencies -That means installing the extension with `pip` is not enough by itself. The CKAN environment that runs this extension must also have LibreDWG installed and available on `PATH`. +The preview pipeline is: +1. stage the DWG resource into a temporary file +2. convert DWG -> DXF with ODA File Converter +3. render DXF -> PDF +4. return the generated PDF through the existing CKAN action and endpoint -In the local Docker-based development setup, LibreDWG is installed in the CKAN image build. +That means installing the extension with `pip` is not enough by itself. The CKAN environment that runs this extension must also have ODA File Converter installed and available on `PATH`. -Tested runtime dependency: -- LibreDWG / `dwg2SVG` 0.13.x +In the local Docker-based development setup, the CKAN image installs ODA File Converter automatically from the official ODA Linux AppImage by default. You can also override that by placing an official asset in `ckan/vendor/oda/`. + +Python runtime dependencies: +- `ezdxf` +- `PyMuPDF` + +System/runtime dependencies: +- ODA File Converter Linux asset (`.AppImage` or `.deb`) +- `xvfb` for headless execution of ODA File Converter when the Linux build only exposes the Qt `xcb` plugin ## DWG Preview Configuration The DWG preview code supports these CKAN config settings: - `ckanext.malmo.dwg_preview_timeout` - Conversion timeout in seconds. + Conversion timeout in seconds. Default: `45`. - `ckanext.malmo.dwg_preview_download_timeout` Download timeout in seconds for remote DWG resources. 
@@ -35,17 +46,54 @@ The DWG preview code supports these CKAN config settings: - `ckanext.malmo.dwg_preview_max_download_bytes` Maximum DWG download size in bytes. -- `ckanext.malmo.dwg_preview_stroke_min_width` - Minimum stroke width (in px) enforced on generated SVG previews. Default: `1.4`. +- `ckanext.malmo.dwg_preview_oda_executable` + Absolute path or executable name for ODA File Converter. Default: `ODAFileConverter`. + +- `ckanext.malmo.dwg_preview_oda_output_version` + DXF target version passed to ODA File Converter. Default: `ACAD2018`. + +- `ckanext.malmo.dwg_preview_render_margin_mm` + Extra page margin applied around rendered geometry to avoid edge clipping in previews. Default: `4`. -- `ckanext.malmo.dwg_preview_stroke_color` - Stroke color enforced on generated SVG previews. Default: `#111111`. +- `ckanext.malmo.dwg_preview_render_page_size_mm` + Fixed square page size used for generated previews so large drawings are scaled down into a browser-friendly PDF. Default: `160`. -- `ckanext.malmo.dwg_preview_stroke_opacity` - Stroke opacity enforced on generated SVG previews. Range: `0.0` to `1.0`. Default: `1.0`. +- `ckanext.malmo.dwg_preview_xvfb_screen` + Screen configuration passed to `xvfb-run` when launching ODA File Converter in headless Docker environments. Default: `-screen 0 1024x768x24`. + +- `ckanext.malmo.dwg_preview_min_preview_bytes` + Minimum byte size for accepting a generated preview. Default: `1024`. + +- `ckanext.malmo.dwg_preview_max_modelspace_entities` + Maximum entity count allowed for a modelspace-only inline preview. Files above this limit fail fast with a download-first message. Default: `5000`. If these settings are not provided, the extension uses built-in defaults. +## Docker Setup + +By default, the Docker image downloads the ODA File Converter Linux AppImage from the official ODA site during build. 
+ +Optional local override directory: + +```text +ckan/vendor/oda/ +``` + +Supported local asset formats: +- `.AppImage` +- `.deb` + +Default configured asset name: + +```text +ODAFileConverter_QT6_lnxX64_8.3dll_27.1.AppImage +``` + +Build resolution order: +1. use the file named by `ODA_FILE_CONVERTER_ASSET` if it exists in `ckan/vendor/oda/` +2. otherwise download that filename from the official ODA site +3. if `ODA_FILE_CONVERTER_ASSET` is empty, discover the current AppImage filename from the official ODA catalog page and download it + ## Installation To install `ckanext-malmo`: @@ -55,7 +103,7 @@ To install `ckanext-malmo`: ```bash pip install -e ckan/extensions/ckanext-malmo ``` -3. Make sure LibreDWG / `dwg2SVG` is installed in the runtime environment. +3. Install ODA File Converter and make sure `ODAFileConverter` is available in the runtime environment. 4. Add `malmo` to the `ckan.plugins` setting in your CKAN configuration file (`ckan.ini`): ```ini ckan.plugins = ... malmo diff --git a/ckanext/malmo/dwg_preview.py b/ckanext/malmo/dwg_preview.py index 1e1355d..5be725e 100644 --- a/ckanext/malmo/dwg_preview.py +++ b/ckanext/malmo/dwg_preview.py @@ -2,7 +2,6 @@ import logging import os -import re import shutil import subprocess from tempfile import TemporaryDirectory @@ -31,36 +30,20 @@ "image/vnd.dwg", "image/x-dwg", } -DEFAULT_TIMEOUT_SECONDS = 30 +DEFAULT_TIMEOUT_SECONDS = 45 DEFAULT_DOWNLOAD_TIMEOUT_SECONDS = 30 DEFAULT_MAX_DOWNLOAD_BYTES = 100 * 1024 * 1024 -DEFAULT_STROKE_MIN_WIDTH = 1.2 -DEFAULT_STROKE_COLOR = "#111111" -DEFAULT_STROKE_OPACITY = 1.0 +DEFAULT_ODA_OUTPUT_VERSION = "ACAD2018" +DEFAULT_RENDER_MARGIN_MM = 4 +DEFAULT_RENDER_PAGE_SIZE_MM = 160 +DEFAULT_MIN_PREVIEW_BYTES = 1024 +DEFAULT_MAX_MODELSPACE_ENTITIES = 5000 DOWNLOAD_CHUNK_SIZE = 64 * 1024 -SVG_VIEWBOX_RE = re.compile( - r'viewBox="(?P-?\d+(?:\.\d+)?)\s+' - r'(?P-?\d+(?:\.\d+)?)\s+' - r'(?P\d+(?:\.\d+)?)\s+' - r'(?P\d+(?:\.\d+)?)"' -) -SVG_ROOT_TAG_RE = re.compile(r"]*>", 
re.IGNORECASE | re.DOTALL) -SVG_WIDTH_ATTR_RE = re.compile(r'width="[^"]*"', re.IGNORECASE) -SVG_HEIGHT_ATTR_RE = re.compile(r'height="[^"]*"', re.IGNORECASE) -SVG_DEFS_CLOSE_RE = re.compile(r"", re.IGNORECASE) -SVG_DRAWABLE_TAG_RE = re.compile( - r"<(?:use|path|line|polyline|polygon|circle|ellipse|text)\b", - re.IGNORECASE, -) +PDF_MIMETYPE = "application/pdf" +PDF_EXTENSION = "pdf" def build_preview_payload(context: dict[str, Any], data_dict: dict[str, Any]) -> dict[str, Any]: - """ - Build a binary preview payload from a DWG resource. - - The returned dictionary is meant for internal Python callers. The Flask - route turns this payload into an HTTP response with the correct mimetype. - """ resource_id = (data_dict or {}).get("resource_id") if not resource_id: raise ValidationError({"resource_id": ["Missing value"]}) @@ -84,6 +67,8 @@ def build_preview_payload(context: dict[str, Any], data_dict: dict[str, Any]) -> DEFAULT_MAX_DOWNLOAD_BYTES, ) + log.info("DWG preview requested for resource=%s format=pdf", resource_id) + with TemporaryDirectory(prefix="ckan-dwg-preview-") as tmp_dir: source_path = _stage_resource_dwg( resource, @@ -91,18 +76,14 @@ def build_preview_payload(context: dict[str, Any], data_dict: dict[str, Any]) -> max_download_bytes=max_download_bytes, download_timeout=download_timeout, ) - output_path = _convert_dwg_to_best_svg( - source_path, - tmp_dir, - timeout=conversion_timeout, - ) - with open(output_path, "rb") as output_file: + preview_path = _build_preview_file(source_path, tmp_dir, timeout=conversion_timeout) + with open(preview_path, "rb") as output_file: content = output_file.read() return { "content": content, "filename": _build_output_filename(resource), - "mimetype": "image/svg+xml", + "mimetype": PDF_MIMETYPE, "resource_id": resource_id, } @@ -146,6 +127,7 @@ def _stage_resource_dwg( source_path = os.path.join(tmp_dir, "source.dwg") if resource.get("url_type") == "upload": + log.info("Preparing uploaded DWG resource=%s", 
resource.get("id")) _copy_uploaded_resource( resource, source_path, @@ -156,6 +138,7 @@ def _stage_resource_dwg( resource_url = str(resource.get("url") or "").strip() if not resource_url: raise ValidationError({"resource_id": ["Resource does not have a downloadable URL"]}) + log.info("Downloading external DWG resource=%s url=%s", resource.get("id"), resource_url) _download_to_path( resource_url, source_path, @@ -167,6 +150,12 @@ def _stage_resource_dwg( if not os.path.exists(source_path) or os.path.getsize(source_path) == 0: raise ValidationError({"resource_id": ["DWG source file could not be prepared"]}) + log.info( + "Prepared DWG source resource=%s path=%s bytes=%s", + resource.get("id"), + source_path, + os.path.getsize(source_path), + ) return source_path @@ -176,13 +165,6 @@ def _copy_uploaded_resource( max_download_bytes: int, download_timeout: int, ) -> None: - """ - Resolve a CKAN-uploaded file into a temp file. - - The filesystem branch matches the default CKAN storage backend. The signed - URL branch is an adaptation point for storage backends such as - ckanext-s3filestore, which this repository currently enables. - """ resource_upload = uploader.get_resource_uploader(dict(resource)) resource_id = resource["id"] resource_name = os.path.basename(str(resource.get("url") or "")) or f"{resource_id}.dwg" @@ -191,17 +173,13 @@ def _copy_uploaded_resource( try: local_path = resource_upload.get_path(resource_id) except TypeError: - # Some backends, such as s3filestore, require the stored filename. 
local_path = None except Exception as err: log.debug("Failed to resolve local upload path for %s: %s", resource_id, err) if local_path and os.path.exists(local_path): - _copy_local_file( - local_path, - destination_path, - max_download_bytes=max_download_bytes, - ) + log.info("Copying uploaded DWG from local storage resource=%s path=%s", resource_id, local_path) + _copy_local_file(local_path, destination_path, max_download_bytes=max_download_bytes) return if all( @@ -212,6 +190,7 @@ def _copy_uploaded_resource( getattr(resource_upload, "p_key_readonly", None) and getattr(resource_upload, "s_key_readonly", None) ) + remote_key = None try: remote_key = resource_upload.get_path(resource_id, resource_name) signed_url = resource_upload.get_signed_url_to_key( @@ -219,7 +198,7 @@ def _copy_uploaded_resource( read_only=use_readonly_credentials, ) except Exception as err: - if use_readonly_credentials: + if use_readonly_credentials and remote_key: try: signed_url = resource_upload.get_signed_url_to_key( remote_key, @@ -242,6 +221,11 @@ def _copy_uploaded_resource( {"resource_id": [f"Could not resolve uploaded resource: {err}"]} ) + log.info( + "Downloading uploaded DWG from remote storage resource=%s key=%s", + resource_id, + remote_key, + ) _download_to_path( signed_url, destination_path, @@ -316,280 +300,322 @@ def _download_to_path( os.remove(destination_path) raise ValidationError({"resource_id": [f"Could not download {source_label}: {err}"]}) - -def _convert_dwg_to_best_svg( - source_path: str, - tmp_dir: str, - timeout: int, -) -> str: - default_variant = _attempt_svg_conversion( - source_path, - os.path.join(tmp_dir, "preview.default.svg"), - timeout=timeout, - mspace_only=False, - mode_label="default", - ) - mspace_variant = _attempt_svg_conversion( - source_path, - os.path.join(tmp_dir, "preview.mspace.svg"), - timeout=timeout, - mspace_only=True, - mode_label="mspace", + log.info( + "Downloaded %s path=%s bytes=%s", + source_label, + destination_path, + 
bytes_downloaded, ) - selected_variant = _select_best_svg_variant(default_variant, mspace_variant) - log.debug( - "DWG preview chose %s conversion (score=%s, bytes=%s, drawables=%s)", - selected_variant["mode"], - selected_variant["score"], - selected_variant["size_bytes"], - selected_variant["drawable_count"], + + +def _build_preview_file(source_path: str, tmp_dir: str, timeout: int) -> str: + dxf_path = _convert_dwg_to_dxf(source_path, tmp_dir, timeout=timeout) + document = _load_dxf_document(dxf_path) + preview_path = _render_best_layout_preview(document, tmp_dir) + log.info("DWG preview generated format=pdf path=%s bytes=%s", preview_path, os.path.getsize(preview_path)) + return preview_path + + +def _convert_dwg_to_dxf(source_path: str, tmp_dir: str, timeout: int) -> str: + executable = _resolve_oda_executable() + output_version = str( + toolkit.config.get("ckanext.malmo.dwg_preview_oda_output_version") + or DEFAULT_ODA_OUTPUT_VERSION + ).strip() or DEFAULT_ODA_OUTPUT_VERSION + + input_dir = os.path.join(tmp_dir, "oda-input") + output_dir = os.path.join(tmp_dir, "oda-output") + os.makedirs(input_dir, exist_ok=True) + os.makedirs(output_dir, exist_ok=True) + + input_name = os.path.basename(source_path) + staged_input_path = os.path.join(input_dir, input_name) + shutil.copyfile(source_path, staged_input_path) + + command = _build_oda_command([ + executable, + input_dir, + output_dir, + output_version, + "DXF", + "0", + "1", + "*.dwg", + ]) + log.info("Running ODA File Converter command=%s", command) + result = _run_subprocess(command, timeout=timeout) + stderr = _decode_subprocess_output(result.stderr) + stdout = _decode_subprocess_output(result.stdout) + log.info( + "ODA File Converter finished code=%s stdout=%s stderr=%s", + result.returncode, + stdout or "", + stderr or "", ) - return selected_variant["path"] + if result.returncode != 0: + raise ValidationError( + { + "conversion": [ + "DWG to DXF conversion failed" + + (f": {stderr}" if stderr else "") + ] + 
} + ) -def _attempt_svg_conversion( - source_path: str, - svg_path: str, - timeout: int, - mspace_only: bool, - mode_label: str, -) -> dict[str, Any]: + dxf_path = _find_generated_dxf(output_dir, input_name) + if not dxf_path or not os.path.exists(dxf_path) or os.path.getsize(dxf_path) == 0: + raise ValidationError( + {"conversion": ["DWG to DXF conversion did not produce a usable DXF file"]} + ) + + log.info("Generated DXF path=%s bytes=%s", dxf_path, os.path.getsize(dxf_path)) + return dxf_path + + +def _build_oda_command(oda_arguments: list[str]) -> list[str]: + xvfb_run = shutil.which("xvfb-run") + if not xvfb_run: + return oda_arguments + + screen_spec = str( + toolkit.config.get("ckanext.malmo.dwg_preview_xvfb_screen") + or "-screen 0 1024x768x24" + ).strip() or "-screen 0 1024x768x24" + return [xvfb_run, "-a", "-s", screen_spec, *oda_arguments] + + +def _find_generated_dxf(output_dir: str, input_name: str) -> str | None: + expected_name = os.path.splitext(input_name)[0] + ".dxf" + expected_path = os.path.join(output_dir, expected_name) + if os.path.exists(expected_path): + return expected_path + + for root, _dirs, files in os.walk(output_dir): + for file_name in files: + if file_name.lower().endswith(".dxf"): + return os.path.join(root, file_name) + return None + + +def _load_dxf_document(dxf_path: str) -> Any: try: - _run_dwg_to_svg(source_path, svg_path, timeout, mspace_only=mspace_only) - _normalize_svg_viewbox(svg_path) - _enhance_svg_strokes(svg_path) - drawable_count, size_bytes = _measure_svg_preview(svg_path) - return { - "mode": mode_label, - "path": svg_path, - "score": drawable_count * 1000 + size_bytes, - "drawable_count": drawable_count, - "size_bytes": size_bytes, - } - except ValidationError as err: - log.warning("DWG preview %s conversion failed: %s", mode_label, err.error_dict) - return {"mode": mode_label, "error": err} - - -def _select_best_svg_variant(*variants: dict[str, Any]) -> dict[str, Any]: - successful_variants = [variant for 
variant in variants if "error" not in variant] - if successful_variants: - return max( - successful_variants, - key=lambda variant: ( - variant["score"], - variant["drawable_count"], - variant["size_bytes"], - ), + import ezdxf + from ezdxf import recover + except ImportError as err: + raise ValidationError( + {"converter": [f"DXF renderer dependency is not installed: {err}"]} ) - error_messages = [] - for variant in variants: - error = variant.get("error") - if not error: - continue - error_messages.append(f'{variant["mode"]}: {error.error_dict}') + try: + document = ezdxf.readfile(dxf_path) + log.info("Loaded DXF document path=%s using fast read path", dxf_path) + return document + except Exception as fast_err: + log.warning("Fast DXF load failed for %s, retrying recovery path: %s", dxf_path, fast_err) + + try: + document, auditor = recover.readfile(dxf_path) + except Exception as err: + raise ValidationError({"conversion": [f"Generated DXF could not be parsed: {err}"]}) + + error_count = len(getattr(auditor, "errors", [])) + fixed_error_count = len(getattr(auditor, "fixes", [])) + log.info( + "Loaded DXF document path=%s auditor_errors=%s auditor_fixes=%s", + dxf_path, + error_count, + fixed_error_count, + ) + return document + + +def _render_best_layout_preview(document: Any, tmp_dir: str) -> str: + failed_errors: list[ValidationError] = [] + failed_layouts: list[str] = [] + + for layout_name, layout_kind, layout, entity_count in _iter_layout_candidates(document): + try: + _guard_preview_complexity(layout_name, layout_kind, entity_count) + return _render_layout_preview(document, layout, layout_name, entity_count, tmp_dir) + except ValidationError as err: + failed_errors.append(err) + failed_layouts.append(f"{layout_name}: {err.error_dict}") + log.warning("DWG preview layout render failed layout=%s kind=%s entities=%s error=%s", layout_name, layout_kind, entity_count, err.error_dict) + + if len(failed_errors) == 1: + raise failed_errors[0] raise ValidationError( 
{ "conversion": [ - "DWG conversion failed for all modes" - + (f" ({'; '.join(error_messages)})" if error_messages else "") - ] + "Preview is currently unavailable for this drawing." + ], + "preview_reason": ["preview_unavailable"], } ) -def _measure_svg_preview(svg_path: str) -> tuple[int, int]: - try: - with open(svg_path, "r", encoding="utf-8") as svg_file: - svg_text = svg_file.read() - except OSError as err: - raise ValidationError({"conversion": [f"Could not read generated SVG: {err}"]}) +def _iter_layout_candidates(document: Any) -> list[tuple[str, str, Any, int]]: + candidates: list[tuple[str, str, Any, int]] = [] + + layout_names_method = getattr(document, "layout_names_in_taborder", None) + modelspace_name = str(getattr(document.modelspace(), "name", "Model")) + if callable(layout_names_method): + for layout_name in list(layout_names_method()): + if str(layout_name).lower() == modelspace_name.lower(): + continue + try: + layout = document.paperspace(layout_name) + except Exception as err: + log.warning("Skipping paperspace layout=%s because it could not be loaded: %s", layout_name, err) + continue + entity_count = _count_layout_entities(layout) + log.info("Found paperspace layout=%s entities=%s", layout_name, entity_count) + if entity_count > 0: + candidates.append((str(layout_name), "paperspace", layout, entity_count)) + + modelspace = document.modelspace() + modelspace_entity_count = _count_layout_entities(modelspace) + log.info("Found modelspace layout=%s entities=%s", getattr(modelspace, "name", "Model"), modelspace_entity_count) + candidates.append((getattr(modelspace, "name", "Model"), "modelspace", modelspace, modelspace_entity_count)) + return candidates + + +def _render_layout_preview(document: Any, layout: Any, layout_name: str, entity_count: int, tmp_dir: str) -> str: + if entity_count <= 0: + raise ValidationError( + {"conversion": [f'Layout "{layout_name}" does not contain drawable entities']} + ) - drawable_count = 
len(SVG_DRAWABLE_TAG_RE.findall(svg_text)) - size_bytes = len(svg_text.encode("utf-8")) - return drawable_count, size_bytes + preview_path = os.path.join( + tmp_dir, + f"preview.{_sanitize_filename_component(layout_name)}.{PDF_EXTENSION}", + ) + log.info("Rendering DXF layout=%s entities=%s target=%s", layout_name, entity_count, preview_path) + _render_layout_to_pdf(document, layout, preview_path) + _validate_rendered_preview(preview_path, layout_name) + log.info("Rendered preview accepted layout=%s bytes=%s", layout_name, os.path.getsize(preview_path)) + return preview_path -def _run_dwg_to_svg( - source_path: str, - svg_path: str, - timeout: int, - mspace_only: bool = False, -) -> None: - _require_command("dwg2SVG", "libredwg-tools") - command = ["dwg2SVG"] - if mspace_only: - command.append("--mspace") - command.append(source_path) - with open(svg_path, "wb") as svg_file: - result = _run_subprocess( - command, - stdout=svg_file, - timeout=timeout, +def _render_layout_to_pdf(document: Any, layout: Any, output_path: str) -> None: + try: + from ezdxf.addons.drawing import Frontend, RenderContext, layout as drawing_layout, pymupdf + except ImportError as err: + raise ValidationError( + {"converter": [f"PDF rendering dependency is not installed: {err}"]} ) - if result.returncode != 0 or not os.path.exists(svg_path) or os.path.getsize(svg_path) == 0: - stderr = _decode_subprocess_output(result.stderr) + margin_mm = max( + 0.0, + _get_float_config("ckanext.malmo.dwg_preview_render_margin_mm", DEFAULT_RENDER_MARGIN_MM), + ) + + try: + context = RenderContext(document) + backend = pymupdf.PyMuPdfBackend() + frontend = Frontend(context, backend) + frontend.draw_layout(layout, finalize=True) + page = _build_preview_page(drawing_layout, margin_mm) + pdf_bytes = backend.get_pdf_bytes(page) + with open(output_path, "wb") as output_file: + output_file.write(pdf_bytes) + except Exception as err: raise ValidationError( { "conversion": [ - "DWG to SVG conversion failed" - + (f": 
{stderr}" if stderr else "") + f'DXF PDF rendering failed for layout "{getattr(layout, "name", "unknown")}": {err}' ] } ) -def _enhance_svg_strokes(svg_path: str) -> None: - min_stroke_width = _get_float_config( - "ckanext.malmo.dwg_preview_stroke_min_width", - DEFAULT_STROKE_MIN_WIDTH, +def _build_preview_page(drawing_layout: Any, margin_mm: float) -> Any: + page_size_mm = max( + 50.0, + _get_float_config( + "ckanext.malmo.dwg_preview_render_page_size_mm", + DEFAULT_RENDER_PAGE_SIZE_MM, + ), ) - stroke_color = str( - toolkit.config.get("ckanext.malmo.dwg_preview_stroke_color") or DEFAULT_STROKE_COLOR - ).strip() or DEFAULT_STROKE_COLOR - stroke_opacity = _get_float_config( - "ckanext.malmo.dwg_preview_stroke_opacity", - DEFAULT_STROKE_OPACITY, + return drawing_layout.Page( + page_size_mm, + page_size_mm, + drawing_layout.Units.mm, + margins=drawing_layout.Margins.all(margin_mm), ) - min_stroke_width = max(0.1, min_stroke_width) - stroke_opacity = min(max(0.0, stroke_opacity), 1.0) - try: - with open(svg_path, "r", encoding="utf-8") as svg_file: - svg_text = svg_file.read() - except OSError as err: - raise ValidationError({"conversion": [f"Could not read generated SVG: {err}"]}) - - style_block = ( - "" +def _validate_rendered_preview(preview_path: str, layout_name: str) -> None: + if not os.path.exists(preview_path) or os.path.getsize(preview_path) == 0: + raise ValidationError( + {"conversion": [f'Renderer produced no output for layout "{layout_name}"']} + ) + + minimum_size_bytes = _get_int_config( + "ckanext.malmo.dwg_preview_min_preview_bytes", + DEFAULT_MIN_PREVIEW_BYTES, ) + if os.path.getsize(preview_path) < minimum_size_bytes: + raise ValidationError( + {"conversion": [f'Rendered preview for layout "{layout_name}" is too small to be trustworthy']} + ) - normalized_svg = svg_text - - if "ckan-dwg-preview-scope" not in normalized_svg: - root_tag_match = SVG_ROOT_TAG_RE.search(normalized_svg) - if root_tag_match: - root_tag = root_tag_match.group(0) - if 
"class=" in root_tag: - scoped_root = re.sub( - r'class="([^"]*)"', - r'class="\1 ckan-dwg-preview-scope"', - root_tag, - count=1, - flags=re.IGNORECASE, - ) - else: - scoped_root = root_tag[:-1] + ' class="ckan-dwg-preview-scope">' - start, end = root_tag_match.span() - normalized_svg = normalized_svg[:start] + scoped_root + normalized_svg[end:] - - if "ckan-dwg-preview-stroke-style" not in normalized_svg: - defs_close_match = SVG_DEFS_CLOSE_RE.search(normalized_svg) - if defs_close_match: - insert_at = defs_close_match.end() - normalized_svg = normalized_svg[:insert_at] + style_block + normalized_svg[insert_at:] - else: - root_tag_match = SVG_ROOT_TAG_RE.search(normalized_svg) - if root_tag_match: - insert_at = root_tag_match.end() - normalized_svg = normalized_svg[:insert_at] + style_block + normalized_svg[insert_at:] - - if normalized_svg == svg_text: - return - try: - with open(svg_path, "w", encoding="utf-8") as svg_file: - svg_file.write(normalized_svg) - except OSError as err: - raise ValidationError({"conversion": [f"Could not normalize generated SVG: {err}"]}) +def _guard_preview_complexity(layout_name: str, layout_kind: str, entity_count: int) -> None: + if layout_kind != "modelspace": + return + max_modelspace_entities = _get_int_config( + "ckanext.malmo.dwg_preview_max_modelspace_entities", + DEFAULT_MAX_MODELSPACE_ENTITIES, + ) + if entity_count > max_modelspace_entities: + raise ValidationError( + { + "conversion": [ + "This drawing is too detailed to preview here." + ], + "preview_reason": ["modelspace_too_complex"], + } + ) -def _normalize_svg_viewbox(svg_path: str) -> None: - """ - Rebase libredwg SVG output when the viewBox origin is left in world coords. - Some DWG files are emitted with a large absolute viewBox origin while the - visible geometry is already shifted near 0,0. That mismatch causes - rasterization to render an empty transparent image. 
- """ +def _count_layout_entities(layout: Any) -> int: try: - with open(svg_path, "r", encoding="utf-8") as svg_file: - svg_text = svg_file.read() - except OSError as err: - raise ValidationError({"conversion": [f"Could not read generated SVG: {err}"]}) - - match = SVG_VIEWBOX_RE.search(svg_text) - if not match: - return + return sum(1 for _entity in layout) + except TypeError: + return len(list(layout)) - min_x = float(match.group("min_x")) - min_y = float(match.group("min_y")) - width = match.group("width") - height = match.group("height") - normalized_svg = svg_text - if min_x != 0 or min_y != 0: - normalized_viewbox = f'viewBox="0 0 {width} {height}"' - normalized_svg = SVG_VIEWBOX_RE.sub(normalized_viewbox, normalized_svg, count=1) +def _resolve_oda_executable() -> str: + configured_path = str( + toolkit.config.get("ckanext.malmo.dwg_preview_oda_executable") or "ODAFileConverter" + ).strip() or "ODAFileConverter" + if os.path.isabs(configured_path): + if os.path.exists(configured_path) and os.access(configured_path, os.X_OK): + return configured_path + raise ValidationError( + { + "converter": [ + f'Configured ODA File Converter is not executable: "{configured_path}"' + ] + } + ) - normalized_svg = _normalize_svg_root_size(normalized_svg, width=width, height=height) + resolved = shutil.which(configured_path) + if resolved: + return resolved - if normalized_svg == svg_text: - return - - try: - with open(svg_path, "w", encoding="utf-8") as svg_file: - svg_file.write(normalized_svg) - except OSError as err: - raise ValidationError({"conversion": [f"Could not normalize generated SVG: {err}"]}) - - -def _normalize_svg_root_size(svg_text: str, width: str, height: str) -> str: - """ - Ensure generated SVGs have intrinsic dimensions when embedded as images. - - libredwg emits root SVG tags with width/height set to 100%, which renders - fine in a browser tab but can collapse or scale unpredictably when the SVG - is used as an source. 
Replacing those root dimensions with concrete - values derived from the viewBox gives the browser a stable intrinsic size. - """ - root_tag_match = SVG_ROOT_TAG_RE.search(svg_text) - if not root_tag_match: - return svg_text - - root_tag = root_tag_match.group(0) - normalized_root_tag = SVG_WIDTH_ATTR_RE.sub(f'width="{width}"', root_tag, count=1) - normalized_root_tag = SVG_HEIGHT_ATTR_RE.sub( - f'height="{height}"', - normalized_root_tag, - count=1, + raise ValidationError( + { + "converter": [ + 'ODA File Converter is not installed. Configure `ckanext.malmo.dwg_preview_oda_executable` or add `ODAFileConverter` to PATH.' + ] + } ) - if normalized_root_tag == root_tag: - return svg_text - - start, end = root_tag_match.span() - return svg_text[:start] + normalized_root_tag + svg_text[end:] - def _run_subprocess( command: list[str], @@ -613,18 +639,6 @@ def _run_subprocess( raise ValidationError({"conversion": [f"Conversion process failed to start: {err}"]}) -def _require_command(command_name: str, package_name: str) -> None: - if shutil.which(command_name): - return - raise ValidationError( - { - "converter": [ - f'{command_name} is not installed. Install the "{package_name}" package.' 
- ] - } - ) - - def _decode_subprocess_output(output: bytes | None) -> str: if not output: return "" @@ -638,7 +652,12 @@ def _build_output_filename(resource: dict[str, Any]) -> str: or resource["id"] ) base_name = os.path.splitext(raw_name)[0] or resource["id"] - return f"{base_name}.svg" + return f"{base_name}.{PDF_EXTENSION}" + + +def _sanitize_filename_component(value: str) -> str: + sanitized = "".join(char if char.isalnum() or char in "._-" else "-" for char in value).strip("-") + return sanitized or "layout" def _get_int_config(config_key: str, default_value: int) -> int: @@ -648,7 +667,7 @@ def _get_int_config(config_key: str, default_value: int) -> int: try: return int(raw_value) except (TypeError, ValueError): - log.warning("Invalid integer config for %s=%r, using default %s", config_key, raw_value, default_value) + log.warning("Invalid integer config %s=%r, using default %s", config_key, raw_value, default_value) return default_value @@ -659,10 +678,5 @@ def _get_float_config(config_key: str, default_value: float) -> float: try: return float(raw_value) except (TypeError, ValueError): - log.warning( - "Invalid float config for %s=%r, using default %s", - config_key, - raw_value, - default_value, - ) + log.warning("Invalid float config %s=%r, using default %s", config_key, raw_value, default_value) return default_value diff --git a/ckanext/malmo/logic/action.py b/ckanext/malmo/logic/action.py index c0b50d9..6f605a6 100644 --- a/ckanext/malmo/logic/action.py +++ b/ckanext/malmo/logic/action.py @@ -8,11 +8,11 @@ @toolkit.side_effect_free def dwg_preview_convert(context, data_dict): """ - Convert a DWG resource into a previewable image payload. + Convert a DWG resource into a previewable PDF payload. This action returns a Python dictionary containing binary bytes for internal callers. The public HTTP endpoint is exposed via a Flask blueprint at /api/3/action/dwg_preview_convert so CKAN can return the - image directly instead of JSON-wrapping the response. 
+ preview directly instead of JSON-wrapping the response. """ return dwg_preview.build_preview_payload(context, data_dict) diff --git a/ckanext/malmo/views.py b/ckanext/malmo/views.py index 345892f..f5d389c 100644 --- a/ckanext/malmo/views.py +++ b/ckanext/malmo/views.py @@ -23,7 +23,7 @@ def dwg_preview_convert() -> flask.Response: Binary endpoint that mirrors an action URL. CKAN 2.11 wraps normal action responses in JSON, so this blueprint exposes - the same action name as a concrete Flask route and returns the image bytes + the same action name as a concrete Flask route and returns the preview bytes directly. """ data_dict = _get_request_data() diff --git a/requirements.txt b/requirements.txt index a7f2249..fc6e4d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ html2text requests +ezdxf +PyMuPDF diff --git a/setup.py b/setup.py index eed77dc..c9631d1 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,8 @@ install_requires=[ 'html2text', 'requests', + 'ezdxf', + 'PyMuPDF', ], entry_points=''' [ckan.plugins] From 8612c9235341b8e5d9eb6745cf538af08dc3df69 Mon Sep 17 00:00:00 2001 From: William Lima Date: Wed, 10 Jun 2026 15:50:32 -0100 Subject: [PATCH 6/6] feat(dwg-preview): switch malmo preview pipeline to png --- README.md | 93 ++-- ckanext/malmo/dwg_preview.py | 682 -------------------------- ckanext/malmo/dwg_preview/__init__.py | 3 + ckanext/malmo/dwg_preview/cache.py | 29 ++ ckanext/malmo/dwg_preview/config.py | 162 ++++++ ckanext/malmo/dwg_preview/oda.py | 124 +++++ ckanext/malmo/dwg_preview/render.py | 354 +++++++++++++ ckanext/malmo/dwg_preview/service.py | 267 ++++++++++ ckanext/malmo/logic/action.py | 6 +- ckanext/malmo/plugin.py | 2 +- ckanext/malmo/views.py | 8 +- requirements.txt | 3 +- setup.py | 3 +- 13 files changed, 994 insertions(+), 742 deletions(-) delete mode 100644 ckanext/malmo/dwg_preview.py create mode 100644 ckanext/malmo/dwg_preview/__init__.py create mode 100644 ckanext/malmo/dwg_preview/cache.py create mode 100644 
ckanext/malmo/dwg_preview/config.py create mode 100644 ckanext/malmo/dwg_preview/oda.py create mode 100644 ckanext/malmo/dwg_preview/render.py create mode 100644 ckanext/malmo/dwg_preview/service.py diff --git a/README.md b/README.md index 74ca75e..c9d4054 100644 --- a/README.md +++ b/README.md @@ -1,50 +1,55 @@ # ckanext-malmo -Customizations for the City of Malm? CKAN instance. +Customizations for the City of Malmo CKAN instance. ## Requirements - CKAN 2.10+ (tested on 2.11) -## DWG Preview Requirements +## DWG Preview -This extension includes a DWG preview endpoint that converts DWG resources into browser-previewable PDFs. +This extension exposes a binary preview action at: -Important: -- ODA File Converter is required at runtime for DWG -> DXF conversion -- Python rendering dependencies are required at runtime for DXF -> PDF rendering -- these are runtime dependencies, not just Python package dependencies +```text +/api/3/action/convert_dwg?id= +``` + +The preview flow is: -The preview pipeline is: 1. stage the DWG resource into a temporary file 2. convert DWG -> DXF with ODA File Converter -3. render DXF -> PDF -4. return the generated PDF through the existing CKAN action and endpoint +3. render DXF -> PNG with `ezdxf` and `matplotlib` +4. cache the generated PNG by resource id + file hash +5. return the PNG directly from the CKAN endpoint -That means installing the extension with `pip` is not enough by itself. The CKAN environment that runs this extension must also have ODA File Converter installed and available on `PATH`. +Important runtime requirements: -In the local Docker-based development setup, the CKAN image installs ODA File Converter automatically from the official ODA Linux AppImage by default. You can also override that by placing an official asset in `ckan/vendor/oda/`. 
+- ODA File Converter must be installed and available on `PATH` +- `xvfb` is used automatically when `xvfb-run` is available +- Python rendering dependencies must be installed in the CKAN runtime Python runtime dependencies: + - `ezdxf` -- `PyMuPDF` +- `matplotlib` System/runtime dependencies: + - ODA File Converter Linux asset (`.AppImage` or `.deb`) -- `xvfb` for headless execution of ODA File Converter when the Linux build only exposes the Qt `xcb` plugin +- `xvfb` ## DWG Preview Configuration -The DWG preview code supports these CKAN config settings: +The DWG preview pipeline supports these CKAN config settings: - `ckanext.malmo.dwg_preview_timeout` Conversion timeout in seconds. Default: `45`. - `ckanext.malmo.dwg_preview_download_timeout` - Download timeout in seconds for remote DWG resources. + Download timeout in seconds for remote DWG resources. Default: `30`. - `ckanext.malmo.dwg_preview_max_download_bytes` - Maximum DWG download size in bytes. + Maximum DWG download size in bytes. Default: `104857600`. - `ckanext.malmo.dwg_preview_oda_executable` Absolute path or executable name for ODA File Converter. Default: `ODAFileConverter`. @@ -52,59 +57,47 @@ The DWG preview code supports these CKAN config settings: - `ckanext.malmo.dwg_preview_oda_output_version` DXF target version passed to ODA File Converter. Default: `ACAD2018`. -- `ckanext.malmo.dwg_preview_render_margin_mm` - Extra page margin applied around rendered geometry to avoid edge clipping in previews. Default: `4`. +- `ckanext.malmo.dwg_preview_xvfb_screen` + Screen configuration passed to `xvfb-run` when launching ODA File Converter in headless Docker environments. Default: `-screen 0 1600x1200x24`. + +- `ckanext.malmo.dwg_preview_render_margin` + Extra margin applied around rendered geometry. Default: `0.05`. -- `ckanext.malmo.dwg_preview_render_page_size_mm` - Fixed square page size used for generated previews so large drawings are scaled down into a browser-friendly PDF. Default: `160`. 
+- `ckanext.malmo.dwg_preview_image_width` + Output preview width in pixels. Default: `1600`. -- `ckanext.malmo.dwg_preview_xvfb_screen` - Screen configuration passed to `xvfb-run` when launching ODA File Converter in headless Docker environments. Default: `-screen 0 1024x768x24`. +- `ckanext.malmo.dwg_preview_image_height` + Output preview height in pixels. Default: `1200`. - `ckanext.malmo.dwg_preview_min_preview_bytes` Minimum byte size for accepting a generated preview. Default: `1024`. -- `ckanext.malmo.dwg_preview_max_modelspace_entities` - Maximum entity count allowed for a modelspace-only inline preview. Files above this limit fail fast with a download-first message. Default: `5000`. - -If these settings are not provided, the extension uses built-in defaults. +- `ckanext.malmo.dwg_preview_cache_dir` + Directory used for cached PNG previews. Default: system temporary directory + `ckan-dwg-preview-cache`. ## Docker Setup -By default, the Docker image downloads the ODA File Converter Linux AppImage from the official ODA site during build. +In the local development Docker setup: + +- ODA File Converter is installed during image build +- the local `src/ckanext-malmo` extension is installed at container startup from the mounted workspace +- `xvfb` is installed for headless ODA execution -Optional local override directory: +Optional local ODA asset override directory: ```text ckan/vendor/oda/ ``` Supported local asset formats: + - `.AppImage` - `.deb` -Default configured asset name: - -```text -ODAFileConverter_QT6_lnxX64_8.3dll_27.1.AppImage -``` - -Build resolution order: -1. use the file named by `ODA_FILE_CONVERTER_ASSET` if it exists in `ckan/vendor/oda/` -2. otherwise download that filename from the official ODA site -3. if `ODA_FILE_CONVERTER_ASSET` is empty, discover the current AppImage filename from the official ODA catalog page and download it - ## Installation To install `ckanext-malmo`: -1. Clone this repository (or copy the extension files). -2. 
Install the extension in your environment: - ```bash - pip install -e ckan/extensions/ckanext-malmo - ``` -3. Install ODA File Converter and make sure `ODAFileConverter` is available in the runtime environment. -4. Add `malmo` to the `ckan.plugins` setting in your CKAN configuration file (`ckan.ini`): - ```ini - ckan.plugins = ... malmo - ``` +1. Install the extension in your environment. +2. Install ODA File Converter and make sure `ODAFileConverter` is available in the runtime environment. +3. Add `malmo` to the `ckan.plugins` setting in your CKAN configuration file. diff --git a/ckanext/malmo/dwg_preview.py b/ckanext/malmo/dwg_preview.py deleted file mode 100644 index 5be725e..0000000 --- a/ckanext/malmo/dwg_preview.py +++ /dev/null @@ -1,682 +0,0 @@ -from __future__ import annotations - -import logging -import os -import shutil -import subprocess -from tempfile import TemporaryDirectory -from typing import Any -from urllib.parse import urlparse - -import requests - -import ckan.lib.uploader as uploader -import ckan.logic as logic -from ckan.plugins import toolkit - -log = logging.getLogger(__name__) - -ValidationError = logic.ValidationError -NotAuthorized = logic.NotAuthorized -NotFound = logic.NotFound - -DWG_MIME_TYPES = { - "application/acad", - "application/autocad_dwg", - "application/dwg", - "application/x-acad", - "application/x-autocad", - "application/x-dwg", - "image/vnd.dwg", - "image/x-dwg", -} -DEFAULT_TIMEOUT_SECONDS = 45 -DEFAULT_DOWNLOAD_TIMEOUT_SECONDS = 30 -DEFAULT_MAX_DOWNLOAD_BYTES = 100 * 1024 * 1024 -DEFAULT_ODA_OUTPUT_VERSION = "ACAD2018" -DEFAULT_RENDER_MARGIN_MM = 4 -DEFAULT_RENDER_PAGE_SIZE_MM = 160 -DEFAULT_MIN_PREVIEW_BYTES = 1024 -DEFAULT_MAX_MODELSPACE_ENTITIES = 5000 -DOWNLOAD_CHUNK_SIZE = 64 * 1024 -PDF_MIMETYPE = "application/pdf" -PDF_EXTENSION = "pdf" - - -def build_preview_payload(context: dict[str, Any], data_dict: dict[str, Any]) -> dict[str, Any]: - resource_id = (data_dict or {}).get("resource_id") - if not resource_id: 
- raise ValidationError({"resource_id": ["Missing value"]}) - - resource = _get_resource_for_preview(context, resource_id) - if not _is_dwg_resource(resource): - raise ValidationError( - {"resource_id": ["Resource must be a DWG file to generate a preview"]} - ) - - conversion_timeout = _get_int_config( - "ckanext.malmo.dwg_preview_timeout", - DEFAULT_TIMEOUT_SECONDS, - ) - download_timeout = _get_int_config( - "ckanext.malmo.dwg_preview_download_timeout", - DEFAULT_DOWNLOAD_TIMEOUT_SECONDS, - ) - max_download_bytes = _get_int_config( - "ckanext.malmo.dwg_preview_max_download_bytes", - DEFAULT_MAX_DOWNLOAD_BYTES, - ) - - log.info("DWG preview requested for resource=%s format=pdf", resource_id) - - with TemporaryDirectory(prefix="ckan-dwg-preview-") as tmp_dir: - source_path = _stage_resource_dwg( - resource, - tmp_dir, - max_download_bytes=max_download_bytes, - download_timeout=download_timeout, - ) - preview_path = _build_preview_file(source_path, tmp_dir, timeout=conversion_timeout) - with open(preview_path, "rb") as output_file: - content = output_file.read() - - return { - "content": content, - "filename": _build_output_filename(resource), - "mimetype": PDF_MIMETYPE, - "resource_id": resource_id, - } - - -def _get_resource_for_preview(context: dict[str, Any], resource_id: str) -> dict[str, Any]: - try: - return toolkit.get_action("resource_show")(context, {"id": resource_id}) - except NotFound: - raise ValidationError({"resource_id": ["Resource does not exist"]}) - except NotAuthorized: - raise ValidationError({"resource_id": ["User cannot view this resource"]}) - - -def _is_dwg_resource(resource: dict[str, Any]) -> bool: - resource_format = str(resource.get("format") or "").strip().lower() - if resource_format: - normalized_format = resource_format.lstrip(".") - if normalized_format == "dwg" or "dwg" in normalized_format: - return True - - for mime_field in ("mimetype", "mimetype_inner"): - mimetype_value = str(resource.get(mime_field) or "").strip().lower() - 
if mimetype_value in DWG_MIME_TYPES or mimetype_value.endswith("/dwg"): - return True - - for path_field in ("url", "name"): - raw_value = str(resource.get(path_field) or "") - extension = os.path.splitext(urlparse(raw_value).path)[1].lower() - if extension == ".dwg": - return True - - return False - - -def _stage_resource_dwg( - resource: dict[str, Any], - tmp_dir: str, - max_download_bytes: int, - download_timeout: int, -) -> str: - source_path = os.path.join(tmp_dir, "source.dwg") - - if resource.get("url_type") == "upload": - log.info("Preparing uploaded DWG resource=%s", resource.get("id")) - _copy_uploaded_resource( - resource, - source_path, - max_download_bytes=max_download_bytes, - download_timeout=download_timeout, - ) - else: - resource_url = str(resource.get("url") or "").strip() - if not resource_url: - raise ValidationError({"resource_id": ["Resource does not have a downloadable URL"]}) - log.info("Downloading external DWG resource=%s url=%s", resource.get("id"), resource_url) - _download_to_path( - resource_url, - source_path, - max_download_bytes=max_download_bytes, - download_timeout=download_timeout, - source_label="external DWG resource", - ) - - if not os.path.exists(source_path) or os.path.getsize(source_path) == 0: - raise ValidationError({"resource_id": ["DWG source file could not be prepared"]}) - - log.info( - "Prepared DWG source resource=%s path=%s bytes=%s", - resource.get("id"), - source_path, - os.path.getsize(source_path), - ) - return source_path - - -def _copy_uploaded_resource( - resource: dict[str, Any], - destination_path: str, - max_download_bytes: int, - download_timeout: int, -) -> None: - resource_upload = uploader.get_resource_uploader(dict(resource)) - resource_id = resource["id"] - resource_name = os.path.basename(str(resource.get("url") or "")) or f"{resource_id}.dwg" - - local_path = None - try: - local_path = resource_upload.get_path(resource_id) - except TypeError: - local_path = None - except Exception as err: - 
log.debug("Failed to resolve local upload path for %s: %s", resource_id, err) - - if local_path and os.path.exists(local_path): - log.info("Copying uploaded DWG from local storage resource=%s path=%s", resource_id, local_path) - _copy_local_file(local_path, destination_path, max_download_bytes=max_download_bytes) - return - - if all( - hasattr(resource_upload, attribute) - for attribute in ("bucket_name", "get_path", "get_signed_url_to_key") - ): - use_readonly_credentials = bool( - getattr(resource_upload, "p_key_readonly", None) - and getattr(resource_upload, "s_key_readonly", None) - ) - remote_key = None - try: - remote_key = resource_upload.get_path(resource_id, resource_name) - signed_url = resource_upload.get_signed_url_to_key( - remote_key, - read_only=use_readonly_credentials, - ) - except Exception as err: - if use_readonly_credentials and remote_key: - try: - signed_url = resource_upload.get_signed_url_to_key( - remote_key, - read_only=False, - ) - except Exception: - log.exception( - "Failed to resolve uploaded resource %s from remote storage", - resource_id, - ) - raise ValidationError( - {"resource_id": [f"Could not resolve uploaded resource: {err}"]} - ) - else: - log.exception( - "Failed to resolve uploaded resource %s from remote storage", - resource_id, - ) - raise ValidationError( - {"resource_id": [f"Could not resolve uploaded resource: {err}"]} - ) - - log.info( - "Downloading uploaded DWG from remote storage resource=%s key=%s", - resource_id, - remote_key, - ) - _download_to_path( - signed_url, - destination_path, - max_download_bytes=max_download_bytes, - download_timeout=download_timeout, - source_label="uploaded DWG resource", - ) - return - - raise ValidationError( - { - "resource_id": [ - "Uploaded resource storage backend is not supported by dwg_preview_convert" - ] - } - ) - - -def _copy_local_file(source_path: str, destination_path: str, max_download_bytes: int) -> None: - file_size = os.path.getsize(source_path) - if file_size > 
max_download_bytes: - raise ValidationError( - { - "resource_id": [ - f"DWG source file exceeds the maximum allowed size of {max_download_bytes} bytes" - ] - } - ) - shutil.copyfile(source_path, destination_path) - - -def _download_to_path( - url: str, - destination_path: str, - max_download_bytes: int, - download_timeout: int, - source_label: str, -) -> None: - parsed_url = urlparse(url) - if parsed_url.scheme.lower() not in {"http", "https"}: - raise ValidationError({"resource_id": [f"Unsupported URL scheme for {source_label}"]}) - - bytes_downloaded = 0 - try: - with requests.get( - url, - stream=True, - timeout=(10, download_timeout), - allow_redirects=True, - ) as response: - response.raise_for_status() - with open(destination_path, "wb") as destination_file: - for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE): - if not chunk: - continue - bytes_downloaded += len(chunk) - if bytes_downloaded > max_download_bytes: - raise ValidationError( - { - "resource_id": [ - f"{source_label.capitalize()} exceeds the maximum allowed size of {max_download_bytes} bytes" - ] - } - ) - destination_file.write(chunk) - except ValidationError: - if os.path.exists(destination_path): - os.remove(destination_path) - raise - except requests.RequestException as err: - if os.path.exists(destination_path): - os.remove(destination_path) - raise ValidationError({"resource_id": [f"Could not download {source_label}: {err}"]}) - - log.info( - "Downloaded %s path=%s bytes=%s", - source_label, - destination_path, - bytes_downloaded, - ) - - -def _build_preview_file(source_path: str, tmp_dir: str, timeout: int) -> str: - dxf_path = _convert_dwg_to_dxf(source_path, tmp_dir, timeout=timeout) - document = _load_dxf_document(dxf_path) - preview_path = _render_best_layout_preview(document, tmp_dir) - log.info("DWG preview generated format=pdf path=%s bytes=%s", preview_path, os.path.getsize(preview_path)) - return preview_path - - -def _convert_dwg_to_dxf(source_path: str, tmp_dir: 
str, timeout: int) -> str: - executable = _resolve_oda_executable() - output_version = str( - toolkit.config.get("ckanext.malmo.dwg_preview_oda_output_version") - or DEFAULT_ODA_OUTPUT_VERSION - ).strip() or DEFAULT_ODA_OUTPUT_VERSION - - input_dir = os.path.join(tmp_dir, "oda-input") - output_dir = os.path.join(tmp_dir, "oda-output") - os.makedirs(input_dir, exist_ok=True) - os.makedirs(output_dir, exist_ok=True) - - input_name = os.path.basename(source_path) - staged_input_path = os.path.join(input_dir, input_name) - shutil.copyfile(source_path, staged_input_path) - - command = _build_oda_command([ - executable, - input_dir, - output_dir, - output_version, - "DXF", - "0", - "1", - "*.dwg", - ]) - log.info("Running ODA File Converter command=%s", command) - result = _run_subprocess(command, timeout=timeout) - stderr = _decode_subprocess_output(result.stderr) - stdout = _decode_subprocess_output(result.stdout) - log.info( - "ODA File Converter finished code=%s stdout=%s stderr=%s", - result.returncode, - stdout or "", - stderr or "", - ) - - if result.returncode != 0: - raise ValidationError( - { - "conversion": [ - "DWG to DXF conversion failed" - + (f": {stderr}" if stderr else "") - ] - } - ) - - dxf_path = _find_generated_dxf(output_dir, input_name) - if not dxf_path or not os.path.exists(dxf_path) or os.path.getsize(dxf_path) == 0: - raise ValidationError( - {"conversion": ["DWG to DXF conversion did not produce a usable DXF file"]} - ) - - log.info("Generated DXF path=%s bytes=%s", dxf_path, os.path.getsize(dxf_path)) - return dxf_path - - -def _build_oda_command(oda_arguments: list[str]) -> list[str]: - xvfb_run = shutil.which("xvfb-run") - if not xvfb_run: - return oda_arguments - - screen_spec = str( - toolkit.config.get("ckanext.malmo.dwg_preview_xvfb_screen") - or "-screen 0 1024x768x24" - ).strip() or "-screen 0 1024x768x24" - return [xvfb_run, "-a", "-s", screen_spec, *oda_arguments] - - -def _find_generated_dxf(output_dir: str, input_name: str) -> str 
| None: - expected_name = os.path.splitext(input_name)[0] + ".dxf" - expected_path = os.path.join(output_dir, expected_name) - if os.path.exists(expected_path): - return expected_path - - for root, _dirs, files in os.walk(output_dir): - for file_name in files: - if file_name.lower().endswith(".dxf"): - return os.path.join(root, file_name) - return None - - -def _load_dxf_document(dxf_path: str) -> Any: - try: - import ezdxf - from ezdxf import recover - except ImportError as err: - raise ValidationError( - {"converter": [f"DXF renderer dependency is not installed: {err}"]} - ) - - try: - document = ezdxf.readfile(dxf_path) - log.info("Loaded DXF document path=%s using fast read path", dxf_path) - return document - except Exception as fast_err: - log.warning("Fast DXF load failed for %s, retrying recovery path: %s", dxf_path, fast_err) - - try: - document, auditor = recover.readfile(dxf_path) - except Exception as err: - raise ValidationError({"conversion": [f"Generated DXF could not be parsed: {err}"]}) - - error_count = len(getattr(auditor, "errors", [])) - fixed_error_count = len(getattr(auditor, "fixes", [])) - log.info( - "Loaded DXF document path=%s auditor_errors=%s auditor_fixes=%s", - dxf_path, - error_count, - fixed_error_count, - ) - return document - - -def _render_best_layout_preview(document: Any, tmp_dir: str) -> str: - failed_errors: list[ValidationError] = [] - failed_layouts: list[str] = [] - - for layout_name, layout_kind, layout, entity_count in _iter_layout_candidates(document): - try: - _guard_preview_complexity(layout_name, layout_kind, entity_count) - return _render_layout_preview(document, layout, layout_name, entity_count, tmp_dir) - except ValidationError as err: - failed_errors.append(err) - failed_layouts.append(f"{layout_name}: {err.error_dict}") - log.warning("DWG preview layout render failed layout=%s kind=%s entities=%s error=%s", layout_name, layout_kind, entity_count, err.error_dict) - - if len(failed_errors) == 1: - raise 
failed_errors[0] - - raise ValidationError( - { - "conversion": [ - "Preview is currently unavailable for this drawing." - ], - "preview_reason": ["preview_unavailable"], - } - ) - - -def _iter_layout_candidates(document: Any) -> list[tuple[str, str, Any, int]]: - candidates: list[tuple[str, str, Any, int]] = [] - - layout_names_method = getattr(document, "layout_names_in_taborder", None) - modelspace_name = str(getattr(document.modelspace(), "name", "Model")) - if callable(layout_names_method): - for layout_name in list(layout_names_method()): - if str(layout_name).lower() == modelspace_name.lower(): - continue - try: - layout = document.paperspace(layout_name) - except Exception as err: - log.warning("Skipping paperspace layout=%s because it could not be loaded: %s", layout_name, err) - continue - entity_count = _count_layout_entities(layout) - log.info("Found paperspace layout=%s entities=%s", layout_name, entity_count) - if entity_count > 0: - candidates.append((str(layout_name), "paperspace", layout, entity_count)) - - modelspace = document.modelspace() - modelspace_entity_count = _count_layout_entities(modelspace) - log.info("Found modelspace layout=%s entities=%s", getattr(modelspace, "name", "Model"), modelspace_entity_count) - candidates.append((getattr(modelspace, "name", "Model"), "modelspace", modelspace, modelspace_entity_count)) - return candidates - - -def _render_layout_preview(document: Any, layout: Any, layout_name: str, entity_count: int, tmp_dir: str) -> str: - if entity_count <= 0: - raise ValidationError( - {"conversion": [f'Layout "{layout_name}" does not contain drawable entities']} - ) - - preview_path = os.path.join( - tmp_dir, - f"preview.{_sanitize_filename_component(layout_name)}.{PDF_EXTENSION}", - ) - log.info("Rendering DXF layout=%s entities=%s target=%s", layout_name, entity_count, preview_path) - _render_layout_to_pdf(document, layout, preview_path) - _validate_rendered_preview(preview_path, layout_name) - log.info("Rendered 
preview accepted layout=%s bytes=%s", layout_name, os.path.getsize(preview_path)) - return preview_path - - -def _render_layout_to_pdf(document: Any, layout: Any, output_path: str) -> None: - try: - from ezdxf.addons.drawing import Frontend, RenderContext, layout as drawing_layout, pymupdf - except ImportError as err: - raise ValidationError( - {"converter": [f"PDF rendering dependency is not installed: {err}"]} - ) - - margin_mm = max( - 0.0, - _get_float_config("ckanext.malmo.dwg_preview_render_margin_mm", DEFAULT_RENDER_MARGIN_MM), - ) - - try: - context = RenderContext(document) - backend = pymupdf.PyMuPdfBackend() - frontend = Frontend(context, backend) - frontend.draw_layout(layout, finalize=True) - page = _build_preview_page(drawing_layout, margin_mm) - pdf_bytes = backend.get_pdf_bytes(page) - with open(output_path, "wb") as output_file: - output_file.write(pdf_bytes) - except Exception as err: - raise ValidationError( - { - "conversion": [ - f'DXF PDF rendering failed for layout "{getattr(layout, "name", "unknown")}": {err}' - ] - } - ) - - -def _build_preview_page(drawing_layout: Any, margin_mm: float) -> Any: - page_size_mm = max( - 50.0, - _get_float_config( - "ckanext.malmo.dwg_preview_render_page_size_mm", - DEFAULT_RENDER_PAGE_SIZE_MM, - ), - ) - return drawing_layout.Page( - page_size_mm, - page_size_mm, - drawing_layout.Units.mm, - margins=drawing_layout.Margins.all(margin_mm), - ) - - -def _validate_rendered_preview(preview_path: str, layout_name: str) -> None: - if not os.path.exists(preview_path) or os.path.getsize(preview_path) == 0: - raise ValidationError( - {"conversion": [f'Renderer produced no output for layout "{layout_name}"']} - ) - - minimum_size_bytes = _get_int_config( - "ckanext.malmo.dwg_preview_min_preview_bytes", - DEFAULT_MIN_PREVIEW_BYTES, - ) - if os.path.getsize(preview_path) < minimum_size_bytes: - raise ValidationError( - {"conversion": [f'Rendered preview for layout "{layout_name}" is too small to be trustworthy']} - ) - - 
-def _guard_preview_complexity(layout_name: str, layout_kind: str, entity_count: int) -> None: - if layout_kind != "modelspace": - return - - max_modelspace_entities = _get_int_config( - "ckanext.malmo.dwg_preview_max_modelspace_entities", - DEFAULT_MAX_MODELSPACE_ENTITIES, - ) - if entity_count > max_modelspace_entities: - raise ValidationError( - { - "conversion": [ - "This drawing is too detailed to preview here." - ], - "preview_reason": ["modelspace_too_complex"], - } - ) - - -def _count_layout_entities(layout: Any) -> int: - try: - return sum(1 for _entity in layout) - except TypeError: - return len(list(layout)) - - -def _resolve_oda_executable() -> str: - configured_path = str( - toolkit.config.get("ckanext.malmo.dwg_preview_oda_executable") or "ODAFileConverter" - ).strip() or "ODAFileConverter" - if os.path.isabs(configured_path): - if os.path.exists(configured_path) and os.access(configured_path, os.X_OK): - return configured_path - raise ValidationError( - { - "converter": [ - f'Configured ODA File Converter is not executable: "{configured_path}"' - ] - } - ) - - resolved = shutil.which(configured_path) - if resolved: - return resolved - - raise ValidationError( - { - "converter": [ - 'ODA File Converter is not installed. Configure `ckanext.malmo.dwg_preview_oda_executable` or add `ODAFileConverter` to PATH.' 
- ] - } - ) - - -def _run_subprocess( - command: list[str], - timeout: int, - stdout: Any | None = None, -) -> subprocess.CompletedProcess[bytes]: - try: - return subprocess.run( - command, - stdin=subprocess.DEVNULL, - stdout=stdout if stdout is not None else subprocess.PIPE, - stderr=subprocess.PIPE, - check=False, - timeout=timeout, - ) - except subprocess.TimeoutExpired: - raise ValidationError( - {"conversion": [f"Conversion exceeded the timeout of {timeout} seconds"]} - ) - except OSError as err: - raise ValidationError({"conversion": [f"Conversion process failed to start: {err}"]}) - - -def _decode_subprocess_output(output: bytes | None) -> str: - if not output: - return "" - return output.decode("utf-8", errors="replace").strip().splitlines()[0][:400] - - -def _build_output_filename(resource: dict[str, Any]) -> str: - raw_name = ( - str(resource.get("name") or "").strip() - or os.path.basename(str(resource.get("url") or "").strip()) - or resource["id"] - ) - base_name = os.path.splitext(raw_name)[0] or resource["id"] - return f"{base_name}.{PDF_EXTENSION}" - - -def _sanitize_filename_component(value: str) -> str: - sanitized = "".join(char if char.isalnum() or char in "._-" else "-" for char in value).strip("-") - return sanitized or "layout" - - -def _get_int_config(config_key: str, default_value: int) -> int: - raw_value = toolkit.config.get(config_key) - if raw_value in (None, ""): - return default_value - try: - return int(raw_value) - except (TypeError, ValueError): - log.warning("Invalid integer config %s=%r, using default %s", config_key, raw_value, default_value) - return default_value - - -def _get_float_config(config_key: str, default_value: float) -> float: - raw_value = toolkit.config.get(config_key) - if raw_value in (None, ""): - return default_value - try: - return float(raw_value) - except (TypeError, ValueError): - log.warning("Invalid float config %s=%r, using default %s", config_key, raw_value, default_value) - return default_value diff 
--git a/ckanext/malmo/dwg_preview/__init__.py b/ckanext/malmo/dwg_preview/__init__.py new file mode 100644 index 0000000..b0fec6a --- /dev/null +++ b/ckanext/malmo/dwg_preview/__init__.py @@ -0,0 +1,3 @@ +from .service import build_preview_payload + +__all__ = ["build_preview_payload"] diff --git a/ckanext/malmo/dwg_preview/cache.py b/ckanext/malmo/dwg_preview/cache.py new file mode 100644 index 0000000..b5a5983 --- /dev/null +++ b/ckanext/malmo/dwg_preview/cache.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +import hashlib +import os +import shutil + + +def file_sha256(path: str) -> str: + digest = hashlib.sha256() + with open(path, "rb") as source_file: + for chunk in iter(lambda: source_file.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def build_cache_path(cache_dir: str, resource_id: str, source_hash: str) -> str: + cache_key = hashlib.sha256(f"{resource_id}:{source_hash}".encode("utf-8")).hexdigest() + return os.path.join(cache_dir, f"{cache_key}.png") + + +def is_cached_preview_valid(path: str, min_preview_bytes: int) -> bool: + return os.path.exists(path) and os.path.getsize(path) >= min_preview_bytes + + +def store_cached_preview(source_path: str, cache_path: str) -> None: + os.makedirs(os.path.dirname(cache_path), exist_ok=True) + temp_path = f"{cache_path}.tmp" + shutil.copyfile(source_path, temp_path) + os.replace(temp_path, cache_path) diff --git a/ckanext/malmo/dwg_preview/config.py b/ckanext/malmo/dwg_preview/config.py new file mode 100644 index 0000000..c42611e --- /dev/null +++ b/ckanext/malmo/dwg_preview/config.py @@ -0,0 +1,162 @@ +from __future__ import annotations + +import logging +import os +import tempfile +from dataclasses import dataclass + +from ckan.plugins import toolkit + +log = logging.getLogger(__name__) + +DEFAULT_TIMEOUT_SECONDS = 45 +DEFAULT_DOWNLOAD_TIMEOUT_SECONDS = 30 +DEFAULT_MAX_DOWNLOAD_BYTES = 100 * 1024 * 1024 +DEFAULT_ODA_OUTPUT_VERSION = "ACAD2018" +DEFAULT_XVFB_SCREEN = 
"-screen 0 1600x1200x24" +DEFAULT_RENDER_MARGIN = 0.05 +DEFAULT_IMAGE_WIDTH = 1600 +DEFAULT_IMAGE_HEIGHT = 1200 +DEFAULT_MIN_PREVIEW_BYTES = 1024 +DEFAULT_CACHE_DIR = os.path.join(tempfile.gettempdir(), "ckan-dwg-preview-cache") +DEFAULT_MIN_CONTENT_COVERAGE = 0.002 +DEFAULT_MAX_INITIAL_COVERAGE = 0.6 +DEFAULT_RETRY_RENDER_MARGIN = 0.01 +DEFAULT_LINEWEIGHT_SCALING = 1.5 +DEFAULT_MIN_OCCUPIED_WIDTH_RATIO = 0.2 +DEFAULT_MIN_OCCUPIED_HEIGHT_RATIO = 0.2 + + +@dataclass(frozen=True) +class PreviewConfig: + timeout: int + download_timeout: int + max_download_bytes: int + oda_executable: str + oda_output_version: str + xvfb_screen: str + render_margin: float + image_width: int + image_height: int + min_preview_bytes: int + cache_dir: str + min_content_coverage: float + max_initial_coverage: float + retry_render_margin: float + lineweight_scaling: float + min_occupied_width_ratio: float + min_occupied_height_ratio: float + + @classmethod + def from_ckan_config(cls) -> "PreviewConfig": + return cls( + timeout=_get_int("ckanext.malmo.dwg_preview_timeout", DEFAULT_TIMEOUT_SECONDS, minimum=1), + download_timeout=_get_int( + "ckanext.malmo.dwg_preview_download_timeout", + DEFAULT_DOWNLOAD_TIMEOUT_SECONDS, + minimum=1, + ), + max_download_bytes=_get_int( + "ckanext.malmo.dwg_preview_max_download_bytes", + DEFAULT_MAX_DOWNLOAD_BYTES, + minimum=1024, + ), + oda_executable=_get_string("ckanext.malmo.dwg_preview_oda_executable", "ODAFileConverter"), + oda_output_version=_get_string( + "ckanext.malmo.dwg_preview_oda_output_version", + DEFAULT_ODA_OUTPUT_VERSION, + ), + xvfb_screen=_get_string( + "ckanext.malmo.dwg_preview_xvfb_screen", + DEFAULT_XVFB_SCREEN, + ), + render_margin=_get_float( + "ckanext.malmo.dwg_preview_render_margin", + DEFAULT_RENDER_MARGIN, + minimum=0.0, + ), + image_width=_get_int( + "ckanext.malmo.dwg_preview_image_width", + DEFAULT_IMAGE_WIDTH, + minimum=256, + ), + image_height=_get_int( + "ckanext.malmo.dwg_preview_image_height", + DEFAULT_IMAGE_HEIGHT, + 
minimum=256, + ), + min_preview_bytes=_get_int( + "ckanext.malmo.dwg_preview_min_preview_bytes", + DEFAULT_MIN_PREVIEW_BYTES, + minimum=1, + ), + cache_dir=_get_string("ckanext.malmo.dwg_preview_cache_dir", DEFAULT_CACHE_DIR), + min_content_coverage=_get_float( + "ckanext.malmo.dwg_preview_min_content_coverage", + DEFAULT_MIN_CONTENT_COVERAGE, + minimum=0.00001, + ), + max_initial_coverage=_get_float( + "ckanext.malmo.dwg_preview_max_initial_coverage", + DEFAULT_MAX_INITIAL_COVERAGE, + minimum=0.001, + ), + retry_render_margin=_get_float( + "ckanext.malmo.dwg_preview_retry_render_margin", + DEFAULT_RETRY_RENDER_MARGIN, + minimum=0.0, + ), + lineweight_scaling=_get_float( + "ckanext.malmo.dwg_preview_lineweight_scaling", + DEFAULT_LINEWEIGHT_SCALING, + minimum=0.1, + ), + min_occupied_width_ratio=_get_float( + "ckanext.malmo.dwg_preview_min_occupied_width_ratio", + DEFAULT_MIN_OCCUPIED_WIDTH_RATIO, + minimum=0.0, + ), + min_occupied_height_ratio=_get_float( + "ckanext.malmo.dwg_preview_min_occupied_height_ratio", + DEFAULT_MIN_OCCUPIED_HEIGHT_RATIO, + minimum=0.0, + ), + ) + + +def _get_string(config_key: str, default_value: str) -> str: + raw_value = toolkit.config.get(config_key) + if raw_value in (None, ""): + return default_value + value = str(raw_value).strip() + return value or default_value + + +def _get_int(config_key: str, default_value: int, minimum: int | None = None) -> int: + raw_value = toolkit.config.get(config_key) + if raw_value in (None, ""): + return default_value + try: + value = int(raw_value) + except (TypeError, ValueError): + log.warning("Invalid integer config %s=%r, using default %s", config_key, raw_value, default_value) + return default_value + if minimum is not None and value < minimum: + log.warning("Config %s=%r is below minimum %s, using default %s", config_key, value, minimum, default_value) + return default_value + return value + + +def _get_float(config_key: str, default_value: float, minimum: float | None = None) -> float: + 
raw_value = toolkit.config.get(config_key) + if raw_value in (None, ""): + return default_value + try: + value = float(raw_value) + except (TypeError, ValueError): + log.warning("Invalid float config %s=%r, using default %s", config_key, raw_value, default_value) + return default_value + if minimum is not None and value < minimum: + log.warning("Config %s=%r is below minimum %s, using default %s", config_key, value, minimum, default_value) + return default_value + return value diff --git a/ckanext/malmo/dwg_preview/oda.py b/ckanext/malmo/dwg_preview/oda.py new file mode 100644 index 0000000..0988f95 --- /dev/null +++ b/ckanext/malmo/dwg_preview/oda.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +import logging +import os +import shutil +import subprocess + +import ckan.logic as logic + +from .config import PreviewConfig + +log = logging.getLogger(__name__) + +ValidationError = logic.ValidationError + + +def convert_dwg_to_dxf(source_path: str, working_dir: str, config: PreviewConfig) -> str: + executable = _resolve_oda_executable(config.oda_executable) + input_dir = os.path.join(working_dir, "oda-input") + output_dir = os.path.join(working_dir, "oda-output") + os.makedirs(input_dir, exist_ok=True) + os.makedirs(output_dir, exist_ok=True) + + input_name = os.path.basename(source_path) + staged_input_path = os.path.join(input_dir, input_name) + shutil.copyfile(source_path, staged_input_path) + + command = _build_oda_command( + [ + executable, + input_dir, + output_dir, + config.oda_output_version, + "DXF", + "0", + "1", + "*.dwg", + ], + xvfb_screen=config.xvfb_screen, + ) + log.info("Running ODA File Converter command=%s", command) + result = _run_subprocess(command, timeout=config.timeout) + log.info( + "ODA File Converter finished code=%s stdout=%s stderr=%s", + result.returncode, + _decode_subprocess_output(result.stdout) or "", + _decode_subprocess_output(result.stderr) or "", + ) + + if result.returncode != 0: + raise ValidationError({"conversion": 
[_format_conversion_error("DWG to DXF conversion failed", result.stderr)]}) + + dxf_path = _find_generated_dxf(output_dir, input_name) + if not dxf_path or not os.path.exists(dxf_path) or os.path.getsize(dxf_path) == 0: + raise ValidationError({"conversion": ["DWG to DXF conversion did not produce a usable DXF file"]}) + + log.info("Generated DXF path=%s bytes=%s", dxf_path, os.path.getsize(dxf_path)) + return dxf_path + + +def _resolve_oda_executable(configured_path: str) -> str: + if os.path.isabs(configured_path): + if os.path.exists(configured_path) and os.access(configured_path, os.X_OK): + return configured_path + raise ValidationError({"converter": [f'Configured ODA File Converter is not executable: "{configured_path}"']}) + + resolved = shutil.which(configured_path) + if resolved: + return resolved + + raise ValidationError( + { + "converter": [ + 'ODA File Converter is not installed. Configure `ckanext.malmo.dwg_preview_oda_executable` or add `ODAFileConverter` to PATH.' + ] + } + ) + + +def _build_oda_command(oda_arguments: list[str], xvfb_screen: str) -> list[str]: + xvfb_run = shutil.which("xvfb-run") + if not xvfb_run: + return oda_arguments + return [xvfb_run, "-a", "-s", xvfb_screen, *oda_arguments] + + +def _run_subprocess(command: list[str], timeout: int) -> subprocess.CompletedProcess[bytes]: + try: + return subprocess.run( + command, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + timeout=timeout, + ) + except subprocess.TimeoutExpired: + raise ValidationError({"conversion": [f"Conversion exceeded the timeout of {timeout} seconds"]}) + except OSError as err: + raise ValidationError({"conversion": [f"Conversion process failed to start: {err}"]}) + + +def _find_generated_dxf(output_dir: str, input_name: str) -> str | None: + expected_name = os.path.splitext(input_name)[0] + ".dxf" + expected_path = os.path.join(output_dir, expected_name) + if os.path.exists(expected_path): + return expected_path + + 
for root, _dirs, files in os.walk(output_dir): + for file_name in files: + if file_name.lower().endswith(".dxf"): + return os.path.join(root, file_name) + return None + + +def _decode_subprocess_output(output: bytes | None) -> str: + if not output: + return "" + return output.decode("utf-8", errors="replace").strip().splitlines()[0][:400] + + +def _format_conversion_error(prefix: str, stderr: bytes | None) -> str: + decoded_stderr = _decode_subprocess_output(stderr) + return f"{prefix}: {decoded_stderr}" if decoded_stderr else prefix diff --git a/ckanext/malmo/dwg_preview/render.py b/ckanext/malmo/dwg_preview/render.py new file mode 100644 index 0000000..0c5df00 --- /dev/null +++ b/ckanext/malmo/dwg_preview/render.py @@ -0,0 +1,354 @@ +from __future__ import annotations + +import logging +import os +import statistics +from dataclasses import dataclass +from typing import Any + +import ckan.logic as logic + +from .config import PreviewConfig + +log = logging.getLogger(__name__) + +ValidationError = logic.ValidationError + + +@dataclass(frozen=True) +class LayoutCandidate: + name: str + layout: Any + kind: str + entity_count: int + viewport_count: int + text_hint_count: int + frame_hint_count: int + + +@dataclass(frozen=True) +class RenderedPreviewMetrics: + coverage: float + occupied_width: int + occupied_height: int + bbox: tuple[int, int, int, int] | None + + +def render_dxf_to_png(dxf_path: str, output_path: str, config: PreviewConfig) -> None: + document = _load_dxf_document(dxf_path) + last_error: ValidationError | None = None + + for candidate in _iter_layout_candidates(document): + try: + _render_layout(document, candidate, output_path, config) + _validate_preview(output_path, candidate.name, config) + log.info("Rendered preview accepted layout=%s bytes=%s", candidate.name, os.path.getsize(output_path)) + return + except ValidationError as err: + last_error = err + log.warning("DWG preview layout render failed layout=%s error=%s", candidate.name, 
def _load_dxf_document(dxf_path: str) -> Any:
    """Parse the DXF file at ``dxf_path`` and return the ezdxf document.

    The regular ``ezdxf.readfile`` reader is tried first. If it raises for any
    reason, the file is read again through ``ezdxf.recover.readfile`` and the
    auditor's error and fix counts are logged.

    Raises:
        ValidationError: Under ``converter`` when ezdxf cannot be imported, or
            under ``conversion`` when the recovery reader also fails.
    """
    try:
        import ezdxf
        from ezdxf import recover
    except ImportError as err:
        raise ValidationError({"converter": [f"DXF renderer dependency is not installed: {err}"]})

    try:
        fast_document = ezdxf.readfile(dxf_path)
    except Exception as fast_err:
        # Deliberately broad: any failure on the fast path falls through to
        # the more tolerant recovery reader below.
        log.warning("Fast DXF load failed for %s, retrying recovery path: %s", dxf_path, fast_err)
    else:
        log.info("Loaded DXF document path=%s using fast read path", dxf_path)
        return fast_document

    try:
        recovered_document, auditor = recover.readfile(dxf_path)
    except Exception as err:
        raise ValidationError({"conversion": [f"Generated DXF could not be parsed: {err}"]})

    error_count = len(getattr(auditor, "errors", []))
    fix_count = len(getattr(auditor, "fixes", []))
    log.info(
        "Loaded DXF document path=%s auditor_errors=%s auditor_fixes=%s",
        dxf_path,
        error_count,
        fix_count,
    )
    return recovered_document
key=_layout_priority, reverse=True) + + +def _build_layout_candidate(layout_name: str, layout: Any, kind: str) -> LayoutCandidate: + entity_types = [dxftype for dxftype in _iter_entity_types(layout)] + viewport_count = entity_types.count("VIEWPORT") + text_hint_count = sum(1 for dxftype in entity_types if dxftype in {"TEXT", "MTEXT", "ATTRIB", "ATTDEF"}) + frame_hint_count = sum(1 for dxftype in entity_types if dxftype in {"LWPOLYLINE", "POLYLINE", "LINE"}) + return LayoutCandidate( + name=layout_name, + layout=layout, + kind=kind, + entity_count=len(entity_types), + viewport_count=viewport_count, + text_hint_count=text_hint_count, + frame_hint_count=frame_hint_count, + ) + + +def _layout_priority(candidate: LayoutCandidate) -> tuple[int, int, int, int]: + is_paperspace = 1 if candidate.kind == "paperspace" else 0 + return ( + is_paperspace, + candidate.viewport_count, + candidate.text_hint_count, + candidate.entity_count, + ) + + +def _render_layout(document: Any, candidate: LayoutCandidate, output_path: str, config: PreviewConfig) -> None: + layout = candidate.layout + layout_name = candidate.name + if candidate.entity_count <= 0: + raise ValidationError({"conversion": [f'Layout "{layout_name}" does not contain drawable entities']}) + + try: + import matplotlib + + matplotlib.use("Agg") + + import matplotlib.pyplot as plt + from ezdxf.addons.drawing import Frontend, RenderContext, config as drawing_config + from ezdxf.addons.drawing.matplotlib import MatplotlibBackend + from ezdxf.addons.drawing.recorder import Recorder + except ImportError as err: + raise ValidationError({"converter": [f"PNG rendering dependency is not installed: {err}"]}) + + dpi = 100 + figure = plt.figure(figsize=(config.image_width / dpi, config.image_height / dpi), dpi=dpi) + axis = figure.add_axes([0, 0, 1, 1]) + axis.set_axis_off() + axis.set_facecolor("white") + figure.patch.set_facecolor("white") + + try: + frontend_config = drawing_config.Configuration( + 
background_policy=drawing_config.BackgroundPolicy.WHITE, + color_policy=drawing_config.ColorPolicy.BLACK, + lineweight_scaling=config.lineweight_scaling, + ) + recorder = Recorder() + frontend = Frontend(RenderContext(document), recorder, config=frontend_config) + frontend.draw_layout(layout, finalize=True) + player = recorder.player() + content_bbox = _resolve_content_bbox(player, candidate, config) + if not content_bbox.has_data: + raise ValidationError({"conversion": [f'Layout "{layout_name}" does not contain visible drawable bounds']}) + + backend = MatplotlibBackend(axis) + player.replay(backend) + _set_axis_limits(axis, content_bbox, config.render_margin) + axis.set_aspect("equal", adjustable="datalim") + figure.savefig( + output_path, + format="png", + dpi=dpi, + bbox_inches=None, + pad_inches=0, + facecolor="white", + edgecolor="white", + ) + metrics = _measure_rendered_preview(output_path) + if not _is_preview_coverage_acceptable(metrics, config): + if candidate.kind == "paperspace": + tighter_bbox = _crop_bbox(content_bbox, config.retry_render_margin) + axis.cla() + axis.set_axis_off() + axis.set_facecolor("white") + backend = MatplotlibBackend(axis) + player.replay(backend) + _set_axis_limits(axis, tighter_bbox, config.retry_render_margin) + axis.set_aspect("equal", adjustable="datalim") + figure.savefig( + output_path, + format="png", + dpi=dpi, + bbox_inches=None, + pad_inches=0, + facecolor="white", + edgecolor="white", + ) + metrics = _measure_rendered_preview(output_path) + + if not _is_preview_coverage_acceptable(metrics, config): + raise ValidationError( + { + "conversion": [f'Rendered preview for layout "{layout_name}" occupies too little of the image'], + "preview_reason": ["preview_too_sparse"], + } + ) + except ValidationError: + raise + except Exception as err: + raise ValidationError({"conversion": [f'DXF raster rendering failed for layout "{layout_name}": {err}']}) + finally: + plt.close(figure) + + +def _validate_preview(output_path: str, 
layout_name: str, config: PreviewConfig) -> None: + if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: + raise ValidationError({"conversion": [f'Renderer produced no output for layout "{layout_name}"']}) + if os.path.getsize(output_path) < config.min_preview_bytes: + raise ValidationError( + {"conversion": [f'Rendered preview for layout "{layout_name}" is too small to be trustworthy']} + ) + + +def _count_entities(layout: Any) -> int: + try: + return sum(1 for _entity in layout) + except TypeError: + return len(list(layout)) + + +def _iter_entity_types(layout: Any) -> list[str]: + types: list[str] = [] + for entity in layout: + try: + types.append(str(entity.dxftype()).upper()) + except Exception: + continue + return types + + +def _resolve_content_bbox(player: Any, candidate: LayoutCandidate, config: PreviewConfig) -> Any: + full_bbox = player.bbox() + if not full_bbox.has_data: + return full_bbox + + if candidate.kind != "paperspace": + return full_bbox + + width = max(float(full_bbox.extmax.x - full_bbox.extmin.x), 1.0) + height = max(float(full_bbox.extmax.y - full_bbox.extmin.y), 1.0) + area = width * height + if area <= 0: + return full_bbox + + # For paperspace layouts, reduce the chance of one stray entity making the + # full sheet look empty by cropping lightly toward the center when the bbox + # is unusually loose. 
+ cropped = _crop_bbox(full_bbox, config.retry_render_margin) + cropped_width = max(float(cropped.extmax.x - cropped.extmin.x), 1.0) + cropped_height = max(float(cropped.extmax.y - cropped.extmin.y), 1.0) + cropped_area = cropped_width * cropped_height + if cropped_area / area < config.max_initial_coverage: + return cropped + return full_bbox + + +def _set_axis_limits(axis: Any, content_bbox: Any, render_margin: float) -> None: + extmin = content_bbox.extmin + extmax = content_bbox.extmax + width = max(float(extmax.x - extmin.x), 1.0) + height = max(float(extmax.y - extmin.y), 1.0) + pad_x = max(width * render_margin, 1.0) + pad_y = max(height * render_margin, 1.0) + + axis.set_xlim(float(extmin.x - pad_x), float(extmax.x + pad_x)) + axis.set_ylim(float(extmin.y - pad_y), float(extmax.y + pad_y)) + + +def _crop_bbox(content_bbox: Any, render_margin: float) -> Any: + try: + from ezdxf.math import BoundingBox2d + except ImportError: + return content_bbox + + extmin = content_bbox.extmin + extmax = content_bbox.extmax + width = max(float(extmax.x - extmin.x), 1.0) + height = max(float(extmax.y - extmin.y), 1.0) + shrink_x = width * min(max(render_margin, 0.0), 0.2) + shrink_y = height * min(max(render_margin, 0.0), 0.2) + return BoundingBox2d( + [ + (float(extmin.x + shrink_x), float(extmin.y + shrink_y)), + (float(extmax.x - shrink_x), float(extmax.y - shrink_y)), + ] + ) + + +def _measure_rendered_preview(output_path: str) -> RenderedPreviewMetrics: + try: + from PIL import Image + except ImportError as err: + raise ValidationError({"converter": [f"Image validation dependency is not installed: {err}"]}) + + with Image.open(output_path) as image: + grayscale = image.convert("L") + width, height = grayscale.size + pixels = grayscale.load() + occupied: list[tuple[int, int]] = [] + for y in range(height): + for x in range(width): + if pixels[x, y] < 245: + occupied.append((x, y)) + + if not occupied: + return RenderedPreviewMetrics(coverage=0.0, occupied_width=0, 
occupied_height=0, bbox=None) + + xs = [point[0] for point in occupied] + ys = [point[1] for point in occupied] + min_x = min(xs) + max_x = max(xs) + min_y = min(ys) + max_y = max(ys) + occupied_width = max_x - min_x + 1 + occupied_height = max_y - min_y + 1 + coverage = len(occupied) / float(width * height) + + return RenderedPreviewMetrics( + coverage=coverage, + occupied_width=occupied_width, + occupied_height=occupied_height, + bbox=(min_x, min_y, max_x, max_y), + ) + + +def _is_preview_coverage_acceptable(metrics: RenderedPreviewMetrics, config: PreviewConfig) -> bool: + if metrics.coverage >= config.min_content_coverage: + return True + + width_ratio = metrics.occupied_width / float(config.image_width) if config.image_width else 0.0 + height_ratio = metrics.occupied_height / float(config.image_height) if config.image_height else 0.0 + return ( + width_ratio >= config.min_occupied_width_ratio + and height_ratio >= config.min_occupied_height_ratio + ) diff --git a/ckanext/malmo/dwg_preview/service.py b/ckanext/malmo/dwg_preview/service.py new file mode 100644 index 0000000..2d96786 --- /dev/null +++ b/ckanext/malmo/dwg_preview/service.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +import logging +import os +import shutil +from tempfile import TemporaryDirectory +from typing import Any +from urllib.parse import urlparse + +import requests + +import ckan.lib.uploader as uploader +import ckan.logic as logic +from ckan.plugins import toolkit + +from .cache import build_cache_path, file_sha256, is_cached_preview_valid, store_cached_preview +from .config import PreviewConfig +from .oda import convert_dwg_to_dxf +from .render import render_dxf_to_png + +log = logging.getLogger(__name__) + +ValidationError = logic.ValidationError +NotAuthorized = logic.NotAuthorized +NotFound = logic.NotFound + +DWG_MIME_TYPES = { + "application/acad", + "application/autocad_dwg", + "application/dwg", + "application/x-acad", + "application/x-autocad", + 
"application/x-dwg", + "image/vnd.dwg", + "image/x-dwg", +} +DOWNLOAD_CHUNK_SIZE = 64 * 1024 +PNG_MIMETYPE = "image/png" +PNG_EXTENSION = "png" + + +def build_preview_payload(context: dict[str, Any], data_dict: dict[str, Any]) -> dict[str, Any]: + resource_id = (data_dict or {}).get("id") + if not resource_id: + raise ValidationError({"id": ["Missing value"]}) + + config = PreviewConfig.from_ckan_config() + resource = _get_resource_for_preview(context, resource_id) + if not _is_dwg_resource(resource): + raise ValidationError({"id": ["Resource must be a DWG file to generate a preview"]}) + + log.info("DWG preview requested for resource=%s format=png", resource_id) + + with TemporaryDirectory(prefix="ckan-dwg-preview-") as tmp_dir: + source_path = _stage_resource_dwg(resource, tmp_dir, config) + source_hash = file_sha256(source_path) + cache_path = build_cache_path(config.cache_dir, resource_id, source_hash) + + if is_cached_preview_valid(cache_path, config.min_preview_bytes): + log.info("Serving cached DWG preview resource=%s cache=%s", resource_id, cache_path) + content = _read_file(cache_path) + else: + content = _generate_preview(resource_id, source_path, tmp_dir, cache_path, config) + + return { + "content": content, + "filename": _build_output_filename(resource), + "mimetype": PNG_MIMETYPE, + "resource_id": resource_id, + } + + +def _generate_preview( + resource_id: str, + source_path: str, + tmp_dir: str, + cache_path: str, + config: PreviewConfig, +) -> bytes: + dxf_path = convert_dwg_to_dxf(source_path, tmp_dir, config) + preview_path = os.path.join(tmp_dir, "preview.png") + render_dxf_to_png(dxf_path, preview_path, config) + store_cached_preview(preview_path, cache_path) + log.info("DWG preview generated resource=%s path=%s cache=%s", resource_id, preview_path, cache_path) + return _read_file(preview_path) + + +def _read_file(path: str) -> bytes: + with open(path, "rb") as output_file: + return output_file.read() + + +def _get_resource_for_preview(context: 
dict[str, Any], resource_id: str) -> dict[str, Any]: + try: + return toolkit.get_action("resource_show")(context, {"id": resource_id}) + except NotFound: + raise ValidationError({"id": ["Resource does not exist"]}) + except NotAuthorized: + raise ValidationError({"id": ["User cannot view this resource"]}) + + +def _is_dwg_resource(resource: dict[str, Any]) -> bool: + resource_format = str(resource.get("format") or "").strip().lower() + if resource_format: + normalized_format = resource_format.lstrip(".") + if normalized_format == "dwg" or "dwg" in normalized_format: + return True + + for mime_field in ("mimetype", "mimetype_inner"): + mimetype_value = str(resource.get(mime_field) or "").strip().lower() + if mimetype_value in DWG_MIME_TYPES or mimetype_value.endswith("/dwg"): + return True + + for path_field in ("url", "name"): + raw_value = str(resource.get(path_field) or "") + extension = os.path.splitext(urlparse(raw_value).path)[1].lower() + if extension == ".dwg": + return True + + return False + + +def _stage_resource_dwg(resource: dict[str, Any], tmp_dir: str, config: PreviewConfig) -> str: + source_path = os.path.join(tmp_dir, "source.dwg") + + if resource.get("url_type") == "upload": + log.info("Preparing uploaded DWG resource=%s", resource.get("id")) + _copy_uploaded_resource(resource, source_path, config) + else: + resource_url = str(resource.get("url") or "").strip() + if not resource_url: + raise ValidationError({"id": ["Resource does not have a downloadable URL"]}) + log.info("Downloading external DWG resource=%s url=%s", resource.get("id"), resource_url) + _download_to_path( + resource_url, + source_path, + max_download_bytes=config.max_download_bytes, + download_timeout=config.download_timeout, + source_label="external DWG resource", + ) + + if not os.path.exists(source_path) or os.path.getsize(source_path) == 0: + raise ValidationError({"id": ["DWG source file could not be prepared"]}) + + log.info( + "Prepared DWG source resource=%s path=%s 
def _copy_uploaded_resource(resource: dict[str, Any], destination_path: str, config: PreviewConfig) -> None:
    """Place an uploaded resource's file at ``destination_path``.

    The uploader's ``get_path(resource_id)`` is asked for a local path first;
    when that path exists the file is copied from disk. Otherwise, if the
    uploader exposes ``bucket_name``, ``get_path`` and
    ``get_signed_url_to_key``, the file is fetched through the remote-storage
    helper.

    Raises:
        ValidationError: When neither a local file nor a remote-storage
            uploader is available.
    """
    resource_upload = uploader.get_resource_uploader(dict(resource))
    resource_id = resource["id"]
    url_basename = os.path.basename(str(resource.get("url") or ""))
    resource_name = url_basename if url_basename else f"{resource_id}.dwg"

    try:
        local_path = resource_upload.get_path(resource_id)
    except TypeError:
        # get_path rejected the single-argument call; no local path available.
        local_path = None
    except Exception as err:
        log.debug("Failed to resolve local upload path for %s: %s", resource_id, err)
        local_path = None

    if local_path and os.path.exists(local_path):
        log.info("Copying uploaded DWG from local storage resource=%s path=%s", resource_id, local_path)
        _copy_local_file(local_path, destination_path, config.max_download_bytes)
        return

    remote_attributes = ("bucket_name", "get_path", "get_signed_url_to_key")
    supports_remote_storage = all(hasattr(resource_upload, attribute) for attribute in remote_attributes)
    if not supports_remote_storage:
        raise ValidationError({"id": ["Uploaded resource storage backend is not supported by convert_dwg"]})

    _download_uploaded_resource_from_storage(resource_upload, resource_id, resource_name, destination_path, config)
resource_id) + raise ValidationError({"id": [f"Could not resolve uploaded resource: {err}"]}) + else: + log.exception("Failed to resolve uploaded resource %s from remote storage", resource_id) + raise ValidationError({"id": [f"Could not resolve uploaded resource: {err}"]}) + + log.info("Downloading uploaded DWG from remote storage resource=%s key=%s", resource_id, remote_key) + _download_to_path( + signed_url, + destination_path, + max_download_bytes=config.max_download_bytes, + download_timeout=config.download_timeout, + source_label="uploaded DWG resource", + ) + + +def _copy_local_file(source_path: str, destination_path: str, max_download_bytes: int) -> None: + file_size = os.path.getsize(source_path) + if file_size > max_download_bytes: + raise ValidationError( + {"id": [f"DWG source file exceeds the maximum allowed size of {max_download_bytes} bytes"]} + ) + shutil.copyfile(source_path, destination_path) + + +def _download_to_path( + url: str, + destination_path: str, + max_download_bytes: int, + download_timeout: int, + source_label: str, +) -> None: + parsed_url = urlparse(url) + if parsed_url.scheme.lower() not in {"http", "https"}: + raise ValidationError({"id": [f"Unsupported URL scheme for {source_label}"]}) + + bytes_downloaded = 0 + try: + with requests.get(url, stream=True, timeout=(10, download_timeout), allow_redirects=True) as response: + response.raise_for_status() + with open(destination_path, "wb") as destination_file: + for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE): + if not chunk: + continue + bytes_downloaded += len(chunk) + if bytes_downloaded > max_download_bytes: + raise ValidationError( + {"id": [f"{source_label.capitalize()} exceeds the maximum allowed size of {max_download_bytes} bytes"]} + ) + destination_file.write(chunk) + except ValidationError: + if os.path.exists(destination_path): + os.remove(destination_path) + raise + except requests.RequestException as err: + if os.path.exists(destination_path): + 
def _build_output_filename(resource: dict[str, Any]) -> str:
    """Derive the download filename for the rendered preview.

    The base name comes from the first non-empty value among the stripped
    resource ``name``, the basename of the stripped ``url``, and the resource
    ``id``. Its extension is removed (falling back to the ``id`` if nothing is
    left) and ``PNG_EXTENSION`` is appended.
    """
    raw_name = str(resource.get("name") or "").strip()
    if not raw_name:
        raw_name = os.path.basename(str(resource.get("url") or "").strip())
    if not raw_name:
        raw_name = resource["id"]

    stem, _extension = os.path.splitext(raw_name)
    if not stem:
        stem = resource["id"]
    return f"{stem}.{PNG_EXTENSION}"
""" return dwg_preview.build_preview_payload(context, data_dict) diff --git a/ckanext/malmo/plugin.py b/ckanext/malmo/plugin.py index 05a4375..8aed619 100644 --- a/ckanext/malmo/plugin.py +++ b/ckanext/malmo/plugin.py @@ -24,7 +24,7 @@ def update_config(self, config): def get_actions(self): return { - 'dwg_preview_convert': malmo_logic_actions.dwg_preview_convert, + 'convert_dwg': malmo_logic_actions.convert_dwg, 'package_update': malmo_actions.package_update, 'package_create': malmo_actions.package_create, 'package_patch': malmo_actions.package_patch, diff --git a/ckanext/malmo/views.py b/ckanext/malmo/views.py index f5d389c..bd963be 100644 --- a/ckanext/malmo/views.py +++ b/ckanext/malmo/views.py @@ -17,8 +17,8 @@ ValidationError = logic.ValidationError -@dwg_preview_blueprint.route("/api/3/action/dwg_preview_convert", methods=["GET", "POST"]) -def dwg_preview_convert() -> flask.Response: +@dwg_preview_blueprint.route("/api/3/action/convert_dwg", methods=["GET", "POST"]) +def convert_dwg() -> flask.Response: """ Binary endpoint that mirrors an action URL. @@ -30,7 +30,7 @@ def dwg_preview_convert() -> flask.Response: context = _build_context() try: - payload = toolkit.get_action("dwg_preview_convert")(context, data_dict) + payload = toolkit.get_action("convert_dwg")(context, data_dict) except ValidationError as err: return _validation_error_response(err) except Exception: @@ -98,6 +98,6 @@ def _help_url() -> str: "api.action", logic_function="help_show", ver=3, - name="dwg_preview_convert", + name="convert_dwg", _external=True, ) diff --git a/requirements.txt b/requirements.txt index fc6e4d3..393a103 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ html2text requests ezdxf -PyMuPDF +matplotlib +Pillow diff --git a/setup.py b/setup.py index c9631d1..ee6494f 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,8 @@ 'html2text', 'requests', 'ezdxf', - 'PyMuPDF', + 'matplotlib', + 'Pillow', ], entry_points=''' [ckan.plugins]