diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..185d1ab --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*.egg-info/ +.pytest_cache/ +.mypy_cache/ diff --git a/README.md b/README.md index 124475f..c9d4054 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,103 @@ # ckanext-malmo -Customizations for the City of Malmö CKAN instance. +Customizations for the City of Malmo CKAN instance. ## Requirements - CKAN 2.10+ (tested on 2.11) +## DWG Preview + +This extension exposes a binary preview action at: + +```text +/api/3/action/convert_dwg?id=<resource_id> +``` + +The preview flow is: + +1. stage the DWG resource into a temporary file +2. convert DWG -> DXF with ODA File Converter +3. render DXF -> PNG with `ezdxf` and `matplotlib` +4. cache the generated PNG by resource id + file hash +5. return the PNG directly from the CKAN endpoint + +Important runtime requirements: + +- ODA File Converter must be installed and available on `PATH` +- `xvfb` is used automatically when `xvfb-run` is available +- Python rendering dependencies must be installed in the CKAN runtime + +Python runtime dependencies: + +- `ezdxf` +- `matplotlib` + +System/runtime dependencies: + +- ODA File Converter Linux asset (`.AppImage` or `.deb`) +- `xvfb` + +## DWG Preview Configuration + +The DWG preview pipeline supports these CKAN config settings: + +- `ckanext.malmo.dwg_preview_timeout` + Conversion timeout in seconds. Default: `45`. + +- `ckanext.malmo.dwg_preview_download_timeout` + Download timeout in seconds for remote DWG resources. Default: `30`. + +- `ckanext.malmo.dwg_preview_max_download_bytes` + Maximum DWG download size in bytes. Default: `104857600`. + +- `ckanext.malmo.dwg_preview_oda_executable` + Absolute path or executable name for ODA File Converter. Default: `ODAFileConverter`. + +- `ckanext.malmo.dwg_preview_oda_output_version` + DXF target version passed to ODA File Converter. Default: `ACAD2018`. 
+ +- `ckanext.malmo.dwg_preview_xvfb_screen` + Screen configuration passed to `xvfb-run` when launching ODA File Converter in headless Docker environments. Default: `-screen 0 1600x1200x24`. + +- `ckanext.malmo.dwg_preview_render_margin` + Extra margin applied around rendered geometry. Default: `0.05`. + +- `ckanext.malmo.dwg_preview_image_width` + Output preview width in pixels. Default: `1600`. + +- `ckanext.malmo.dwg_preview_image_height` + Output preview height in pixels. Default: `1200`. + +- `ckanext.malmo.dwg_preview_min_preview_bytes` + Minimum byte size for accepting a generated preview. Default: `1024`. + +- `ckanext.malmo.dwg_preview_cache_dir` + Directory used for cached PNG previews. Default: system temporary directory + `ckan-dwg-preview-cache`. + +## Docker Setup + +In the local development Docker setup: + +- ODA File Converter is installed during image build +- the local `src/ckanext-malmo` extension is installed at container startup from the mounted workspace +- `xvfb` is installed for headless ODA execution + +Optional local ODA asset override directory: + +```text +ckan/vendor/oda/ +``` + +Supported local asset formats: + +- `.AppImage` +- `.deb` + ## Installation To install `ckanext-malmo`: -1. Clone this repository (or copy the extension files). -2. Install the extension in your environment: - ```bash - pip install -e ckan/extensions/ckanext-malmo - ``` -3. Add `malmo` to the `ckan.plugins` setting in your CKAN configuration file (`ckan.ini`): - ```ini - ckan.plugins = ... malmo - ``` +1. Install the extension in your environment. +2. Install ODA File Converter and make sure `ODAFileConverter` is available in the runtime environment. +3. Add `malmo` to the `ckan.plugins` setting in your CKAN configuration file. 
def store_cached_preview(source_path: str, cache_path: str) -> None:
    """Atomically publish a rendered preview file into the cache.

    The preview is first copied to a uniquely named sibling of ``cache_path``
    and then moved into place with ``os.replace``, so readers never observe a
    partially written cache entry.

    Args:
        source_path: Path of the freshly rendered preview file.
        cache_path: Final location of the cache entry.

    Raises:
        OSError: If the source cannot be read or the cache cannot be written.
            No temporary file is left behind in that case.
    """
    import uuid

    # ``os.path.dirname`` is empty for a bare file name; ``os.makedirs("")``
    # would raise, so fall back to the current directory.
    cache_dir = os.path.dirname(cache_path) or "."
    os.makedirs(cache_dir, exist_ok=True)

    # A per-call unique staging name: two requests rendering the same drawing
    # at the same time must not write into (or replace away) each other's
    # temporary file.
    temp_path = f"{cache_path}.{uuid.uuid4().hex}.tmp"
    try:
        shutil.copyfile(source_path, temp_path)
        os.replace(temp_path, cache_path)
    except BaseException:
        # Best-effort cleanup; the original error is what the caller needs.
        try:
            os.remove(temp_path)
        except OSError:
            pass
        raise
= "-screen 0 1600x1200x24" +DEFAULT_RENDER_MARGIN = 0.05 +DEFAULT_IMAGE_WIDTH = 1600 +DEFAULT_IMAGE_HEIGHT = 1200 +DEFAULT_MIN_PREVIEW_BYTES = 1024 +DEFAULT_CACHE_DIR = os.path.join(tempfile.gettempdir(), "ckan-dwg-preview-cache") +DEFAULT_MIN_CONTENT_COVERAGE = 0.002 +DEFAULT_MAX_INITIAL_COVERAGE = 0.6 +DEFAULT_RETRY_RENDER_MARGIN = 0.01 +DEFAULT_LINEWEIGHT_SCALING = 1.5 +DEFAULT_MIN_OCCUPIED_WIDTH_RATIO = 0.2 +DEFAULT_MIN_OCCUPIED_HEIGHT_RATIO = 0.2 + + +@dataclass(frozen=True) +class PreviewConfig: + timeout: int + download_timeout: int + max_download_bytes: int + oda_executable: str + oda_output_version: str + xvfb_screen: str + render_margin: float + image_width: int + image_height: int + min_preview_bytes: int + cache_dir: str + min_content_coverage: float + max_initial_coverage: float + retry_render_margin: float + lineweight_scaling: float + min_occupied_width_ratio: float + min_occupied_height_ratio: float + + @classmethod + def from_ckan_config(cls) -> "PreviewConfig": + return cls( + timeout=_get_int("ckanext.malmo.dwg_preview_timeout", DEFAULT_TIMEOUT_SECONDS, minimum=1), + download_timeout=_get_int( + "ckanext.malmo.dwg_preview_download_timeout", + DEFAULT_DOWNLOAD_TIMEOUT_SECONDS, + minimum=1, + ), + max_download_bytes=_get_int( + "ckanext.malmo.dwg_preview_max_download_bytes", + DEFAULT_MAX_DOWNLOAD_BYTES, + minimum=1024, + ), + oda_executable=_get_string("ckanext.malmo.dwg_preview_oda_executable", "ODAFileConverter"), + oda_output_version=_get_string( + "ckanext.malmo.dwg_preview_oda_output_version", + DEFAULT_ODA_OUTPUT_VERSION, + ), + xvfb_screen=_get_string( + "ckanext.malmo.dwg_preview_xvfb_screen", + DEFAULT_XVFB_SCREEN, + ), + render_margin=_get_float( + "ckanext.malmo.dwg_preview_render_margin", + DEFAULT_RENDER_MARGIN, + minimum=0.0, + ), + image_width=_get_int( + "ckanext.malmo.dwg_preview_image_width", + DEFAULT_IMAGE_WIDTH, + minimum=256, + ), + image_height=_get_int( + "ckanext.malmo.dwg_preview_image_height", + DEFAULT_IMAGE_HEIGHT, + 
def _get_float(config_key: str, default_value: float, minimum: float | None = None) -> float:
    """Read a float setting from the CKAN config, falling back to a default.

    Args:
        config_key: Name of the CKAN config option to read.
        default_value: Value returned when the option is missing, empty,
            unparsable, non-finite, or below ``minimum``.
        minimum: Optional inclusive lower bound for accepted values.

    Returns:
        The parsed finite float, or ``default_value`` (a warning is logged
        whenever a configured value is rejected).
    """
    import math

    raw_value = toolkit.config.get(config_key)
    if raw_value in (None, ""):
        return default_value
    try:
        value = float(raw_value)
    except (TypeError, ValueError):
        log.warning("Invalid float config %s=%r, using default %s", config_key, raw_value, default_value)
        return default_value
    # ``float()`` happily parses "nan" and "inf"; NaN also slips through the
    # ``value < minimum`` check below because every comparison with NaN is
    # False. Neither is a usable margin/ratio, so reject them explicitly.
    if not math.isfinite(value):
        log.warning("Config %s=%r is not a finite number, using default %s", config_key, raw_value, default_value)
        return default_value
    if minimum is not None and value < minimum:
        log.warning("Config %s=%r is below minimum %s, using default %s", config_key, value, minimum, default_value)
        return default_value
    return value
[_format_conversion_error("DWG to DXF conversion failed", result.stderr)]}) + + dxf_path = _find_generated_dxf(output_dir, input_name) + if not dxf_path or not os.path.exists(dxf_path) or os.path.getsize(dxf_path) == 0: + raise ValidationError({"conversion": ["DWG to DXF conversion did not produce a usable DXF file"]}) + + log.info("Generated DXF path=%s bytes=%s", dxf_path, os.path.getsize(dxf_path)) + return dxf_path + + +def _resolve_oda_executable(configured_path: str) -> str: + if os.path.isabs(configured_path): + if os.path.exists(configured_path) and os.access(configured_path, os.X_OK): + return configured_path + raise ValidationError({"converter": [f'Configured ODA File Converter is not executable: "{configured_path}"']}) + + resolved = shutil.which(configured_path) + if resolved: + return resolved + + raise ValidationError( + { + "converter": [ + 'ODA File Converter is not installed. Configure `ckanext.malmo.dwg_preview_oda_executable` or add `ODAFileConverter` to PATH.' + ] + } + ) + + +def _build_oda_command(oda_arguments: list[str], xvfb_screen: str) -> list[str]: + xvfb_run = shutil.which("xvfb-run") + if not xvfb_run: + return oda_arguments + return [xvfb_run, "-a", "-s", xvfb_screen, *oda_arguments] + + +def _run_subprocess(command: list[str], timeout: int) -> subprocess.CompletedProcess[bytes]: + try: + return subprocess.run( + command, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + timeout=timeout, + ) + except subprocess.TimeoutExpired: + raise ValidationError({"conversion": [f"Conversion exceeded the timeout of {timeout} seconds"]}) + except OSError as err: + raise ValidationError({"conversion": [f"Conversion process failed to start: {err}"]}) + + +def _find_generated_dxf(output_dir: str, input_name: str) -> str | None: + expected_name = os.path.splitext(input_name)[0] + ".dxf" + expected_path = os.path.join(output_dir, expected_name) + if os.path.exists(expected_path): + return expected_path + + 
for root, _dirs, files in os.walk(output_dir): + for file_name in files: + if file_name.lower().endswith(".dxf"): + return os.path.join(root, file_name) + return None + + +def _decode_subprocess_output(output: bytes | None) -> str: + if not output: + return "" + return output.decode("utf-8", errors="replace").strip().splitlines()[0][:400] + + +def _format_conversion_error(prefix: str, stderr: bytes | None) -> str: + decoded_stderr = _decode_subprocess_output(stderr) + return f"{prefix}: {decoded_stderr}" if decoded_stderr else prefix diff --git a/ckanext/malmo/dwg_preview/render.py b/ckanext/malmo/dwg_preview/render.py new file mode 100644 index 0000000..0c5df00 --- /dev/null +++ b/ckanext/malmo/dwg_preview/render.py @@ -0,0 +1,354 @@ +from __future__ import annotations + +import logging +import os +import statistics +from dataclasses import dataclass +from typing import Any + +import ckan.logic as logic + +from .config import PreviewConfig + +log = logging.getLogger(__name__) + +ValidationError = logic.ValidationError + + +@dataclass(frozen=True) +class LayoutCandidate: + name: str + layout: Any + kind: str + entity_count: int + viewport_count: int + text_hint_count: int + frame_hint_count: int + + +@dataclass(frozen=True) +class RenderedPreviewMetrics: + coverage: float + occupied_width: int + occupied_height: int + bbox: tuple[int, int, int, int] | None + + +def render_dxf_to_png(dxf_path: str, output_path: str, config: PreviewConfig) -> None: + document = _load_dxf_document(dxf_path) + last_error: ValidationError | None = None + + for candidate in _iter_layout_candidates(document): + try: + _render_layout(document, candidate, output_path, config) + _validate_preview(output_path, candidate.name, config) + log.info("Rendered preview accepted layout=%s bytes=%s", candidate.name, os.path.getsize(output_path)) + return + except ValidationError as err: + last_error = err + log.warning("DWG preview layout render failed layout=%s error=%s", candidate.name, 
def _load_dxf_document(dxf_path: str) -> Any:
    """Load a DXF file, trying the plain reader first and recovery second.

    Args:
        dxf_path: Path of the DXF file to load.

    Returns:
        The document object returned by ``ezdxf.readfile`` or, when that
        fails, by ``ezdxf.recover.readfile``.

    Raises:
        ValidationError: If ``ezdxf`` is not importable, or if the recovery
            reader also fails to parse the file.
    """
    try:
        import ezdxf
        from ezdxf import recover
    except ImportError as import_error:
        raise ValidationError({"converter": [f"DXF renderer dependency is not installed: {import_error}"]})

    # First attempt: the regular reader.
    try:
        loaded = ezdxf.readfile(dxf_path)
        log.info("Loaded DXF document path=%s using fast read path", dxf_path)
        return loaded
    except Exception as fast_failure:
        log.warning("Fast DXF load failed for %s, retrying recovery path: %s", dxf_path, fast_failure)

    # Second attempt: the recovery reader, which also hands back an auditor.
    try:
        recovered, audit = recover.readfile(dxf_path)
    except Exception as recovery_failure:
        raise ValidationError({"conversion": [f"Generated DXF could not be parsed: {recovery_failure}"]})

    error_count = len(getattr(audit, "errors", []))
    fix_count = len(getattr(audit, "fixes", []))
    log.info(
        "Loaded DXF document path=%s auditor_errors=%s auditor_fixes=%s",
        dxf_path,
        error_count,
        fix_count,
    )
    return recovered
def _render_layout(document: Any, candidate: LayoutCandidate, output_path: str, config: PreviewConfig) -> None:
    """Render one layout candidate to a PNG file at ``output_path``.

    The layout is drawn once into a recorder, replayed onto a matplotlib axis,
    saved, and then measured. If a paperspace layout renders too sparsely, it
    is replayed once more with a tighter bounding box before giving up.

    Args:
        document: Loaded DXF document that owns the layout.
        candidate: Layout to render plus its pre-computed statistics.
        output_path: Destination PNG path (overwritten on every attempt).
        config: Preview settings (image size, margins, coverage thresholds).

    Raises:
        ValidationError: If the layout has no entities or no visible bounds,
            a rendering dependency is missing, rendering fails, or the final
            image is still too sparse (that error additionally carries
            ``preview_reason = ["preview_too_sparse"]``).
    """
    layout = candidate.layout
    layout_name = candidate.name
    if candidate.entity_count <= 0:
        raise ValidationError({"conversion": [f'Layout "{layout_name}" does not contain drawable entities']})

    try:
        import matplotlib

        # Select the non-interactive Agg backend before pyplot is imported.
        matplotlib.use("Agg")

        import matplotlib.pyplot as plt
        from ezdxf.addons.drawing import Frontend, RenderContext, config as drawing_config
        from ezdxf.addons.drawing.matplotlib import MatplotlibBackend
        from ezdxf.addons.drawing.recorder import Recorder
    except ImportError as err:
        raise ValidationError({"converter": [f"PNG rendering dependency is not installed: {err}"]})

    # figsize is in inches, so inches * dpi yields the configured pixel size.
    dpi = 100
    figure = plt.figure(figsize=(config.image_width / dpi, config.image_height / dpi), dpi=dpi)
    # A single axis covering the whole figure, without ticks or frame.
    axis = figure.add_axes([0, 0, 1, 1])
    axis.set_axis_off()
    axis.set_facecolor("white")
    figure.patch.set_facecolor("white")

    try:
        frontend_config = drawing_config.Configuration(
            background_policy=drawing_config.BackgroundPolicy.WHITE,
            color_policy=drawing_config.ColorPolicy.BLACK,
            lineweight_scaling=config.lineweight_scaling,
        )
        # Draw into a recorder so the same output can be replayed for the
        # retry below without running the frontend a second time.
        recorder = Recorder()
        frontend = Frontend(RenderContext(document), recorder, config=frontend_config)
        frontend.draw_layout(layout, finalize=True)
        player = recorder.player()
        content_bbox = _resolve_content_bbox(player, candidate, config)
        if not content_bbox.has_data:
            raise ValidationError({"conversion": [f'Layout "{layout_name}" does not contain visible drawable bounds']})

        # First attempt: replay onto the axis and frame the content bbox.
        backend = MatplotlibBackend(axis)
        player.replay(backend)
        _set_axis_limits(axis, content_bbox, config.render_margin)
        axis.set_aspect("equal", adjustable="datalim")
        figure.savefig(
            output_path,
            format="png",
            dpi=dpi,
            bbox_inches=None,
            pad_inches=0,
            facecolor="white",
            edgecolor="white",
        )
        metrics = _measure_rendered_preview(output_path)
        if not _is_preview_coverage_acceptable(metrics, config):
            # Only paperspace layouts get a second attempt with a cropped bbox.
            if candidate.kind == "paperspace":
                tighter_bbox = _crop_bbox(content_bbox, config.retry_render_margin)
                # Clearing the axis also resets its styling, so the axis-off
                # and white face colour are applied again before replaying.
                axis.cla()
                axis.set_axis_off()
                axis.set_facecolor("white")
                backend = MatplotlibBackend(axis)
                player.replay(backend)
                _set_axis_limits(axis, tighter_bbox, config.retry_render_margin)
                axis.set_aspect("equal", adjustable="datalim")
                figure.savefig(
                    output_path,
                    format="png",
                    dpi=dpi,
                    bbox_inches=None,
                    pad_inches=0,
                    facecolor="white",
                    edgecolor="white",
                )
                metrics = _measure_rendered_preview(output_path)

            # Still too sparse (or no retry was possible): reject this layout.
            if not _is_preview_coverage_acceptable(metrics, config):
                raise ValidationError(
                    {
                        "conversion": [f'Rendered preview for layout "{layout_name}" occupies too little of the image'],
                        "preview_reason": ["preview_too_sparse"],
                    }
                )
    except ValidationError:
        # Already in the shape callers expect; do not re-wrap.
        raise
    except Exception as err:
        raise ValidationError({"conversion": [f'DXF raster rendering failed for layout "{layout_name}": {err}']})
    finally:
        # Always close the figure, on success and on failure.
        plt.close(figure)
layout_name: str, config: PreviewConfig) -> None: + if not os.path.exists(output_path) or os.path.getsize(output_path) == 0: + raise ValidationError({"conversion": [f'Renderer produced no output for layout "{layout_name}"']}) + if os.path.getsize(output_path) < config.min_preview_bytes: + raise ValidationError( + {"conversion": [f'Rendered preview for layout "{layout_name}" is too small to be trustworthy']} + ) + + +def _count_entities(layout: Any) -> int: + try: + return sum(1 for _entity in layout) + except TypeError: + return len(list(layout)) + + +def _iter_entity_types(layout: Any) -> list[str]: + types: list[str] = [] + for entity in layout: + try: + types.append(str(entity.dxftype()).upper()) + except Exception: + continue + return types + + +def _resolve_content_bbox(player: Any, candidate: LayoutCandidate, config: PreviewConfig) -> Any: + full_bbox = player.bbox() + if not full_bbox.has_data: + return full_bbox + + if candidate.kind != "paperspace": + return full_bbox + + width = max(float(full_bbox.extmax.x - full_bbox.extmin.x), 1.0) + height = max(float(full_bbox.extmax.y - full_bbox.extmin.y), 1.0) + area = width * height + if area <= 0: + return full_bbox + + # For paperspace layouts, reduce the chance of one stray entity making the + # full sheet look empty by cropping lightly toward the center when the bbox + # is unusually loose. 
def _measure_rendered_preview(output_path: str) -> RenderedPreviewMetrics:
    """Measure how much of a rendered preview image is actually drawn on.

    A pixel counts as occupied when its grayscale value is below 245.

    Args:
        output_path: Path of the rendered image file.

    Returns:
        Metrics holding the occupied fraction of the image (``coverage``), the
        pixel width/height of the occupied bounding box, and that box as
        inclusive ``(min_x, min_y, max_x, max_y)`` coordinates. For a blank
        image all numbers are zero and ``bbox`` is ``None``.

    Raises:
        ValidationError: If Pillow is not installed.
    """
    try:
        from PIL import Image
    except ImportError as err:
        raise ValidationError({"converter": [f"Image validation dependency is not installed: {err}"]}) from err

    ink_threshold = 245

    with Image.open(output_path) as image:
        grayscale = image.convert("L")
        width, height = grayscale.size
        # Let Pillow do the per-pixel work: visiting every pixel in a Python
        # loop and collecting coordinate tuples is slow and memory hungry for
        # a full-size preview.
        # histogram() on an "L" image has one bucket per gray level 0..255.
        occupied_count = sum(grayscale.histogram()[:ink_threshold])
        # Mark occupied pixels as non-zero so getbbox() finds their extent.
        ink_mask = grayscale.point(lambda level: 255 if level < ink_threshold else 0)
        ink_bbox = ink_mask.getbbox()

    if ink_bbox is None:
        return RenderedPreviewMetrics(coverage=0.0, occupied_width=0, occupied_height=0, bbox=None)

    # getbbox() returns (left, upper, right, lower) with right/lower exclusive.
    left, upper, right, lower = ink_bbox
    return RenderedPreviewMetrics(
        coverage=occupied_count / float(width * height),
        occupied_width=right - left,
        occupied_height=lower - upper,
        bbox=(left, upper, right - 1, lower - 1),
    )
"application/x-dwg", + "image/vnd.dwg", + "image/x-dwg", +} +DOWNLOAD_CHUNK_SIZE = 64 * 1024 +PNG_MIMETYPE = "image/png" +PNG_EXTENSION = "png" + + +def build_preview_payload(context: dict[str, Any], data_dict: dict[str, Any]) -> dict[str, Any]: + resource_id = (data_dict or {}).get("id") + if not resource_id: + raise ValidationError({"id": ["Missing value"]}) + + config = PreviewConfig.from_ckan_config() + resource = _get_resource_for_preview(context, resource_id) + if not _is_dwg_resource(resource): + raise ValidationError({"id": ["Resource must be a DWG file to generate a preview"]}) + + log.info("DWG preview requested for resource=%s format=png", resource_id) + + with TemporaryDirectory(prefix="ckan-dwg-preview-") as tmp_dir: + source_path = _stage_resource_dwg(resource, tmp_dir, config) + source_hash = file_sha256(source_path) + cache_path = build_cache_path(config.cache_dir, resource_id, source_hash) + + if is_cached_preview_valid(cache_path, config.min_preview_bytes): + log.info("Serving cached DWG preview resource=%s cache=%s", resource_id, cache_path) + content = _read_file(cache_path) + else: + content = _generate_preview(resource_id, source_path, tmp_dir, cache_path, config) + + return { + "content": content, + "filename": _build_output_filename(resource), + "mimetype": PNG_MIMETYPE, + "resource_id": resource_id, + } + + +def _generate_preview( + resource_id: str, + source_path: str, + tmp_dir: str, + cache_path: str, + config: PreviewConfig, +) -> bytes: + dxf_path = convert_dwg_to_dxf(source_path, tmp_dir, config) + preview_path = os.path.join(tmp_dir, "preview.png") + render_dxf_to_png(dxf_path, preview_path, config) + store_cached_preview(preview_path, cache_path) + log.info("DWG preview generated resource=%s path=%s cache=%s", resource_id, preview_path, cache_path) + return _read_file(preview_path) + + +def _read_file(path: str) -> bytes: + with open(path, "rb") as output_file: + return output_file.read() + + +def _get_resource_for_preview(context: 
dict[str, Any], resource_id: str) -> dict[str, Any]: + try: + return toolkit.get_action("resource_show")(context, {"id": resource_id}) + except NotFound: + raise ValidationError({"id": ["Resource does not exist"]}) + except NotAuthorized: + raise ValidationError({"id": ["User cannot view this resource"]}) + + +def _is_dwg_resource(resource: dict[str, Any]) -> bool: + resource_format = str(resource.get("format") or "").strip().lower() + if resource_format: + normalized_format = resource_format.lstrip(".") + if normalized_format == "dwg" or "dwg" in normalized_format: + return True + + for mime_field in ("mimetype", "mimetype_inner"): + mimetype_value = str(resource.get(mime_field) or "").strip().lower() + if mimetype_value in DWG_MIME_TYPES or mimetype_value.endswith("/dwg"): + return True + + for path_field in ("url", "name"): + raw_value = str(resource.get(path_field) or "") + extension = os.path.splitext(urlparse(raw_value).path)[1].lower() + if extension == ".dwg": + return True + + return False + + +def _stage_resource_dwg(resource: dict[str, Any], tmp_dir: str, config: PreviewConfig) -> str: + source_path = os.path.join(tmp_dir, "source.dwg") + + if resource.get("url_type") == "upload": + log.info("Preparing uploaded DWG resource=%s", resource.get("id")) + _copy_uploaded_resource(resource, source_path, config) + else: + resource_url = str(resource.get("url") or "").strip() + if not resource_url: + raise ValidationError({"id": ["Resource does not have a downloadable URL"]}) + log.info("Downloading external DWG resource=%s url=%s", resource.get("id"), resource_url) + _download_to_path( + resource_url, + source_path, + max_download_bytes=config.max_download_bytes, + download_timeout=config.download_timeout, + source_label="external DWG resource", + ) + + if not os.path.exists(source_path) or os.path.getsize(source_path) == 0: + raise ValidationError({"id": ["DWG source file could not be prepared"]}) + + log.info( + "Prepared DWG source resource=%s path=%s 
def _copy_uploaded_resource(resource: dict[str, Any], destination_path: str, config: PreviewConfig) -> None:
    """Stage an uploaded resource's file at ``destination_path``.

    A file found on local storage is copied directly. Otherwise, if the
    uploader exposes ``bucket_name``, ``get_path`` and
    ``get_signed_url_to_key``, the file is downloaded from remote storage.

    Args:
        resource: Resource dictionary; must contain ``id``.
        destination_path: Where the DWG file should be written.
        config: Preview settings (size limit, download timeout).

    Raises:
        ValidationError: If neither a local file nor a supported remote
            storage backend is available.
    """
    storage = uploader.get_resource_uploader(dict(resource))
    resource_id = resource["id"]
    url_basename = os.path.basename(str(resource.get("url") or ""))
    resource_name = url_basename if url_basename else f"{resource_id}.dwg"

    # Ask the uploader for a local path. A TypeError is treated as "no local
    # path" without logging; any other failure is logged at debug level.
    try:
        local_path = storage.get_path(resource_id)
    except TypeError:
        local_path = None
    except Exception as lookup_error:
        log.debug("Failed to resolve local upload path for %s: %s", resource_id, lookup_error)
        local_path = None

    if local_path and os.path.exists(local_path):
        log.info("Copying uploaded DWG from local storage resource=%s path=%s", resource_id, local_path)
        _copy_local_file(local_path, destination_path, config.max_download_bytes)
        return

    remote_storage_api = ("bucket_name", "get_path", "get_signed_url_to_key")
    if not all(hasattr(storage, attribute) for attribute in remote_storage_api):
        raise ValidationError({"id": ["Uploaded resource storage backend is not supported by convert_dwg"]})

    _download_uploaded_resource_from_storage(storage, resource_id, resource_name, destination_path, config)
def _copy_local_file(source_path: str, destination_path: str, max_download_bytes: int) -> None:
    """Copy a locally stored DWG file to *destination_path*, enforcing the size limit.

    Args:
        source_path: Existing file to copy.
        destination_path: Target path; overwritten when present.
        max_download_bytes: Largest accepted file size in bytes.

    Raises:
        ValidationError: If the source file is larger than *max_download_bytes*.
        OSError: If the source cannot be read or the copy fails.
    """
    file_size = os.path.getsize(source_path)
    if file_size > max_download_bytes:
        raise ValidationError(
            {"id": [f"DWG source file exceeds the maximum allowed size of {max_download_bytes} bytes"]}
        )
    shutil.copyfile(source_path, destination_path)


def _size_limit_message(source_label: str, max_download_bytes: int) -> str:
    """Build the size-limit error text for *source_label*, keeping its casing."""
    # str.capitalize() would lowercase the rest of the label ("DWG" -> "dwg"),
    # so only the first character is upper-cased.
    label = source_label[:1].upper() + source_label[1:]
    return f"{label} exceeds the maximum allowed size of {max_download_bytes} bytes"


def _remove_partial_file(path: str) -> None:
    """Delete *path* if it exists; never raise because of the cleanup itself."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as err:
        # Cleanup must not mask the error that caused it.
        log.warning("Could not remove partial download path=%s: %s", path, err)


def _download_to_path(
    url: str,
    destination_path: str,
    max_download_bytes: int,
    download_timeout: int,
    source_label: str,
) -> None:
    """Stream *url* into *destination_path* while enforcing a byte limit.

    Only ``http`` and ``https`` URLs are accepted. The body is written in
    ``DOWNLOAD_CHUNK_SIZE`` chunks and the transfer is aborted as soon as the
    limit is exceeded. Whenever the download does not complete, any partially
    written file is removed.

    Args:
        url: Source URL.
        destination_path: File that receives the downloaded bytes.
        max_download_bytes: Largest accepted body size in bytes.
        download_timeout: Read timeout in seconds, passed to ``requests``
            together with a fixed 10 second connect timeout.
        source_label: Human-readable description used in error and log messages.

    Raises:
        ValidationError: For an invalid or unsupported URL, an oversized body,
            or any ``requests`` failure (HTTP error status included).
        OSError: If the destination file cannot be written.
    """
    try:
        parsed_url = urlparse(url)
    except ValueError as err:
        raise ValidationError({"id": [f"Invalid URL for {source_label}"]}) from err
    if parsed_url.scheme.lower() not in {"http", "https"}:
        raise ValidationError({"id": [f"Unsupported URL scheme for {source_label}"]})

    bytes_downloaded = 0
    download_complete = False
    try:
        with requests.get(url, stream=True, timeout=(10, download_timeout), allow_redirects=True) as response:
            response.raise_for_status()

            # Reject early when the server already announces an oversized body;
            # the streaming check below remains authoritative.
            declared_length = response.headers.get("Content-Length") or ""
            if declared_length.isdigit() and int(declared_length) > max_download_bytes:
                raise ValidationError({"id": [_size_limit_message(source_label, max_download_bytes)]})

            with open(destination_path, "wb") as destination_file:
                for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    if not chunk:
                        continue
                    bytes_downloaded += len(chunk)
                    if bytes_downloaded > max_download_bytes:
                        raise ValidationError({"id": [_size_limit_message(source_label, max_download_bytes)]})
                    destination_file.write(chunk)
        download_complete = True
    except requests.RequestException as err:
        # NOTE(review): the text of requests exceptions can contain the request
        # URL (possibly a signed one); confirm that exposing it is acceptable.
        raise ValidationError({"id": [f"Could not download {source_label}: {err}"]}) from err
    finally:
        # Covers every failure path, including write errors such as a full disk.
        if not download_complete:
            _remove_partial_file(destination_path)

    log.info("Downloaded %s path=%s bytes=%s", source_label, destination_path, bytes_downloaded)


def _build_output_filename(resource: dict[str, Any]) -> str:
    """Derive the preview file name for *resource*.

    The base name comes from the resource ``name``, else from the last path
    segment of its ``url`` (query string and fragment excluded), else from its
    ``id``. Any extension is replaced by ``PNG_EXTENSION``.

    Args:
        resource: Resource dictionary; ``id`` is required as the final fallback.

    Returns:
        File name of the form ``<base>.<PNG_EXTENSION>``.
    """
    raw_name = str(resource.get("name") or "").strip()
    if not raw_name:
        raw_url = str(resource.get("url") or "").strip()
        try:
            url_path = urlparse(raw_url).path
        except ValueError:
            # An unparseable URL offers no usable file name.
            url_path = ""
        raw_name = os.path.basename(url_path)
    if not raw_name:
        raw_name = resource["id"]

    base_name = os.path.splitext(raw_name)[0] or resource["id"]
    return f"{base_name}.{PNG_EXTENSION}"
@dwg_preview_blueprint.route("/api/3/action/convert_dwg", methods=["GET", "POST"])
def convert_dwg() -> flask.Response | tuple[flask.Response, int]:
    """
    Binary endpoint that mirrors an action URL.

    CKAN 2.11 wraps normal action responses in JSON, so this blueprint exposes
    the same action name as a concrete Flask route and returns the preview bytes
    directly.

    Returns:
        The preview as an inline binary response on success. A validation
        failure yields a JSON error body with status 409; any other exception
        is logged and yields a JSON error body with status 500.
    """
    data_dict = _get_request_data()
    context = _build_context()

    try:
        payload = toolkit.get_action("convert_dwg")(context, data_dict)
    except ValidationError as err:
        return _validation_error_response(err)
    except Exception:
        # Top-level boundary: log the traceback, hide the details from the client.
        log.exception("Unexpected error while generating DWG preview")
        return flask.jsonify(
            {
                "help": _help_url(),
                "success": False,
                "error": {
                    "__type": "Internal Server Error",
                    "message": "Internal Server Error",
                },
            }
        ), 500

    response = flask.Response(payload["content"], mimetype=payload["mimetype"])
    # Let Werkzeug quote/encode the file name instead of interpolating it into
    # the header by hand: the name is derived from resource metadata and may
    # contain quotes or other characters that would corrupt the header.
    response.headers.set(
        "Content-Disposition",
        "inline",
        filename=_header_safe_filename(payload["filename"]),
    )
    response.headers["Cache-Control"] = "private, no-store, max-age=0"
    return response


def _header_safe_filename(filename: Any) -> str:
    """Return *filename* without non-printable characters, never empty."""
    # Control characters (CR/LF in particular) are not allowed in header values.
    cleaned = "".join(char for char in str(filename) if char.isprintable()).strip()
    return cleaned or "preview.png"


def get_blueprints():
    """Return the Flask blueprints provided by this module."""
    return [dwg_preview_blueprint]


def _build_context() -> dict[str, Any]:
    """Build the action context for the current request.

    Anonymous requests get an empty ``user`` and ``auth_user_obj`` set to
    ``None``; authenticated requests carry the logged-in user's name and object.
    """
    is_authenticated = bool(getattr(current_user, "is_authenticated", False))
    return {
        "model": model,
        "session": model.Session,
        "user": current_user.name if is_authenticated else "",
        "auth_user_obj": current_user if is_authenticated else None,
    }


def _get_request_data() -> dict[str, Any]:
    """Collect the action parameters from the current request.

    GET requests use the query string. Other requests use the JSON body when
    it decodes to an object, and fall back to the form fields otherwise.
    """
    if flask.request.method == "GET":
        return flask.request.args.to_dict(flat=True)

    if flask.request.is_json:
        # silent=True: malformed JSON yields None and falls through to the form.
        payload = flask.request.get_json(silent=True)
        if isinstance(payload, dict):
            return payload

    return flask.request.form.to_dict(flat=True)


def _validation_error_response(error: ValidationError) -> tuple[flask.Response, int]:
    """Turn a validation error into a JSON error body with status 409."""
    # Copy first so the exception's own error_dict is left untouched.
    error_dict = dict(error.error_dict)
    error_dict["__type"] = "Validation Error"
    return (
        flask.jsonify(
            {
                "help": _help_url(),
                "success": False,
                "error": error_dict,
            }
        ),
        409,
    )


def _help_url() -> str:
    """Return the absolute URL of the ``help_show`` action for ``convert_dwg``."""
    return toolkit.url_for(
        "api.action",
        logic_function="help_show",
        ver=3,
        name="convert_dwg",
        _external=True,
    )