diff --git a/dev-docs/architecture/overview.md b/dev-docs/architecture/overview.md index 5d20b00..7451903 100644 --- a/dev-docs/architecture/overview.md +++ b/dev-docs/architecture/overview.md @@ -98,10 +98,11 @@ CLI entry point `validate` - `edit.py` contains the Phase 2 editing parser, JSON serialization helpers, and wrappers around `exstruct.edit` -- `exstruct.__init__`, `exstruct.edit.__init__`, and lightweight CLI startup - paths must remain side-effect-free: `--help` and `ops` routing should defer - heavy extraction/edit implementation imports until command execution needs - them +- `exstruct.__init__`, `exstruct.edit.__init__`, `exstruct.engine`, and + lightweight CLI startup paths must remain side-effect-free where practical: + `--help` and `ops` routing should defer heavy extraction/edit implementation + imports until command execution needs them, and importing `exstruct.engine` + should not eagerly load extraction/render runtime dependencies ### edit/ diff --git a/src/exstruct/engine.py b/src/exstruct/engine.py index 84fd0da..5e39dc4 100644 --- a/src/exstruct/engine.py +++ b/src/exstruct/engine.py @@ -15,21 +15,207 @@ validate_libreoffice_extraction_request, validate_libreoffice_process_request, ) -from .core import cells as _cells -from .core.cells import set_table_detection_params -from .core.integrate import extract_workbook -from .io import ( - save_auto_page_break_views, - save_print_area_views, - save_sheets, - serialize_workbook, -) -from .models import SheetData, WorkbookData, convert_workbook_keys_to_alpha -from .render import export_pdf, export_sheet_images +from .models import SheetData, WorkbookData ExtractionMode = Literal["light", "libreoffice", "standard", "verbose"] +def set_table_detection_params( + *, + table_score_threshold: float | None = None, + density_min: float | None = None, + coverage_min: float | None = None, + min_nonempty_cells: int | None = None, +) -> None: + """Lazily proxy table-detection configuration updates.""" + from .core.cells import ( + set_table_detection_params as set_table_detection_params_impl, + ) + + set_table_detection_params_impl( + table_score_threshold=table_score_threshold, + density_min=density_min, + coverage_min=coverage_min, + min_nonempty_cells=min_nonempty_cells, + ) + + +def extract_workbook( + file_path: str | Path, + mode: ExtractionMode = "standard", + *, + include_cell_links: bool | None = None, + include_print_areas: bool | None = None, + include_auto_page_breaks: bool = False, + include_colors_map: bool | None = None, + include_default_background: bool = False, + ignore_colors: set[str] | None = None, + include_formulas_map: bool | None = None, + include_merged_cells: bool | None = None, + include_merged_values_in_rows: bool = True, +) -> WorkbookData: + """Lazily proxy workbook extraction.""" + from .core.integrate import extract_workbook as extract_workbook_impl + + return extract_workbook_impl( + file_path, + mode=mode, + include_cell_links=include_cell_links, + include_print_areas=include_print_areas, + include_auto_page_breaks=include_auto_page_breaks, + include_colors_map=include_colors_map, + include_default_background=include_default_background, + ignore_colors=ignore_colors, + include_formulas_map=include_formulas_map, + include_merged_cells=include_merged_cells, + include_merged_values_in_rows=include_merged_values_in_rows, + ) + + +def convert_workbook_keys_to_alpha(workbook: WorkbookData) -> WorkbookData: + """Lazily proxy workbook key conversion.""" + from .models import ( + convert_workbook_keys_to_alpha as convert_workbook_keys_to_alpha_impl, + ) + + return convert_workbook_keys_to_alpha_impl(workbook) + + +def serialize_workbook( + model: WorkbookData, + fmt: Literal["json", "yaml", "yml", "toon"] = "json", + *, + pretty: bool = False, + indent: int | None = None, + include_backend_metadata: bool = False, +) -> str: + """Lazily proxy workbook serialization.""" + from .io import serialize_workbook as serialize_workbook_impl + + return serialize_workbook_impl( + model, + fmt=fmt, + pretty=pretty, + indent=indent, + include_backend_metadata=include_backend_metadata, + ) + + +def save_sheets( + workbook: WorkbookData, + output_dir: Path, + fmt: Literal["json", "yaml", "yml", "toon"] = "json", + *, + pretty: bool = False, + indent: int | None = None, + include_backend_metadata: bool = False, +) -> dict[str, Path]: + """Lazily proxy per-sheet export.""" + from .io import save_sheets as save_sheets_impl + + return save_sheets_impl( + workbook, + output_dir, + fmt=fmt, + pretty=pretty, + indent=indent, + include_backend_metadata=include_backend_metadata, + ) + + +def save_print_area_views( + workbook: WorkbookData, + output_dir: Path, + fmt: Literal["json", "yaml", "yml", "toon"] = "json", + *, + pretty: bool = False, + indent: int | None = None, + normalize: bool = False, + include_shapes: bool = True, + include_charts: bool = True, + include_shape_size: bool = True, + include_chart_size: bool = True, + include_backend_metadata: bool = False, +) -> dict[str, Path]: + """Lazily proxy print-area export.""" + from .io import save_print_area_views as save_print_area_views_impl + + return save_print_area_views_impl( + workbook, + output_dir, + fmt=fmt, + pretty=pretty, + indent=indent, + normalize=normalize, + include_shapes=include_shapes, + include_charts=include_charts, + include_shape_size=include_shape_size, + include_chart_size=include_chart_size, + include_backend_metadata=include_backend_metadata, + ) + + +def save_auto_page_break_views( + workbook: WorkbookData, + output_dir: Path, + fmt: Literal["json", "yaml", "yml", "toon"] = "json", + *, + pretty: bool = False, + indent: int | None = None, + normalize: bool = False, + include_shapes: bool = True, + include_charts: bool = True, + include_shape_size: bool = True, + include_chart_size: bool = True, + include_backend_metadata: bool = False, +) -> dict[str, Path]: + """Lazily proxy auto page-break export.""" + from .io import ( + save_auto_page_break_views as save_auto_page_break_views_impl, + ) + + return save_auto_page_break_views_impl( + workbook, + output_dir, + fmt=fmt, + pretty=pretty, + indent=indent, + normalize=normalize, + include_shapes=include_shapes, + include_charts=include_charts, + include_shape_size=include_shape_size, + include_chart_size=include_chart_size, + include_backend_metadata=include_backend_metadata, + ) + + +def export_pdf(excel_path: str | Path, output_pdf: str | Path) -> list[str]: + """Lazily proxy PDF rendering.""" + from .render import export_pdf as export_pdf_impl + + return export_pdf_impl(excel_path, output_pdf) + + +def export_sheet_images( + excel_path: str | Path, + output_dir: str | Path, + dpi: int = 144, + *, + sheet: str | None = None, + a1_range: str | None = None, +) -> list[Path]: + """Lazily proxy sheet image rendering.""" + from .render import export_sheet_images as export_sheet_images_impl + + return export_sheet_images_impl( + excel_path, + output_dir, + dpi=dpi, + sheet=sheet, + a1_range=a1_range, + ) + + class TableParams(TypedDict, total=False): """Table detection parameter overrides.""" @@ -240,7 +426,9 @@ def _table_params_scope(self) -> Iterator[None]: if not self.options.table_params: yield return - prev = cast(TableParams, dict(_cells._DETECTION_CONFIG)) + from .core import cells as cells_module + + prev = cast(TableParams, dict(cells_module._DETECTION_CONFIG)) set_table_detection_params(**self.options.table_params) try: yield @@ -496,6 +684,7 @@ def serialize( use_fmt = fmt or self.output.format.fmt use_pretty = self.output.format.pretty if pretty is None else pretty use_indent = self.output.format.indent if indent is None else indent + return serialize_workbook( filtered, fmt=use_fmt, diff --git a/tests/cli/test_cli_lazy_imports.py b/tests/cli/test_cli_lazy_imports.py index e305861..6fbb9ce 100644 --- a/tests/cli/test_cli_lazy_imports.py +++ b/tests/cli/test_cli_lazy_imports.py @@ -46,6 +46,76 @@ def test_import_exstruct_stays_lightweight() -> None: } +def test_import_engine_module_stays_lightweight() -> None: + payload = _run_probe( + """ +import json +import sys +import exstruct.engine +print(json.dumps({ + "engine": "exstruct.engine" in sys.modules, + "core_cells": "exstruct.core.cells" in sys.modules, + "core_integrate": "exstruct.core.integrate" in sys.modules, + "io": "exstruct.io" in sys.modules, + "render": "exstruct.render" in sys.modules, + "numpy": "numpy" in sys.modules, + "pandas": "pandas" in sys.modules, + "openpyxl": "openpyxl" in sys.modules, + "xlwings": "xlwings" in sys.modules, + "PIL": "PIL" in sys.modules, +})) +""" + ) + + assert payload == { + "engine": True, + "core_cells": False, + "core_integrate": False, + "io": False, + "render": False, + "numpy": False, + "pandas": False, + "openpyxl": False, + "xlwings": False, + "PIL": False, + } + + +def test_import_public_engine_export_stays_lightweight() -> None: + payload = _run_probe( + """ +import json +import sys +from exstruct import ExStructEngine +print(json.dumps({ + "engine": "exstruct.engine" in sys.modules, + "core_cells": "exstruct.core.cells" in sys.modules, + "core_integrate": "exstruct.core.integrate" in sys.modules, + "io": "exstruct.io" in sys.modules, + "render": "exstruct.render" in sys.modules, + "numpy": "numpy" in sys.modules, + "pandas": "pandas" in sys.modules, + "openpyxl": "openpyxl" in sys.modules, + "xlwings": "xlwings" in sys.modules, + "PIL": "PIL" in sys.modules, +})) +""" + ) + + assert payload == { + "engine": True, + "core_cells": False, + "core_integrate": False, + "io": False, + "render": False, + "numpy": False, + "pandas": False, + "openpyxl": False, + "xlwings": False, + "PIL": False, + } + + def test_import_cli_main_does_not_load_edit_or_extraction_modules() -> None: payload = _run_probe( """