diff --git a/dev-docs/architecture/overview.md b/dev-docs/architecture/overview.md index 4e202da..5d20b00 100644 --- a/dev-docs/architecture/overview.md +++ b/dev-docs/architecture/overview.md @@ -98,6 +98,10 @@ CLI entry point `validate` - `edit.py` contains the Phase 2 editing parser, JSON serialization helpers, and wrappers around `exstruct.edit` +- `exstruct.__init__`, `exstruct.edit.__init__`, and lightweight CLI startup + paths must remain side-effect-free: `--help` and `ops` routing should defer + heavy extraction/edit implementation imports until command execution needs + them ### edit/ diff --git a/src/exstruct/__init__.py b/src/exstruct/__init__.py index 3972b5b..8672fc9 100644 --- a/src/exstruct/__init__.py +++ b/src/exstruct/__init__.py @@ -2,53 +2,47 @@ from __future__ import annotations +from collections.abc import Callable import logging from pathlib import Path -from typing import Literal, TextIO - -from .core.cells import set_table_detection_params -from .core.integrate import extract_workbook -from .engine import ( - ColorsOptions, - DestinationOptions, - ExStructEngine, - FilterOptions, - FormatOptions, - OutputOptions, - StructOptions, -) -from .errors import ( - ConfigError, - ExstructError, - MissingDependencyError, - PrintAreaError, - RenderError, - SerializationError, -) -from .io import ( - save_as_json, - save_as_toon, - save_as_yaml, - save_auto_page_break_views, - save_print_area_views, - save_sheets, - serialize_workbook, -) -from .models import ( - CellRow, - Chart, - ChartSeries, - PrintArea, - PrintAreaView, - Shape, - SheetData, - WorkbookData, - col_index_to_alpha, - convert_row_keys_to_alpha, - convert_sheet_keys_to_alpha, - convert_workbook_keys_to_alpha, -) -from .render import export_pdf, export_sheet_images +from typing import TYPE_CHECKING, Literal, TextIO + +if TYPE_CHECKING: + from .core.cells import set_table_detection_params + from .core.integrate import extract_workbook + from .engine import ( + ColorsOptions, + DestinationOptions, + ExStructEngine, + FilterOptions, + FormatOptions, + OutputOptions, + StructOptions, + ) + from .errors import ( + ConfigError, + ExstructError, + MissingDependencyError, + PrintAreaError, + RenderError, + SerializationError, + ) + from .io import serialize_workbook + from .models import ( + CellRow, + Chart, + ChartSeries, + PrintArea, + PrintAreaView, + Shape, + SheetData, + WorkbookData, + col_index_to_alpha, + convert_row_keys_to_alpha, + convert_sheet_keys_to_alpha, + convert_workbook_keys_to_alpha, + ) + from .render import export_pdf, export_sheet_images logger = logging.getLogger(__name__) @@ -97,6 +91,110 @@ ExtractionMode = Literal["light", "libreoffice", "standard", "verbose"] +LazyExportLoader = Callable[[], object] + + +def _load_engine_attr(name: str) -> object: + from . import engine as engine_module + + return getattr(engine_module, name) + + +def _load_error_attr(name: str) -> object: + from . import errors as errors_module + + return getattr(errors_module, name) + + +def _load_model_attr(name: str) -> object: + from . import models as models_module + + return getattr(models_module, name) + + +def _load_render_attr(name: str) -> object: + from . import render as render_module + + return getattr(render_module, name) + + +def _load_io_attr(name: str) -> object: + from . import io as io_module + + return getattr(io_module, name) + + +def _load_core_cells_attr(name: str) -> object: + from .core import cells as cells_module + + return getattr(cells_module, name) + + +def _load_core_integrate_attr(name: str) -> object: + from .core import integrate as integrate_module + + return getattr(integrate_module, name) + + +_LAZY_EXPORTS: dict[str, LazyExportLoader] = { + "ColorsOptions": lambda: _load_engine_attr("ColorsOptions"), + "ConfigError": lambda: _load_error_attr("ConfigError"), + "DestinationOptions": lambda: _load_engine_attr("DestinationOptions"), + "ExStructEngine": lambda: _load_engine_attr("ExStructEngine"), + "ExstructError": lambda: _load_error_attr("ExstructError"), + "FilterOptions": lambda: _load_engine_attr("FilterOptions"), + "FormatOptions": lambda: _load_engine_attr("FormatOptions"), + "MissingDependencyError": lambda: _load_error_attr("MissingDependencyError"), + "OutputOptions": lambda: _load_engine_attr("OutputOptions"), + "PrintArea": lambda: _load_model_attr("PrintArea"), + "PrintAreaError": lambda: _load_error_attr("PrintAreaError"), + "PrintAreaView": lambda: _load_model_attr("PrintAreaView"), + "RenderError": lambda: _load_error_attr("RenderError"), + "SerializationError": lambda: _load_error_attr("SerializationError"), + "StructOptions": lambda: _load_engine_attr("StructOptions"), + "WorkbookData": lambda: _load_model_attr("WorkbookData"), + "CellRow": lambda: _load_model_attr("CellRow"), + "Chart": lambda: _load_model_attr("Chart"), + "ChartSeries": lambda: _load_model_attr("ChartSeries"), + "Shape": lambda: _load_model_attr("Shape"), + "SheetData": lambda: _load_model_attr("SheetData"), + "col_index_to_alpha": lambda: _load_model_attr("col_index_to_alpha"), + "convert_row_keys_to_alpha": lambda: _load_model_attr("convert_row_keys_to_alpha"), + "convert_sheet_keys_to_alpha": lambda: _load_model_attr( + "convert_sheet_keys_to_alpha" + ), + "convert_workbook_keys_to_alpha": lambda: _load_model_attr( + "convert_workbook_keys_to_alpha" + ), + "export_pdf": lambda: _load_render_attr("export_pdf"), + "export_sheet_images": lambda: _load_render_attr("export_sheet_images"), + "extract_workbook": lambda: _load_core_integrate_attr("extract_workbook"), + "serialize_workbook": lambda: _load_io_attr("serialize_workbook"), + "set_table_detection_params": lambda: _load_core_cells_attr( + "set_table_detection_params" + ), +} + + +def _resolve_lazy_export(name: str) -> object: + value = _LAZY_EXPORTS[name]() + globals()[name] = value + return value + + +def _lazy_type(name: str) -> object: + return _resolve_lazy_export(name) + + +def __getattr__(name: str) -> object: + if name not in _LAZY_EXPORTS: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + return _resolve_lazy_export(name) + + +def __dir__() -> list[str]: + return sorted(set(globals()) | set(__all__)) + def extract( file_path: str | Path, mode: ExtractionMode = "standard", *, alpha_col: bool = False @@ -112,6 +210,8 @@ def extract( Returns: WorkbookData: Parsed workbook representation containing sheets, rows, shapes, charts, and print areas. """ + from .engine import ExStructEngine, StructOptions + include_links = True if mode == "verbose" else False include_colors_map = True if mode == "verbose" else None include_formulas_map = True if mode == "verbose" else None @@ -157,6 +257,8 @@ def export( >>> export(wb, "out.json", pretty=True) >>> export(wb, "out.yaml", fmt="yaml") # doctest: +SKIP """ + from .io import save_as_json, save_as_toon, save_as_yaml + dest = Path(path) format_hint = (fmt or dest.suffix.lstrip(".") or "json").lower() match format_hint: @@ -202,6 +304,8 @@ def export_sheets( >>> "Sheet1" in paths True """ + from .io import save_sheets + return save_sheets( data, Path(dir_path), @@ -242,6 +346,8 @@ def export_sheets_as( >>> wb = extract("input.xlsx") >>> _ = export_sheets_as(wb, "out_yaml", fmt="yaml") # doctest: +SKIP """ + from .io import save_sheets + return save_sheets( data, Path(dir_path), @@ -285,6 +391,8 @@ def export_print_areas_as( >>> isinstance(paths, dict) True """ + from .io import save_print_area_views + return save_print_area_views( data, Path(dir_path), @@ -331,6 +439,9 @@ def export_auto_page_breaks( ... except PrintAreaError: ... pass """ + from .errors import PrintAreaError + from .io import save_auto_page_break_views + if not any(sheet.auto_print_areas for sheet in data.sheets.values()): message = "No auto page-break areas found. Enable COM-based auto page breaks before exporting." logger.warning(message) @@ -407,6 +518,15 @@ def process_excel( >>> process_excel(Path("input.xlsx"), output_path=Path("out.json"), pdf=True) # doctest: +SKIP """ + from .engine import ( + DestinationOptions, + ExStructEngine, + FilterOptions, + FormatOptions, + OutputOptions, + StructOptions, + ) + engine = ExStructEngine( options=StructOptions(mode=mode, alpha_col=alpha_col), output=OutputOptions( @@ -440,3 +560,19 @@ def process_excel( auto_page_breaks_dir=auto_page_breaks_dir, stream=stream, ) + + +def _patch_runtime_annotations() -> None: + annotations_map: dict[Callable[..., object], dict[str, str]] = { + extract: {"return": "_lazy_type('WorkbookData')"}, + export: {"data": "_lazy_type('WorkbookData')"}, + export_sheets: {"data": "_lazy_type('WorkbookData')"}, + export_sheets_as: {"data": "_lazy_type('WorkbookData')"}, + export_print_areas_as: {"data": "_lazy_type('WorkbookData')"}, + export_auto_page_breaks: {"data": "_lazy_type('WorkbookData')"}, + } + for function, function_annotations in annotations_map.items(): + function.__annotations__.update(function_annotations) + + +_patch_runtime_annotations() diff --git a/src/exstruct/cli/edit.py b/src/exstruct/cli/edit.py index a98f54c..9d60f6c 100644 --- a/src/exstruct/cli/edit.py +++ b/src/exstruct/cli/edit.py @@ -3,26 +3,12 @@ from __future__ import annotations import argparse +from collections.abc import Callable +from functools import lru_cache import json from pathlib import Path import sys -from typing import Any, get_args - -from pydantic import BaseModel, ValidationError - -from exstruct.edit import ( - MakeRequest, - OnConflictPolicy, - PatchBackend, - PatchOp, - PatchRequest, - get_patch_op_schema, - list_patch_op_schemas, - make_workbook, - patch_workbook, - resolve_top_level_sheet_for_payload, -) -from exstruct.mcp.validate_input import ValidateInputRequest, validate_input +from typing import Any, cast, get_args _EDIT_SUBCOMMANDS = frozenset({"patch", "make", "ops", "validate"}) _EXPLICIT_EDIT_TOKENS: dict[str, frozenset[str]] = { @@ -60,6 +46,120 @@ } +def _load_patch_workbook_impl() -> Callable[[object], object]: + from exstruct.edit.api import patch_workbook as patch_workbook_impl + + return cast(Callable[[object], object], patch_workbook_impl) + + +def _load_make_workbook_impl() -> Callable[[object], object]: + from exstruct.edit.api import make_workbook as make_workbook_impl + + return cast(Callable[[object], object], make_workbook_impl) + + +def _load_resolve_top_level_sheet_for_payload_impl() -> Callable[[object], object]: + from exstruct.edit.normalize import ( + resolve_top_level_sheet_for_payload as resolve_top_level_sheet_for_payload_impl, + ) + + return cast(Callable[[object], object], resolve_top_level_sheet_for_payload_impl) + + +def _load_validate_input_impl() -> Callable[[object], object]: + from exstruct.mcp.validate_input import validate_input as validate_input_impl + + return cast(Callable[[object], object], validate_input_impl) + + +def _load_on_conflict_policy_literal() -> object: + from exstruct.edit.types import OnConflictPolicy + + return OnConflictPolicy + + +def _load_patch_backend_literal() -> object: + from exstruct.edit.types import PatchBackend + + return PatchBackend + + +def _load_patch_request_model() -> Callable[..., object]: + from exstruct.edit.models import PatchRequest + + return cast(Callable[..., object], PatchRequest) + + +def _load_make_request_model() -> Callable[..., object]: + from exstruct.edit.models import MakeRequest + + return cast(Callable[..., object], MakeRequest) + + +def _load_patch_op_model() -> Callable[..., object]: + from exstruct.edit.models import PatchOp + + return cast(Callable[..., object], PatchOp) + + +def _load_list_patch_op_schemas() -> Callable[[], list[object]]: + from exstruct.edit.op_schema import list_patch_op_schemas + + return cast(Callable[[], list[object]], list_patch_op_schemas) + + +def _load_get_patch_op_schema() -> Callable[[str], object | None]: + from exstruct.edit.op_schema import get_patch_op_schema + + return cast(Callable[[str], object | None], get_patch_op_schema) + + +def _load_validate_input_request_model() -> Callable[..., object]: + from exstruct.mcp.validate_input import ValidateInputRequest + + return cast(Callable[..., object], ValidateInputRequest) + + +def _is_validation_error(exc: Exception) -> bool: + from pydantic import ValidationError + + return isinstance(exc, ValidationError) + + +def _is_cli_runtime_error(exc: Exception) -> bool: + return isinstance(exc, OSError | RuntimeError | ValueError) or _is_validation_error( + exc + ) + + +def _is_validate_cli_error(exc: Exception) -> bool: + return isinstance(exc, OSError | ValueError) or _is_validation_error(exc) + + +def patch_workbook(request: object) -> object: + """Compatibility wrapper that resolves patch execution lazily.""" + + return _load_patch_workbook_impl()(request) + + +def make_workbook(request: object) -> object: + """Compatibility wrapper that resolves make execution lazily.""" + + return _load_make_workbook_impl()(request) + + +def resolve_top_level_sheet_for_payload(payload: object) -> object: + """Compatibility wrapper that resolves payload normalization lazily.""" + + return _load_resolve_top_level_sheet_for_payload_impl()(payload) + + +def validate_input(request: object) -> object: + """Compatibility wrapper that resolves validate execution lazily.""" + + return _load_validate_input_impl()(request) + + def _literal_choices(literal_type: object) -> tuple[str, ...]: """Return argparse choices derived from one string Literal type.""" @@ -71,8 +171,14 @@ def _literal_choices(literal_type: object) -> tuple[str, ...]: return tuple(choices) -_ON_CONFLICT_CHOICES = _literal_choices(OnConflictPolicy) -_BACKEND_CHOICES = _literal_choices(PatchBackend) +@lru_cache(maxsize=1) +def _on_conflict_choices() -> tuple[str, ...]: + return _literal_choices(_load_on_conflict_policy_literal()) + + +@lru_cache(maxsize=1) +def _backend_choices() -> tuple[str, ...]: + return _literal_choices(_load_patch_backend_literal()) def is_edit_subcommand(argv: list[str]) -> bool: @@ -224,13 +330,13 @@ def _add_patch_like_arguments( parser.add_argument("--sheet", help="Top-level sheet fallback for patch ops.") parser.add_argument( "--on-conflict", - choices=_ON_CONFLICT_CHOICES, + choices=_on_conflict_choices(), default="overwrite", help="Conflict policy for output workbook paths.", ) parser.add_argument( "--backend", - choices=_BACKEND_CHOICES, + choices=_backend_choices(), default="auto", help="Patch backend selection policy.", ) @@ -280,9 +386,11 @@ def _run_patch_command(args: argparse.Namespace) -> int: if args.output is not None: request_kwargs["out_dir"] = args.output.parent request_kwargs["out_name"] = args.output.name - request = PatchRequest(**request_kwargs) - result = patch_workbook(request) - except (OSError, RuntimeError, ValidationError, ValueError) as exc: + request = _load_patch_request_model()(**request_kwargs) + result = cast(Any, patch_workbook(request)) + except Exception as exc: + if not _is_cli_runtime_error(exc): + raise _print_error(exc) return 1 @@ -295,7 +403,7 @@ def _run_make_command(args: argparse.Namespace) -> int: try: ops = _load_patch_ops(args.ops, sheet=args.sheet) - request = MakeRequest( + request = _load_make_request_model()( out_path=args.output, ops=ops, sheet=args.sheet, @@ -306,8 +414,10 @@ def _run_make_command(args: argparse.Namespace) -> int: preflight_formula_check=args.preflight_formula_check, backend=args.backend, ) - result = make_workbook(request) - except (OSError, RuntimeError, ValidationError, ValueError) as exc: + result = cast(Any, make_workbook(request)) + except Exception as exc: + if not _is_cli_runtime_error(exc): + raise _print_error(exc) return 1 @@ -318,10 +428,10 @@ def _run_make_command(args: argparse.Namespace) -> int: def _run_ops_list_command(args: argparse.Namespace) -> int: """Execute the ops list subcommand.""" + schemas = cast(Any, _load_list_patch_op_schemas()()) payload = { "ops": [ - {"op": schema.op, "description": schema.description} - for schema in list_patch_op_schemas() + {"op": schema.op, "description": schema.description} for schema in schemas ] } _print_json_payload(payload, pretty=args.pretty) @@ -331,7 +441,7 @@ def _run_ops_list_command(args: argparse.Namespace) -> int: def _run_ops_describe_command(args: argparse.Namespace) -> int: """Execute the ops describe subcommand.""" - schema = get_patch_op_schema(args.op) + schema = cast(Any, _load_get_patch_op_schema()(args.op)) if schema is None: _print_error(ValueError(f"Unknown patch operation: {args.op}")) return 1 @@ -343,8 +453,13 @@ def _run_validate_command(args: argparse.Namespace) -> int: """Execute the validate subcommand.""" try: - result = validate_input(ValidateInputRequest(xlsx_path=args.input)) - except (OSError, ValidationError, ValueError) as exc: + result = cast( + Any, + validate_input(_load_validate_input_request_model()(xlsx_path=args.input)), + ) + except Exception as exc: + if not _is_validate_cli_error(exc): + raise _print_error(exc) return 1 @@ -352,7 +467,7 @@ def _run_validate_command(args: argparse.Namespace) -> int: return 0 if result.is_readable else 1 -def _load_patch_ops(source: str | None, *, sheet: str | None = None) -> list[PatchOp]: +def _load_patch_ops(source: str | None, *, sheet: str | None = None) -> list[Any]: """Load patch ops from a JSON file or stdin.""" if source is None: @@ -368,7 +483,8 @@ def _load_patch_ops(source: str | None, *, sheet: str | None = None) -> list[Pat resolved_ops = resolved_payload.get("ops") if not isinstance(resolved_ops, list): raise ValueError("Resolved patch ops payload must contain an ops list.") - return [PatchOp(**op_payload) for op_payload in resolved_ops] + patch_op_model = _load_patch_op_model() + return [patch_op_model(**op_payload) for op_payload in resolved_ops] def _load_json_value(source: str) -> object: @@ -388,10 +504,8 @@ def _print_json_payload(payload: object, *, pretty: bool) -> None: """Serialize one JSON payload to stdout.""" serializable: object - if isinstance(payload, BaseModel): - serializable = payload.model_dump(mode="json") - else: - serializable = payload + model_dump = getattr(payload, "model_dump", None) + serializable = model_dump(mode="json") if callable(model_dump) else payload print( json.dumps( serializable, diff --git a/src/exstruct/cli/main.py b/src/exstruct/cli/main.py index 5d9a25f..5fd7062 100644 --- a/src/exstruct/cli/main.py +++ b/src/exstruct/cli/main.py @@ -3,13 +3,77 @@ from __future__ import annotations import argparse +from collections.abc import Callable +from importlib import import_module from pathlib import Path import sys +from typing import TYPE_CHECKING, cast -from exstruct import process_excel -from exstruct.cli.availability import get_com_availability -from exstruct.cli.edit import is_edit_subcommand, run_edit_cli -from exstruct.constraints import validate_libreoffice_process_request +if TYPE_CHECKING: + from exstruct.cli.availability import ComAvailability + +ProcessExcelFn = Callable[..., None] +EditPredicateFn = Callable[[list[str]], bool] +RunEditCliFn = Callable[[list[str]], int] +ComAvailabilityFn = Callable[[], "ComAvailability"] +LibreOfficeValidatorFn = Callable[..., Path] +_EDIT_SUBCOMMAND_NAMES = frozenset({"patch", "make", "ops", "validate"}) + + +def _load_process_excel() -> ProcessExcelFn: + module = import_module("exstruct") + return cast(ProcessExcelFn, module.process_excel) + + +def _load_is_edit_subcommand() -> EditPredicateFn: + module = import_module("exstruct.cli.edit") + return cast(EditPredicateFn, module.is_edit_subcommand) + + +def _load_run_edit_cli() -> RunEditCliFn: + module = import_module("exstruct.cli.edit") + return cast(RunEditCliFn, module.run_edit_cli) + + +def _load_get_com_availability() -> ComAvailabilityFn: + module = import_module("exstruct.cli.availability") + return cast(ComAvailabilityFn, module.get_com_availability) + + +def _load_libreoffice_validator() -> LibreOfficeValidatorFn: + module = import_module("exstruct.constraints") + return cast( + LibreOfficeValidatorFn, + module.validate_libreoffice_process_request, + ) + + +def process_excel(*args: object, **kwargs: object) -> None: + """Compatibility wrapper that resolves `exstruct.process_excel` lazily.""" + + _load_process_excel()(*args, **kwargs) + + +def is_edit_subcommand(argv: list[str]) -> bool: + """Compatibility wrapper that resolves the edit router lazily.""" + + if not argv: + return False + if argv[0] not in _EDIT_SUBCOMMAND_NAMES: + return False + return _load_is_edit_subcommand()(argv) + + +def run_edit_cli(argv: list[str]) -> int: + """Compatibility wrapper that resolves the edit CLI lazily.""" + + return _load_run_edit_cli()(argv) + + +def get_com_availability() -> ComAvailability: + """Compatibility wrapper that resolves COM probing lazily.""" + + return _load_get_com_availability()() def _ensure_utf8_stdout() -> None: @@ -147,7 +211,7 @@ def _validate_auto_page_breaks_request(args: argparse.Namespace) -> None: "with Excel COM." ) if args.mode == "libreoffice": - validate_libreoffice_process_request( + _load_libreoffice_validator()( args.input, mode=args.mode, include_auto_page_breaks=True, @@ -206,8 +270,8 @@ def main(argv: list[str] | None = None) -> int: include_backend_metadata=args.include_backend_metadata, ) return 0 - except Exception as e: - print(f"Error: {e}", flush=True) + except Exception as exc: + print(f"Error: {exc}", flush=True) return 1 diff --git a/src/exstruct/edit/__init__.py b/src/exstruct/edit/__init__.py index 836d656..f26b5bc 100644 --- a/src/exstruct/edit/__init__.py +++ b/src/exstruct/edit/__init__.py @@ -2,70 +2,74 @@ from __future__ import annotations -from .api import make_workbook, patch_workbook -from .chart_types import ( - CHART_TYPE_ALIASES, - CHART_TYPE_TO_COM_ID, - SUPPORTED_CHART_TYPES, - SUPPORTED_CHART_TYPES_CSV, - SUPPORTED_CHART_TYPES_SET, - normalize_chart_type, - resolve_chart_type_id, -) -from .errors import PatchOpError -from .models import ( - AlignmentSnapshot, - BorderSideSnapshot, - BorderSnapshot, - ColumnDimensionSnapshot, - DesignSnapshot, - FillSnapshot, - FontSnapshot, - FormulaIssue, - MakeRequest, - MergeStateSnapshot, - OpenpyxlEngineResult, - OpenpyxlWorksheetProtocol, - PatchDiffItem, - PatchErrorDetail, - PatchOp, - PatchRequest, - PatchResult, - PatchValue, - RowDimensionSnapshot, - XlwingsRangeProtocol, -) -from .normalize import ( - alias_to_canonical_with_conflict_check, - build_missing_sheet_message, - build_patch_op_error_message, - coerce_patch_ops, - normalize_draw_grid_border_range, - normalize_patch_op_aliases, - normalize_top_level_sheet, - parse_patch_op_json, - resolve_top_level_sheet_for_payload, -) -from .op_schema import ( - PatchOpSchema, - build_patch_tool_mini_schema, - get_patch_op_schema, - list_patch_op_schemas, - schema_with_sheet_resolution_rules, -) -from .specs import PATCH_OP_SPECS, PatchOpSpec, get_alias_map_for_op -from .types import ( - FormulaIssueCode, - FormulaIssueLevel, - HorizontalAlignType, - OnConflictPolicy, - PatchBackend, - PatchEngine, - PatchOpType, - PatchStatus, - PatchValueKind, - VerticalAlignType, -) +from collections.abc import Callable +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .api import make_workbook, patch_workbook + from .chart_types import ( + CHART_TYPE_ALIASES, + CHART_TYPE_TO_COM_ID, + SUPPORTED_CHART_TYPES, + SUPPORTED_CHART_TYPES_CSV, + SUPPORTED_CHART_TYPES_SET, + normalize_chart_type, + resolve_chart_type_id, + ) + from .errors import PatchOpError + from .models import ( + AlignmentSnapshot, + BorderSideSnapshot, + BorderSnapshot, + ColumnDimensionSnapshot, + DesignSnapshot, + FillSnapshot, + FontSnapshot, + FormulaIssue, + MakeRequest, + MergeStateSnapshot, + OpenpyxlEngineResult, + OpenpyxlWorksheetProtocol, + PatchDiffItem, + PatchErrorDetail, + PatchOp, + PatchRequest, + PatchResult, + PatchValue, + RowDimensionSnapshot, + XlwingsRangeProtocol, + ) + from .normalize import ( + alias_to_canonical_with_conflict_check, + build_missing_sheet_message, + build_patch_op_error_message, + coerce_patch_ops, + normalize_draw_grid_border_range, + normalize_patch_op_aliases, + normalize_top_level_sheet, + parse_patch_op_json, + resolve_top_level_sheet_for_payload, + ) + from .op_schema import ( + PatchOpSchema, + build_patch_tool_mini_schema, + get_patch_op_schema, + list_patch_op_schemas, + schema_with_sheet_resolution_rules, + ) + from .specs import PATCH_OP_SPECS, PatchOpSpec, get_alias_map_for_op + from .types import ( + FormulaIssueCode, + FormulaIssueLevel, + HorizontalAlignType, + OnConflictPolicy, + PatchBackend, + PatchEngine, + PatchOpType, + PatchStatus, + PatchValueKind, + VerticalAlignType, + ) __all__ = [ "AlignmentSnapshot", @@ -126,3 +130,152 @@ "resolve_top_level_sheet_for_payload", "schema_with_sheet_resolution_rules", ] + +LazyExportLoader = Callable[[], object] + + +def _load_api_attr(name: str) -> object: + from . import api as api_module + + return getattr(api_module, name) + + +def _load_chart_type_attr(name: str) -> object: + from . import chart_types as chart_types_module + + return getattr(chart_types_module, name) + + +def _load_error_attr(name: str) -> object: + from . import errors as errors_module + + return getattr(errors_module, name) + + +def _load_model_attr(name: str) -> object: + from . import models as models_module + + return getattr(models_module, name) + + +def _load_normalize_attr(name: str) -> object: + from . import normalize as normalize_module + + return getattr(normalize_module, name) + + +def _load_op_schema_attr(name: str) -> object: + from . import op_schema as op_schema_module + + return getattr(op_schema_module, name) + + +def _load_specs_attr(name: str) -> object: + from . import specs as specs_module + + return getattr(specs_module, name) + + +def _load_type_attr(name: str) -> object: + from . import types as types_module + + return getattr(types_module, name) + + +_LAZY_EXPORTS: dict[str, LazyExportLoader] = { + "AlignmentSnapshot": lambda: _load_model_attr("AlignmentSnapshot"), + "BorderSideSnapshot": lambda: _load_model_attr("BorderSideSnapshot"), + "BorderSnapshot": lambda: _load_model_attr("BorderSnapshot"), + "CHART_TYPE_ALIASES": lambda: _load_chart_type_attr("CHART_TYPE_ALIASES"), + "CHART_TYPE_TO_COM_ID": lambda: _load_chart_type_attr("CHART_TYPE_TO_COM_ID"), + "ColumnDimensionSnapshot": lambda: _load_model_attr("ColumnDimensionSnapshot"), + "DesignSnapshot": lambda: _load_model_attr("DesignSnapshot"), + "FillSnapshot": lambda: _load_model_attr("FillSnapshot"), + "FontSnapshot": lambda: _load_model_attr("FontSnapshot"), + "FormulaIssue": lambda: _load_model_attr("FormulaIssue"), + "FormulaIssueCode": lambda: _load_type_attr("FormulaIssueCode"), + "FormulaIssueLevel": lambda: _load_type_attr("FormulaIssueLevel"), + "HorizontalAlignType": lambda: _load_type_attr("HorizontalAlignType"), + "MakeRequest": lambda: _load_model_attr("MakeRequest"), + "MergeStateSnapshot": lambda: _load_model_attr("MergeStateSnapshot"), + "OnConflictPolicy": lambda: _load_type_attr("OnConflictPolicy"), + "OpenpyxlEngineResult": lambda: _load_model_attr("OpenpyxlEngineResult"), + "OpenpyxlWorksheetProtocol": lambda: _load_model_attr("OpenpyxlWorksheetProtocol"), + "PATCH_OP_SPECS": lambda: _load_specs_attr("PATCH_OP_SPECS"), + "PatchBackend": lambda: _load_type_attr("PatchBackend"), + "PatchDiffItem": lambda: _load_model_attr("PatchDiffItem"), + "PatchEngine": lambda: _load_type_attr("PatchEngine"), + "PatchErrorDetail": lambda: _load_model_attr("PatchErrorDetail"), + "PatchOp": lambda: _load_model_attr("PatchOp"), + "PatchOpError": lambda: _load_error_attr("PatchOpError"), + "PatchOpSchema": lambda: _load_op_schema_attr("PatchOpSchema"), + "PatchOpSpec": lambda: _load_specs_attr("PatchOpSpec"), + "PatchOpType": lambda: _load_type_attr("PatchOpType"), + "PatchRequest": lambda: _load_model_attr("PatchRequest"), + "PatchResult": lambda: _load_model_attr("PatchResult"), + "PatchStatus": lambda: _load_type_attr("PatchStatus"), + "PatchValue": lambda: _load_model_attr("PatchValue"), + "PatchValueKind": lambda: _load_type_attr("PatchValueKind"), + "RowDimensionSnapshot": lambda: _load_model_attr("RowDimensionSnapshot"), + "SUPPORTED_CHART_TYPES": lambda: _load_chart_type_attr("SUPPORTED_CHART_TYPES"), + "SUPPORTED_CHART_TYPES_CSV": lambda: _load_chart_type_attr( + "SUPPORTED_CHART_TYPES_CSV" + ), + "SUPPORTED_CHART_TYPES_SET": lambda: _load_chart_type_attr( + "SUPPORTED_CHART_TYPES_SET" + ), + "VerticalAlignType": lambda: _load_type_attr("VerticalAlignType"), + "XlwingsRangeProtocol": lambda: _load_model_attr("XlwingsRangeProtocol"), + "alias_to_canonical_with_conflict_check": lambda: _load_normalize_attr( + "alias_to_canonical_with_conflict_check" + ), + "build_missing_sheet_message": lambda: _load_normalize_attr( + "build_missing_sheet_message" + ), + "build_patch_op_error_message": lambda: _load_normalize_attr( + "build_patch_op_error_message" + ), + "build_patch_tool_mini_schema": lambda: _load_op_schema_attr( + "build_patch_tool_mini_schema" + ), + "coerce_patch_ops": lambda: _load_normalize_attr("coerce_patch_ops"), + "get_alias_map_for_op": lambda: _load_specs_attr("get_alias_map_for_op"), + "get_patch_op_schema": lambda: _load_op_schema_attr("get_patch_op_schema"), + "list_patch_op_schemas": lambda: _load_op_schema_attr("list_patch_op_schemas"), + "make_workbook": lambda: _load_api_attr("make_workbook"), + "normalize_chart_type": lambda: _load_chart_type_attr("normalize_chart_type"), + "normalize_draw_grid_border_range": lambda: _load_normalize_attr( + "normalize_draw_grid_border_range" + ), + "normalize_patch_op_aliases": lambda: _load_normalize_attr( + "normalize_patch_op_aliases" + ), + "normalize_top_level_sheet": lambda: _load_normalize_attr( + "normalize_top_level_sheet" + ), + "parse_patch_op_json": lambda: _load_normalize_attr("parse_patch_op_json"), + "patch_workbook": lambda: _load_api_attr("patch_workbook"), + "resolve_chart_type_id": lambda: _load_chart_type_attr("resolve_chart_type_id"), + "resolve_top_level_sheet_for_payload": lambda: _load_normalize_attr( + "resolve_top_level_sheet_for_payload" + ), + "schema_with_sheet_resolution_rules": lambda: _load_op_schema_attr( + "schema_with_sheet_resolution_rules" + ), +} + + +def _resolve_lazy_export(name: str) -> object: + value = _LAZY_EXPORTS[name]() + globals()[name] = value + return value + + +def __getattr__(name: str) -> object: + if name not in _LAZY_EXPORTS: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + return _resolve_lazy_export(name) + + +def __dir__() -> list[str]: + return sorted(set(globals()) | set(__all__)) diff --git a/tasks/feature_spec.md b/tasks/feature_spec.md index f4bb9dc..306b375 100644 --- a/tasks/feature_spec.md +++ b/tasks/feature_spec.md @@ -167,3 +167,138 @@ - `not-needed` - rationale: this is a wording-only follow-up under the existing `ADR-0008` decision. + +## 2026-03-20 issue #108 CLI startup lazy import optimization + +### Goal + +- Reduce startup import cost for lightweight CLI paths such as `exstruct --help` and `exstruct ops list`. +- Keep the existing extraction and editing CLI contracts unchanged while delaying heavy implementation imports until routing is known. +- Preserve the current module-level monkeypatch surfaces used by the CLI test suite. + +### Public contract + +- `exstruct --help` and parser construction keep the current CLI syntax, help text, and exit behavior. +- `exstruct ops list` and `exstruct ops describe` keep their current output shape and exit behavior. +- Extraction invocations still call `process_excel(...)` with the same arguments and keep current file-not-found and auto page-break validation behavior. +- Public Python symbol names exported from `exstruct` and `exstruct.edit` remain unchanged; only their import timing changes. + +### Internal implementation guarantees + +- `src/exstruct/__init__.py` must not eagerly import extraction engine, IO, render, or model modules during package import; convenience functions may import those dependencies inside function bodies. +- `src/exstruct/edit/__init__.py` must not eagerly import editing service/runtime/model modules during package import; exported names should resolve lazily. +- `src/exstruct/cli/main.py` must route edit vs extraction commands before importing edit/extraction implementations. +- `src/exstruct/cli/edit.py` must not import `exstruct.mcp.validate_input` or editing execution helpers at module import time; command handlers load only the functionality they need. +- Existing CLI module patch points (`process_excel`, `get_com_availability`, `is_edit_subcommand`, `run_edit_cli`, `patch_workbook`, `make_workbook`, `resolve_top_level_sheet_for_payload`, `validate_input`) remain present as thin wrappers. + +### Scope and non-goals + +- In scope: + - `src/exstruct/__init__.py` + - `src/exstruct/edit/__init__.py` + - `src/exstruct/cli/main.py` + - `src/exstruct/cli/edit.py` + - targeted tests and one architecture note +- Out of scope: + - changing CLI syntax, help wording, or JSON contracts + - changing backend selection policy + - optimizing `exstruct validate` startup beyond removing it from the `ops` path + - refactoring `src/exstruct/mcp/__init__.py` unless required by failing tests on the `ops` path + +### Permanent destinations + +- `dev-docs/architecture/overview.md` + - Records that package `__init__` files and lightweight CLI startup paths must remain side-effect-free and defer heavy imports. +- `tasks/feature_spec.md` and `tasks/todo.md` + - Keep the temporary implementation/verification record for this issue. +- `dev-docs/adr/` + - No new ADR is planned; this issue changes import timing only and does not alter the public contract or policy. + +### Verification plan + +- `tests/cli/test_cli.py` + - help and extraction routing still behave the same + - lightweight startup paths do not eagerly load edit/extraction implementation modules +- `tests/cli/test_edit_cli.py` + - `ops list` / `ops describe` do not depend on extraction import paths + - existing monkeypatch-based tests still pass +- `tests/edit/test_architecture.py` or a focused startup test module + - `import exstruct` does not eagerly load extraction engine modules + - `import exstruct.cli.edit` does not eagerly load `exstruct.mcp` / `exstruct.mcp.extract_runner` +- `uv run pytest tests/cli/test_cli.py tests/cli/test_edit_cli.py tests/edit/test_architecture.py -q` +- `uv run task precommit-run` +- manual importtime sanity checks for `--help` and `ops list` + +### ADR verdict + +- `not-needed` +- rationale: this is a startup-focused internal refactor that preserves existing CLI/API contracts and backend policy. The durable guidance belongs in architecture notes rather than a new policy ADR. + +## 2026-03-20 issue #108 review and Codacy follow-up + +### Goal + +- Resolve the 3 Codacy `non-literal-import` findings on PR `#112` without regressing the lazy-import startup work. +- Address the substantive PR review comments about runtime annotation introspection and unnecessary eager imports on lightweight CLI paths. +- Keep the public CLI and Python export surface unchanged while tightening the internal implementation. + +### Public contract + +- `typing.get_type_hints(exstruct.extract)` and the other public convenience helpers in `src/exstruct/__init__.py` must keep resolving runtime-visible exported model types after the lazy-import refactor. +- `exstruct --help` and extraction-style argv that are clearly not edit subcommands must not import `exstruct.cli.edit`. +- Importing `exstruct.cli.edit` for routing/help-only purposes must not eagerly import `pydantic`. +- Public exports from `exstruct` and `exstruct.edit` remain unchanged; only the internal lazy-loader structure changes to satisfy static analysis. + +### Constraints + +- Do not undo the startup optimization by eagerly importing `exstruct.models`, `exstruct.edit.models`, or `pydantic` at module import time. +- Replace generic non-literal `import_module()` helpers with explicit literal import paths or literal loader functions so Codacy/Semgrep no longer flags them. +- Keep the existing monkeypatch-compatible wrappers in `src/exstruct/cli/main.py` and `src/exstruct/cli/edit.py`. + +### Verification plan + +- `tests/cli/test_cli_lazy_imports.py` + - `import exstruct.cli.edit` does not eagerly load `pydantic` + - `main(["--help"])` does not import `exstruct.cli.edit` + - `typing.get_type_hints(exstruct.extract)` resolves `WorkbookData` successfully +- `tests/cli/test_edit_cli.py` + - existing edit CLI behavior still passes with the new explicit loaders +- `uv run pytest tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/cli/test_cli.py -q` +- `uv run task precommit-run` + +### ADR verdict + +- `not-needed` +- rationale: this is a follow-up implementation hardening and static-analysis cleanup under the existing issue `#108` design, not a new policy decision. + +## 2026-03-21 issue #108 review follow-up: validate runtime error scope + +### Goal + +- Restore the original `validate` subcommand exception boundary after the lazy-loader refactor in `src/exstruct/cli/edit.py`. +- Keep the patch/make commands catching `RuntimeError` while ensuring `validate` does not silently absorb it. + +### Public contract + +- `patch` and `make` continue to convert backend/runtime failures in `(OSError, RuntimeError, ValidationError, ValueError)` into `Error: ...` stderr output with exit code `1`. +- `validate` keeps its narrower historical contract and only converts `(OSError, ValidationError, ValueError)` into CLI error output. +- If `validate_input(...)` raises `RuntimeError`, the exception must still propagate rather than being turned into a handled CLI error. + +### Constraints + +- Do not broaden this follow-up into another startup optimization pass. +- Keep the current lazy import boundary for `pydantic` and validation helpers intact. +- Do not change the behavior of `patch` and `make` while narrowing `validate`. + +### Verification plan + +- `tests/cli/test_edit_cli.py` + - `validate` still returns handled CLI errors for `OSError` + - `validate` propagates `RuntimeError` +- `uv run pytest tests/cli/test_edit_cli.py -q` +- `uv run task precommit-run` + +### ADR verdict + +- `not-needed` +- rationale: this is a narrow behavior-restoration follow-up inside the existing edit CLI contract, not a new design decision. diff --git a/tasks/todo.md b/tasks/todo.md index 32ed7d7..a073707 100644 --- a/tasks/todo.md +++ b/tasks/todo.md @@ -118,3 +118,100 @@ - `uv run pytest tests/cli/test_cli.py -q` - `uv run task precommit-run` - `git diff --check` + +## 2026-03-20 issue #108 CLI startup lazy import optimization + +### Planning + +- [x] Confirm issue `#108` details with `gh issue view 108`. +- [x] Inspect current import paths in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, `src/exstruct/cli/main.py`, and `src/exstruct/cli/edit.py`. +- [x] Classify ADR need for the startup optimization work. +- [x] Add the issue `#108` working spec to `tasks/feature_spec.md`. +- [x] Refactor `src/exstruct/__init__.py` to defer heavy imports while preserving exported symbol names. +- [x] Refactor `src/exstruct/edit/__init__.py` to defer heavy imports while preserving exported symbol names. +- [x] Refactor `src/exstruct/cli/main.py` so edit/extraction implementations load only after routing is known. +- [x] Refactor `src/exstruct/cli/edit.py` so `ops` commands avoid extraction-path imports and handler-specific dependencies load lazily. +- [x] Add regression tests for startup import isolation and existing CLI behavior. +- [x] Update `dev-docs/architecture/overview.md` with the lightweight-startup import rule. +- [x] Run targeted pytest for CLI/startup coverage. +- [x] Run `uv run task precommit-run`. +- [x] Record final verification and retention notes in this Review section. + +### Review + +- `src/exstruct/__init__.py` now keeps the public export surface but resolves heavy extraction/runtime symbols lazily, so importing `exstruct` no longer front-loads extraction engine modules for CLI startup. +- `src/exstruct/edit/__init__.py` now resolves editing exports lazily, which lets CLI code import edit submodules without paying the full patch-service import cost up front. +- `src/exstruct/cli/main.py` now keeps monkeypatch-compatible wrappers for `process_excel`, `get_com_availability`, `is_edit_subcommand`, and `run_edit_cli`, but loads the underlying implementations only after routing demands them. +- `src/exstruct/cli/edit.py` now keeps monkeypatch-compatible wrappers for `patch_workbook`, `make_workbook`, `resolve_top_level_sheet_for_payload`, and `validate_input`, while `ops` commands load only schema metadata and avoid dragging the extraction path into startup. +- `tests/cli/test_cli_lazy_imports.py` now locks the startup boundary with subprocess `sys.modules` probes for `import exstruct`, `import exstruct.cli.main`, `import exstruct.cli.edit`, and `main(["ops", "list"])`. +- `dev-docs/architecture/overview.md` now records the durable rule that package `__init__` files and lightweight CLI startup paths must remain side-effect-free. +- Retention decision: + - No new ADR was added. The change preserves the public contract and only changes import timing, so the durable guidance lives in `dev-docs/architecture/overview.md`. + - The temporary working record for implementation order and verification remains limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. +- Verification: + - `uv run pytest tests/cli/test_cli.py tests/cli/test_edit_cli.py tests/cli/test_cli_lazy_imports.py tests/edit/test_architecture.py -q` + - `uv run task precommit-run` + - manual `-X importtime` sanity probe for `-m exstruct.cli.main --help` and `-m exstruct.cli.main ops list` + +## 2026-03-20 issue #108 review and Codacy follow-up + +### Planning + +- [x] Retrieve PR `#112` Codacy findings and review comments with `scripts/codacy_issues.py` and `gh`. +- [x] Classify which findings are substantive and confirm the current implementation gaps locally. +- [x] Add the follow-up spec and task record to `tasks/feature_spec.md` and `tasks/todo.md`. +- [x] Replace the generic lazy-import helpers in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, and `src/exstruct/cli/edit.py` with explicit literal loaders. +- [x] Restore runtime-resolvable type hints for public helpers in `src/exstruct/__init__.py` without eagerly importing `exstruct.models`. +- [x] Add a fast path in `src/exstruct/cli/main.py` so non-edit argv does not import `exstruct.cli.edit`. +- [x] Remove the top-level `pydantic` import from `src/exstruct/cli/edit.py`. +- [x] Add or update regression tests for startup import boundaries and runtime type hints. +- [x] Run targeted pytest for CLI follow-up coverage. +- [x] Run `uv run task precommit-run`. +- [x] Update this Review section with the final verification result and retention decision. + +### Review + +- Codacy's three `non-literal-import` findings were not exploitable security bugs in practice, because the module targets were fixed by code rather than user input. Even so, the finding was operationally valid for CI, so the generic loaders were replaced with explicit literal loader functions in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, and `src/exstruct/cli/edit.py`. +- The PR review about runtime type hints was valid. `typing.get_type_hints(exstruct.extract)` regressed with `NameError` after the lazy-import refactor, so `src/exstruct/__init__.py` now patches the affected public helper annotations to resolve exported model types through `_lazy_type(...)` only when runtime introspection asks for them. +- The PR review about `cli.main` routing was valid. `src/exstruct/cli/main.py` now fast-fails obvious non-edit argv before importing `exstruct.cli.edit`, so `exstruct --help` and extraction-style argv no longer pay the edit-module import cost. +- The PR review about `pydantic` eager import in `src/exstruct/cli/edit.py` was valid for routing/help-only paths. The module now defers `pydantic` loading until an actual validation-error check happens and serializes JSON payloads via `model_dump` duck typing. +- `tests/cli/test_cli_lazy_imports.py` now locks the new boundaries: `import exstruct.cli.edit` keeps `pydantic` unloaded, `main(["--help"])` keeps `exstruct.cli.edit` unloaded, and `typing.get_type_hints(exstruct.extract)` resolves `WorkbookData` successfully. +- Retention decision: + - No new ADR or permanent spec migration was needed. This follow-up only hardens the existing issue `#108` implementation and review expectations under the already-recorded lightweight-startup rule in `dev-docs/architecture/overview.md`. + - The temporary working notes for this follow-up can remain limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. +- Verification: + - `python scripts/codacy_issues.py --pr 112 --min-level Error` + - `gh pr view 112 --json number,title,reviewDecision,reviews,comments,files,url,headRefName,baseRefName` + - `gh api repos/harumiWeb/exstruct/pulls/112/comments` + - `uv run pytest tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/cli/test_cli.py -q` + - `uv run task precommit-run` + - manual `uv run python` probes for `typing.get_type_hints(exstruct.extract)` and `main(["--help"])` import boundaries + +## 2026-03-21 issue #108 review follow-up: validate runtime error scope + +### Planning + +- [x] Retrieve the new PR `#112` review comments and classify which ones are substantively valid. +- [x] Confirm locally whether `isinstance(exc, OSError | RuntimeError | ValueError)` is actually invalid on the supported Python runtime. +- [x] Add the working spec and task record for this follow-up. +- [x] Narrow `validate` exception handling in `src/exstruct/cli/edit.py` back to the original `(OSError, ValidationError, ValueError)` scope. +- [x] Add a regression test that proves `validate` still propagates `RuntimeError`. +- [x] Run targeted pytest for `tests/cli/test_edit_cli.py`. +- [x] Run `uv run task precommit-run`. +- [x] Update this Review section with the final verification result and retention decision. + +### Review + +- The new Devin review finding was valid: the shared `_is_cli_runtime_error(...)` helper widened `_run_validate_command(...)` to catch `RuntimeError`, which changed the historical validate-subcommand contract. +- The new Copilot review finding was not valid on the supported runtime. A direct `uv run python` probe confirmed that `isinstance(OSError(), OSError | RuntimeError | ValueError)` evaluates successfully on Python `3.11`, so no change was made for that comment. +- `src/exstruct/cli/edit.py` now uses a separate `_is_validate_cli_error(...)` helper so `patch` / `make` still catch `RuntimeError` while `validate` only catches `(OSError, ValidationError, ValueError)` as before. +- `tests/cli/test_edit_cli.py` now includes a regression test proving that `validate` propagates `RuntimeError` instead of converting it to handled CLI stderr output. +- Retention decision: + - No new ADR or permanent spec migration was needed. This follow-up only restores the pre-existing validate CLI error boundary inside the current edit CLI design. + - The temporary working notes for this review follow-up can remain limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. +- Verification: + - `gh api repos/harumiWeb/exstruct/pulls/112/comments` + - `gh api graphql -f query='query { repository(owner:"harumiWeb", name:"exstruct") { pullRequest(number: 112) { reviewThreads(first: 30) { nodes { id isResolved isOutdated comments(first: 20) { nodes { id author { login } body path url createdAt } } } } } } }'` + - `uv run python` probe for `isinstance(OSError(), OSError | RuntimeError | ValueError)` + - `uv run pytest tests/cli/test_edit_cli.py -q` + - `uv run task precommit-run` diff --git a/tests/cli/test_cli_lazy_imports.py b/tests/cli/test_cli_lazy_imports.py new file mode 100644 index 0000000..e305861 --- /dev/null +++ b/tests/cli/test_cli_lazy_imports.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import json +import subprocess +import sys +from typing import cast + + +def _run_python(code: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, "-c", code], + check=False, + capture_output=True, + text=True, + ) + + +def _run_probe(code: str) -> dict[str, object]: + result = _run_python(code) + assert result.returncode == 0, result.stderr or result.stdout + return cast(dict[str, object], json.loads(result.stdout)) + + +def test_import_exstruct_stays_lightweight() -> None: + payload = _run_probe( + """ +import json +import sys +import exstruct +print(json.dumps({ + "core_integrate": "exstruct.core.integrate" in sys.modules, + "engine": "exstruct.engine" in sys.modules, + "pandas": "pandas" in sys.modules, + "openpyxl": "openpyxl" in sys.modules, + "xlwings": "xlwings" in sys.modules, +})) +""" + ) + + assert payload == { + "core_integrate": False, + "engine": False, + "pandas": False, + "openpyxl": False, + "xlwings": False, + } + + +def test_import_cli_main_does_not_load_edit_or_extraction_modules() -> None: + payload = _run_probe( + """ +import json +import sys +import exstruct.cli.main +print(json.dumps({ + "cli_edit": "exstruct.cli.edit" in sys.modules, + "mcp": "exstruct.mcp" in sys.modules, + "core_integrate": "exstruct.core.integrate" in sys.modules, + "engine": "exstruct.engine" in sys.modules, +})) +""" + ) + + assert payload == { + "cli_edit": False, + "mcp": False, + "core_integrate": False, + "engine": False, + } + + +def test_import_cli_edit_does_not_load_mcp_or_edit_execution_modules() -> None: + payload = _run_probe( + """ +import json +import sys +import exstruct.cli.edit +print(json.dumps({ + "pydantic": "pydantic" in sys.modules, + "mcp": "exstruct.mcp" in sys.modules, + "extract_runner": "exstruct.mcp.extract_runner" in sys.modules, + "edit_api": "exstruct.edit.api" in sys.modules, + "edit_service": "exstruct.edit.service" in sys.modules, +})) +""" + ) + + assert payload == { + "pydantic": False, + "mcp": False, + "extract_runner": False, + "edit_api": False, + "edit_service": False, + } + + +def test_public_type_hints_resolve_lazy_exports_at_runtime() -> None: + payload = _run_probe( + """ +import json +import sys +import typing + +import exstruct + +before = { + "models": "exstruct.models" in sys.modules, + "pydantic": "pydantic" in sys.modules, +} +hints = typing.get_type_hints(exstruct.extract) + +print(json.dumps({ + "before": before, + "return_module": hints["return"].__module__, + "return_name": hints["return"].__name__, + "after_models": "exstruct.models" in sys.modules, +})) +""" + ) + + assert payload["before"] == {"models": False, "pydantic": False} + assert payload["return_module"] == "exstruct.models" + assert payload["return_name"] == "WorkbookData" + assert payload["after_models"] is True + + +def test_help_path_keeps_lightweight_import_boundaries() -> None: + payload = _run_probe( + """ +import io +import json +import sys +from contextlib import redirect_stderr, redirect_stdout + +from exstruct.cli.main import main + +stdout_buffer = io.StringIO() +stderr_buffer = io.StringIO() +with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): + try: + help_exit = main(["--help"]) + except SystemExit as exc: + help_exit = exc.code +help_text = stdout_buffer.getvalue() + +print(json.dumps({ + "help_exit": help_exit, + "help_has_option": "--auto-page-breaks-dir" in help_text, + "cli_edit": "exstruct.cli.edit" in sys.modules, + "mcp": "exstruct.mcp" in sys.modules, + "core_integrate": "exstruct.core.integrate" in sys.modules, +})) +""" + ) + + assert payload["help_exit"] == 0 + assert payload["help_has_option"] is True + assert payload["cli_edit"] is False + assert payload["mcp"] is False + assert payload["core_integrate"] is False + + +def test_ops_list_keeps_mcp_and_extraction_lightweight() -> None: + payload = _run_probe( + """ +import io +import json +import sys +from contextlib import redirect_stderr, redirect_stdout + +from exstruct.cli.main import main + +stdout_buffer = io.StringIO() +stderr_buffer = io.StringIO() +with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): + ops_exit = main(["ops", "list"]) +ops_payload = json.loads(stdout_buffer.getvalue()) + +print(json.dumps({ + "ops_exit": ops_exit, + "ops_has_set_value": any(item["op"] == "set_value" for item in ops_payload["ops"]), + "cli_edit": "exstruct.cli.edit" in sys.modules, + "mcp": "exstruct.mcp" in sys.modules, + "extract_runner": "exstruct.mcp.extract_runner" in sys.modules, + "core_integrate": "exstruct.core.integrate" in sys.modules, +})) +""" + ) + + assert payload["ops_exit"] == 0 + assert payload["ops_has_set_value"] is True + assert payload["cli_edit"] is True + assert payload["mcp"] is False + assert payload["extract_runner"] is False + assert payload["core_integrate"] is False diff --git a/tests/cli/test_edit_cli.py b/tests/cli/test_edit_cli.py index 1fed5fc..218d6e3 100644 --- a/tests/cli/test_edit_cli.py +++ b/tests/cli/test_edit_cli.py @@ -427,6 +427,21 @@ def _raise_os_error(_request: object) -> object: assert "Error: boom" in result.stderr +def test_validate_cli_propagates_runtime_error( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + path = tmp_path / "input.xlsx" + path.write_bytes(b"x") + + def _raise_runtime_error(_request: object) -> object: + raise RuntimeError("boom") + + monkeypatch.setattr(edit_cli_module, "validate_input", _raise_runtime_error) + + with pytest.raises(RuntimeError, match="boom"): + _run_cli(["validate", "--input", str(path)]) + + def test_extraction_help_mentions_editing_commands() -> None: help_text = build_parser().format_help()