From 83f95aa9a366b9625a89b64109f99b27b8aec1c3 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Fri, 20 Mar 2026 22:07:58 +0900 Subject: [PATCH 1/3] Optimize CLI startup imports for issue #108 --- dev-docs/architecture/overview.md | 4 + src/exstruct/__init__.py | 156 ++++++++++++++------ src/exstruct/cli/edit.py | 123 ++++++++++++---- src/exstruct/cli/main.py | 73 +++++++++- src/exstruct/edit/__init__.py | 224 ++++++++++++++++++++--------- tasks/feature_spec.md | 66 +++++++++ tasks/todo.md | 34 +++++ tests/cli/test_cli_lazy_imports.py | 138 ++++++++++++++++++ 8 files changed, 673 insertions(+), 145 deletions(-) create mode 100644 tests/cli/test_cli_lazy_imports.py diff --git a/dev-docs/architecture/overview.md b/dev-docs/architecture/overview.md index 4e202da2..5d20b004 100644 --- a/dev-docs/architecture/overview.md +++ b/dev-docs/architecture/overview.md @@ -98,6 +98,10 @@ CLI entry point `validate` - `edit.py` contains the Phase 2 editing parser, JSON serialization helpers, and wrappers around `exstruct.edit` +- `exstruct.__init__`, `exstruct.edit.__init__`, and lightweight CLI startup + paths must remain side-effect-free: `--help` and `ops` routing should defer + heavy extraction/edit implementation imports until command execution needs + them ### edit/ diff --git a/src/exstruct/__init__.py b/src/exstruct/__init__.py index 3972b5b0..25b21a4b 100644 --- a/src/exstruct/__init__.py +++ b/src/exstruct/__init__.py @@ -2,53 +2,47 @@ from __future__ import annotations +from importlib import import_module import logging from pathlib import Path -from typing import Literal, TextIO - -from .core.cells import set_table_detection_params -from .core.integrate import extract_workbook -from .engine import ( - ColorsOptions, - DestinationOptions, - ExStructEngine, - FilterOptions, - FormatOptions, - OutputOptions, - StructOptions, -) -from .errors import ( - ConfigError, - ExstructError, - MissingDependencyError, - PrintAreaError, - RenderError, - SerializationError, -) -from .io import ( - save_as_json, - save_as_toon, - save_as_yaml, - save_auto_page_break_views, - save_print_area_views, - save_sheets, - serialize_workbook, -) -from .models import ( - CellRow, - Chart, - ChartSeries, - PrintArea, - PrintAreaView, - Shape, - SheetData, - WorkbookData, - col_index_to_alpha, - convert_row_keys_to_alpha, - convert_sheet_keys_to_alpha, - convert_workbook_keys_to_alpha, -) -from .render import export_pdf, export_sheet_images +from typing import TYPE_CHECKING, Literal, TextIO + +if TYPE_CHECKING: + from .core.cells import set_table_detection_params + from .core.integrate import extract_workbook + from .engine import ( + ColorsOptions, + DestinationOptions, + ExStructEngine, + FilterOptions, + FormatOptions, + OutputOptions, + StructOptions, + ) + from .errors import ( + ConfigError, + ExstructError, + MissingDependencyError, + PrintAreaError, + RenderError, + SerializationError, + ) + from .io import serialize_workbook + from .models import ( + CellRow, + Chart, + ChartSeries, + PrintArea, + PrintAreaView, + Shape, + SheetData, + WorkbookData, + col_index_to_alpha, + convert_row_keys_to_alpha, + convert_sheet_keys_to_alpha, + convert_workbook_keys_to_alpha, + ) + from .render import export_pdf, export_sheet_images logger = logging.getLogger(__name__) @@ -97,6 +91,56 @@ ExtractionMode = Literal["light", "libreoffice", "standard", "verbose"] +_LAZY_EXPORTS: dict[str, tuple[str, str]] = { + "ColorsOptions": (".engine", "ColorsOptions"), + "ConfigError": (".errors", "ConfigError"), + "DestinationOptions": (".engine", "DestinationOptions"), + "ExStructEngine": (".engine", "ExStructEngine"), + "ExstructError": (".errors", "ExstructError"), + "FilterOptions": (".engine", "FilterOptions"), + "FormatOptions": (".engine", "FormatOptions"), + "MissingDependencyError": (".errors", "MissingDependencyError"), + "OutputOptions": (".engine", "OutputOptions"), + "PrintArea": (".models", "PrintArea"), + "PrintAreaError": (".errors", "PrintAreaError"), + "PrintAreaView": (".models", "PrintAreaView"), + "RenderError": (".errors", "RenderError"), + "SerializationError": (".errors", "SerializationError"), + "StructOptions": (".engine", "StructOptions"), + "WorkbookData": (".models", "WorkbookData"), + "CellRow": (".models", "CellRow"), + "Chart": (".models", "Chart"), + "ChartSeries": (".models", "ChartSeries"), + "Shape": (".models", "Shape"), + "SheetData": (".models", "SheetData"), + "col_index_to_alpha": (".models", "col_index_to_alpha"), + "convert_row_keys_to_alpha": (".models", "convert_row_keys_to_alpha"), + "convert_sheet_keys_to_alpha": (".models", "convert_sheet_keys_to_alpha"), + "convert_workbook_keys_to_alpha": (".models", "convert_workbook_keys_to_alpha"), + "export_pdf": (".render", "export_pdf"), + "export_sheet_images": (".render", "export_sheet_images"), + "extract_workbook": (".core.integrate", "extract_workbook"), + "serialize_workbook": (".io", "serialize_workbook"), + "set_table_detection_params": (".core.cells", "set_table_detection_params"), +} + + +def _resolve_lazy_export(name: str) -> object: + module_name, attr_name = _LAZY_EXPORTS[name] + value = getattr(import_module(module_name, __name__), attr_name) + globals()[name] = value + return value + + +def __getattr__(name: str) -> object: + if name not in _LAZY_EXPORTS: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + return _resolve_lazy_export(name) + + +def __dir__() -> list[str]: + return sorted(set(globals()) | set(__all__)) + def extract( file_path: str | Path, mode: ExtractionMode = "standard", *, alpha_col: bool = False @@ -112,6 +156,8 @@ def extract( Returns: WorkbookData: Parsed workbook representation containing sheets, rows, shapes, charts, and print areas. """ + from .engine import ExStructEngine, StructOptions + include_links = True if mode == "verbose" else False include_colors_map = True if mode == "verbose" else None include_formulas_map = True if mode == "verbose" else None @@ -157,6 +203,8 @@ def export( >>> export(wb, "out.json", pretty=True) >>> export(wb, "out.yaml", fmt="yaml") # doctest: +SKIP """ + from .io import save_as_json, save_as_toon, save_as_yaml + dest = Path(path) format_hint = (fmt or dest.suffix.lstrip(".") or "json").lower() match format_hint: @@ -202,6 +250,8 @@ def export_sheets( >>> "Sheet1" in paths True """ + from .io import save_sheets + return save_sheets( data, Path(dir_path), @@ -242,6 +292,8 @@ def export_sheets_as( >>> wb = extract("input.xlsx") >>> _ = export_sheets_as(wb, "out_yaml", fmt="yaml") # doctest: +SKIP """ + from .io import save_sheets + return save_sheets( data, Path(dir_path), @@ -285,6 +337,8 @@ def export_print_areas_as( >>> isinstance(paths, dict) True """ + from .io import save_print_area_views + return save_print_area_views( data, Path(dir_path), @@ -331,6 +385,9 @@ def export_auto_page_breaks( ... except PrintAreaError: ... pass """ + from .errors import PrintAreaError + from .io import save_auto_page_break_views + if not any(sheet.auto_print_areas for sheet in data.sheets.values()): message = "No auto page-break areas found. Enable COM-based auto page breaks before exporting." logger.warning(message) @@ -407,6 +464,15 @@ def process_excel( >>> process_excel(Path("input.xlsx"), output_path=Path("out.json"), pdf=True) # doctest: +SKIP """ + from .engine import ( + DestinationOptions, + ExStructEngine, + FilterOptions, + FormatOptions, + OutputOptions, + StructOptions, + ) + engine = ExStructEngine( options=StructOptions(mode=mode, alpha_col=alpha_col), output=OutputOptions( diff --git a/src/exstruct/cli/edit.py b/src/exstruct/cli/edit.py index a98f54c0..59bfb3cf 100644 --- a/src/exstruct/cli/edit.py +++ b/src/exstruct/cli/edit.py @@ -3,27 +3,16 @@ from __future__ import annotations import argparse +from collections.abc import Callable +from functools import lru_cache +from importlib import import_module import json from pathlib import Path import sys -from typing import Any, get_args +from typing import Any, cast, get_args from pydantic import BaseModel, ValidationError -from exstruct.edit import ( - MakeRequest, - OnConflictPolicy, - PatchBackend, - PatchOp, - PatchRequest, - get_patch_op_schema, - list_patch_op_schemas, - make_workbook, - patch_workbook, - resolve_top_level_sheet_for_payload, -) -from exstruct.mcp.validate_input import ValidateInputRequest, validate_input - _EDIT_SUBCOMMANDS = frozenset({"patch", "make", "ops", "validate"}) _EXPLICIT_EDIT_TOKENS: dict[str, frozenset[str]] = { "patch": frozenset( @@ -60,6 +49,46 @@ } +def _load_symbol(module_name: str, attr_name: str) -> object: + return getattr(import_module(module_name), attr_name) + + +def patch_workbook(request: object) -> object: + """Compatibility wrapper that resolves patch execution lazily.""" + + return cast( + Callable[[object], object], + _load_symbol("exstruct.edit.api", "patch_workbook"), + )(request) + + +def make_workbook(request: object) -> object: + """Compatibility wrapper that resolves make execution lazily.""" + + return cast( + Callable[[object], object], + _load_symbol("exstruct.edit.api", "make_workbook"), + )(request) + + +def resolve_top_level_sheet_for_payload(payload: object) -> object: + """Compatibility wrapper that resolves payload normalization lazily.""" + + return cast( + Callable[[object], object], + _load_symbol("exstruct.edit.normalize", "resolve_top_level_sheet_for_payload"), + )(payload) + + +def validate_input(request: object) -> object: + """Compatibility wrapper that resolves validate execution lazily.""" + + return cast( + Callable[[object], object], + _load_symbol("exstruct.mcp.validate_input", "validate_input"), + )(request) + + def _literal_choices(literal_type: object) -> tuple[str, ...]: """Return argparse choices derived from one string Literal type.""" @@ -71,8 +100,14 @@ def _literal_choices(literal_type: object) -> tuple[str, ...]: return tuple(choices) -_ON_CONFLICT_CHOICES = _literal_choices(OnConflictPolicy) -_BACKEND_CHOICES = _literal_choices(PatchBackend) +@lru_cache(maxsize=1) +def _on_conflict_choices() -> tuple[str, ...]: + return _literal_choices(_load_symbol("exstruct.edit.types", "OnConflictPolicy")) + + +@lru_cache(maxsize=1) +def _backend_choices() -> tuple[str, ...]: + return _literal_choices(_load_symbol("exstruct.edit.types", "PatchBackend")) def is_edit_subcommand(argv: list[str]) -> bool: @@ -224,13 +259,13 @@ def _add_patch_like_arguments( parser.add_argument("--sheet", help="Top-level sheet fallback for patch ops.") parser.add_argument( "--on-conflict", - choices=_ON_CONFLICT_CHOICES, + choices=_on_conflict_choices(), default="overwrite", help="Conflict policy for output workbook paths.", ) parser.add_argument( "--backend", - choices=_BACKEND_CHOICES, + choices=_backend_choices(), default="auto", help="Patch backend selection policy.", ) @@ -266,6 +301,10 @@ def _run_patch_command(args: argparse.Namespace) -> int: try: ops = _load_patch_ops(args.ops, sheet=args.sheet) + patch_request_model = cast( + Callable[..., object], + _load_symbol("exstruct.edit.models", "PatchRequest"), + ) request_kwargs: dict[str, Any] = { "xlsx_path": args.input, "ops": ops, @@ -280,8 +319,8 @@ def _run_patch_command(args: argparse.Namespace) -> int: if args.output is not None: request_kwargs["out_dir"] = args.output.parent request_kwargs["out_name"] = args.output.name - request = PatchRequest(**request_kwargs) - result = patch_workbook(request) + request = patch_request_model(**request_kwargs) + result = cast(Any, patch_workbook(request)) except (OSError, RuntimeError, ValidationError, ValueError) as exc: _print_error(exc) return 1 @@ -295,7 +334,11 @@ def _run_make_command(args: argparse.Namespace) -> int: try: ops = _load_patch_ops(args.ops, sheet=args.sheet) - request = MakeRequest( + make_request_model = cast( + Callable[..., object], + _load_symbol("exstruct.edit.models", "MakeRequest"), + ) + request = make_request_model( out_path=args.output, ops=ops, sheet=args.sheet, @@ -306,7 +349,7 @@ def _run_make_command(args: argparse.Namespace) -> int: preflight_formula_check=args.preflight_formula_check, backend=args.backend, ) - result = make_workbook(request) + result = cast(Any, make_workbook(request)) except (OSError, RuntimeError, ValidationError, ValueError) as exc: _print_error(exc) return 1 @@ -318,10 +361,14 @@ def _run_make_command(args: argparse.Namespace) -> int: def _run_ops_list_command(args: argparse.Namespace) -> int: """Execute the ops list subcommand.""" + list_patch_op_schemas = cast( + Callable[[], list[object]], + _load_symbol("exstruct.edit.op_schema", "list_patch_op_schemas"), + ) + schemas = cast(Any, list_patch_op_schemas()) payload = { "ops": [ - {"op": schema.op, "description": schema.description} - for schema in list_patch_op_schemas() + {"op": schema.op, "description": schema.description} for schema in schemas ] } _print_json_payload(payload, pretty=args.pretty) @@ -331,7 +378,11 @@ def _run_ops_list_command(args: argparse.Namespace) -> int: def _run_ops_describe_command(args: argparse.Namespace) -> int: """Execute the ops describe subcommand.""" - schema = get_patch_op_schema(args.op) + get_patch_op_schema = cast( + Callable[[str], object | None], + _load_symbol("exstruct.edit.op_schema", "get_patch_op_schema"), + ) + schema = cast(Any, get_patch_op_schema(args.op)) if schema is None: _print_error(ValueError(f"Unknown patch operation: {args.op}")) return 1 @@ -343,7 +394,17 @@ def _run_validate_command(args: argparse.Namespace) -> int: """Execute the validate subcommand.""" try: - result = validate_input(ValidateInputRequest(xlsx_path=args.input)) + validate_input_request_model = cast( + Callable[..., object], + _load_symbol( + "exstruct.mcp.validate_input", + "ValidateInputRequest", + ), + ) + result = cast( + Any, + validate_input(validate_input_request_model(xlsx_path=args.input)), + ) except (OSError, ValidationError, ValueError) as exc: _print_error(exc) return 1 @@ -352,7 +413,7 @@ def _run_validate_command(args: argparse.Namespace) -> int: return 0 if result.is_readable else 1 -def _load_patch_ops(source: str | None, *, sheet: str | None = None) -> list[PatchOp]: +def _load_patch_ops(source: str | None, *, sheet: str | None = None) -> list[Any]: """Load patch ops from a JSON file or stdin.""" if source is None: @@ -368,7 +429,11 @@ def _load_patch_ops(source: str | None, *, sheet: str | None = None) -> list[Pat resolved_ops = resolved_payload.get("ops") if not isinstance(resolved_ops, list): raise ValueError("Resolved patch ops payload must contain an ops list.") - return [PatchOp(**op_payload) for op_payload in resolved_ops] + patch_op_model = cast( + Callable[..., object], + _load_symbol("exstruct.edit.models", "PatchOp"), + ) + return [patch_op_model(**op_payload) for op_payload in resolved_ops] def _load_json_value(source: str) -> object: diff --git a/src/exstruct/cli/main.py b/src/exstruct/cli/main.py index 5d9a25f0..82510909 100644 --- a/src/exstruct/cli/main.py +++ b/src/exstruct/cli/main.py @@ -3,13 +3,72 @@ from __future__ import annotations import argparse +from collections.abc import Callable +from importlib import import_module from pathlib import Path import sys +from typing import TYPE_CHECKING, cast -from exstruct import process_excel -from exstruct.cli.availability import get_com_availability -from exstruct.cli.edit import is_edit_subcommand, run_edit_cli -from exstruct.constraints import validate_libreoffice_process_request +if TYPE_CHECKING: + from exstruct.cli.availability import ComAvailability + +ProcessExcelFn = Callable[..., None] +EditPredicateFn = Callable[[list[str]], bool] +RunEditCliFn = Callable[[list[str]], int] +ComAvailabilityFn = Callable[[], "ComAvailability"] +LibreOfficeValidatorFn = Callable[..., Path] + + +def _load_process_excel() -> ProcessExcelFn: + module = import_module("exstruct") + return cast(ProcessExcelFn, module.process_excel) + + +def _load_is_edit_subcommand() -> EditPredicateFn: + module = import_module("exstruct.cli.edit") + return cast(EditPredicateFn, module.is_edit_subcommand) + + +def _load_run_edit_cli() -> RunEditCliFn: + module = import_module("exstruct.cli.edit") + return cast(RunEditCliFn, module.run_edit_cli) + + +def _load_get_com_availability() -> ComAvailabilityFn: + module = import_module("exstruct.cli.availability") + return cast(ComAvailabilityFn, module.get_com_availability) + + +def _load_libreoffice_validator() -> LibreOfficeValidatorFn: + module = import_module("exstruct.constraints") + return cast( + LibreOfficeValidatorFn, + module.validate_libreoffice_process_request, + ) + + +def process_excel(*args: object, **kwargs: object) -> None: + """Compatibility wrapper that resolves `exstruct.process_excel` lazily.""" + + _load_process_excel()(*args, **kwargs) + + +def is_edit_subcommand(argv: list[str]) -> bool: + """Compatibility wrapper that resolves the edit router lazily.""" + + return _load_is_edit_subcommand()(argv) + + +def run_edit_cli(argv: list[str]) -> int: + """Compatibility wrapper that resolves the edit CLI lazily.""" + + return _load_run_edit_cli()(argv) + + +def get_com_availability() -> ComAvailability: + """Compatibility wrapper that resolves COM probing lazily.""" + + return _load_get_com_availability()() def _ensure_utf8_stdout() -> None: @@ -147,7 +206,7 @@ def _validate_auto_page_breaks_request(args: argparse.Namespace) -> None: "with Excel COM." ) if args.mode == "libreoffice": - validate_libreoffice_process_request( + _load_libreoffice_validator()( args.input, mode=args.mode, include_auto_page_breaks=True, @@ -206,8 +265,8 @@ def main(argv: list[str] | None = None) -> int: include_backend_metadata=args.include_backend_metadata, ) return 0 - except Exception as e: - print(f"Error: {e}", flush=True) + except Exception as exc: + print(f"Error: {exc}", flush=True) return 1 diff --git a/src/exstruct/edit/__init__.py b/src/exstruct/edit/__init__.py index 836d656a..3feba1c7 100644 --- a/src/exstruct/edit/__init__.py +++ b/src/exstruct/edit/__init__.py @@ -2,70 +2,74 @@ from __future__ import annotations -from .api import make_workbook, patch_workbook -from .chart_types import ( - CHART_TYPE_ALIASES, - CHART_TYPE_TO_COM_ID, - SUPPORTED_CHART_TYPES, - SUPPORTED_CHART_TYPES_CSV, - SUPPORTED_CHART_TYPES_SET, - normalize_chart_type, - resolve_chart_type_id, -) -from .errors import PatchOpError -from .models import ( - AlignmentSnapshot, - BorderSideSnapshot, - BorderSnapshot, - ColumnDimensionSnapshot, - DesignSnapshot, - FillSnapshot, - FontSnapshot, - FormulaIssue, - MakeRequest, - MergeStateSnapshot, - OpenpyxlEngineResult, - OpenpyxlWorksheetProtocol, - PatchDiffItem, - PatchErrorDetail, - PatchOp, - PatchRequest, - PatchResult, - PatchValue, - RowDimensionSnapshot, - XlwingsRangeProtocol, -) -from .normalize import ( - alias_to_canonical_with_conflict_check, - build_missing_sheet_message, - build_patch_op_error_message, - coerce_patch_ops, - normalize_draw_grid_border_range, - normalize_patch_op_aliases, - normalize_top_level_sheet, - parse_patch_op_json, - resolve_top_level_sheet_for_payload, -) -from .op_schema import ( - PatchOpSchema, - build_patch_tool_mini_schema, - get_patch_op_schema, - list_patch_op_schemas, - schema_with_sheet_resolution_rules, -) -from .specs import PATCH_OP_SPECS, PatchOpSpec, get_alias_map_for_op -from .types import ( - FormulaIssueCode, - FormulaIssueLevel, - HorizontalAlignType, - OnConflictPolicy, - PatchBackend, - PatchEngine, - PatchOpType, - PatchStatus, - PatchValueKind, - VerticalAlignType, -) +from importlib import import_module +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from .api import make_workbook, patch_workbook + from .chart_types import ( + CHART_TYPE_ALIASES, + CHART_TYPE_TO_COM_ID, + SUPPORTED_CHART_TYPES, + SUPPORTED_CHART_TYPES_CSV, + SUPPORTED_CHART_TYPES_SET, + normalize_chart_type, + resolve_chart_type_id, + ) + from .errors import PatchOpError + from .models import ( + AlignmentSnapshot, + BorderSideSnapshot, + BorderSnapshot, + ColumnDimensionSnapshot, + DesignSnapshot, + FillSnapshot, + FontSnapshot, + FormulaIssue, + MakeRequest, + MergeStateSnapshot, + OpenpyxlEngineResult, + OpenpyxlWorksheetProtocol, + PatchDiffItem, + PatchErrorDetail, + PatchOp, + PatchRequest, + PatchResult, + PatchValue, + RowDimensionSnapshot, + XlwingsRangeProtocol, + ) + from .normalize import ( + alias_to_canonical_with_conflict_check, + build_missing_sheet_message, + build_patch_op_error_message, + coerce_patch_ops, + normalize_draw_grid_border_range, + normalize_patch_op_aliases, + normalize_top_level_sheet, + parse_patch_op_json, + resolve_top_level_sheet_for_payload, + ) + from .op_schema import ( + PatchOpSchema, + build_patch_tool_mini_schema, + get_patch_op_schema, + list_patch_op_schemas, + schema_with_sheet_resolution_rules, + ) + from .specs import PATCH_OP_SPECS, PatchOpSpec, get_alias_map_for_op + from .types import ( + FormulaIssueCode, + FormulaIssueLevel, + HorizontalAlignType, + OnConflictPolicy, + PatchBackend, + PatchEngine, + PatchOpType, + PatchStatus, + PatchValueKind, + VerticalAlignType, + ) __all__ = [ "AlignmentSnapshot", @@ -126,3 +130,95 @@ "resolve_top_level_sheet_for_payload", "schema_with_sheet_resolution_rules", ] + +_LAZY_EXPORTS: dict[str, tuple[str, str]] = { + "AlignmentSnapshot": (".models", "AlignmentSnapshot"), + "BorderSideSnapshot": (".models", "BorderSideSnapshot"), + "BorderSnapshot": (".models", "BorderSnapshot"), + "CHART_TYPE_ALIASES": (".chart_types", "CHART_TYPE_ALIASES"), + "CHART_TYPE_TO_COM_ID": (".chart_types", "CHART_TYPE_TO_COM_ID"), + "ColumnDimensionSnapshot": (".models", "ColumnDimensionSnapshot"), + "DesignSnapshot": (".models", "DesignSnapshot"), + "FillSnapshot": (".models", "FillSnapshot"), + "FontSnapshot": (".models", "FontSnapshot"), + "FormulaIssue": (".models", "FormulaIssue"), + "FormulaIssueCode": (".types", "FormulaIssueCode"), + "FormulaIssueLevel": (".types", "FormulaIssueLevel"), + "HorizontalAlignType": (".types", "HorizontalAlignType"), + "MakeRequest": (".models", "MakeRequest"), + "MergeStateSnapshot": (".models", "MergeStateSnapshot"), + "OnConflictPolicy": (".types", "OnConflictPolicy"), + "OpenpyxlEngineResult": (".models", "OpenpyxlEngineResult"), + "OpenpyxlWorksheetProtocol": (".models", "OpenpyxlWorksheetProtocol"), + "PATCH_OP_SPECS": (".specs", "PATCH_OP_SPECS"), + "PatchBackend": (".types", "PatchBackend"), + "PatchDiffItem": (".models", "PatchDiffItem"), + "PatchEngine": (".types", "PatchEngine"), + "PatchErrorDetail": (".models", "PatchErrorDetail"), + "PatchOp": (".models", "PatchOp"), + "PatchOpError": (".errors", "PatchOpError"), + "PatchOpSchema": (".op_schema", "PatchOpSchema"), + "PatchOpSpec": (".specs", "PatchOpSpec"), + "PatchOpType": (".types", "PatchOpType"), + "PatchRequest": (".models", "PatchRequest"), + "PatchResult": (".models", "PatchResult"), + "PatchStatus": (".types", "PatchStatus"), + "PatchValue": (".models", "PatchValue"), + "PatchValueKind": (".types", "PatchValueKind"), + "RowDimensionSnapshot": (".models", "RowDimensionSnapshot"), + "SUPPORTED_CHART_TYPES": (".chart_types", "SUPPORTED_CHART_TYPES"), + "SUPPORTED_CHART_TYPES_CSV": (".chart_types", "SUPPORTED_CHART_TYPES_CSV"), + "SUPPORTED_CHART_TYPES_SET": (".chart_types", "SUPPORTED_CHART_TYPES_SET"), + "VerticalAlignType": (".types", "VerticalAlignType"), + "XlwingsRangeProtocol": (".models", "XlwingsRangeProtocol"), + "alias_to_canonical_with_conflict_check": ( + ".normalize", + "alias_to_canonical_with_conflict_check", + ), + "build_missing_sheet_message": (".normalize", "build_missing_sheet_message"), + "build_patch_op_error_message": ( + ".normalize", + "build_patch_op_error_message", + ), + "build_patch_tool_mini_schema": (".op_schema", "build_patch_tool_mini_schema"), + "coerce_patch_ops": (".normalize", "coerce_patch_ops"), + "get_alias_map_for_op": (".specs", "get_alias_map_for_op"), + "get_patch_op_schema": (".op_schema", "get_patch_op_schema"), + "list_patch_op_schemas": (".op_schema", "list_patch_op_schemas"), + "make_workbook": (".api", "make_workbook"), + "normalize_chart_type": (".chart_types", "normalize_chart_type"), + "normalize_draw_grid_border_range": ( + ".normalize", + "normalize_draw_grid_border_range", + ), + "normalize_patch_op_aliases": (".normalize", "normalize_patch_op_aliases"), + "normalize_top_level_sheet": (".normalize", "normalize_top_level_sheet"), + "parse_patch_op_json": (".normalize", "parse_patch_op_json"), + "patch_workbook": (".api", "patch_workbook"), + "resolve_chart_type_id": (".chart_types", "resolve_chart_type_id"), + "resolve_top_level_sheet_for_payload": ( + ".normalize", + "resolve_top_level_sheet_for_payload", + ), + "schema_with_sheet_resolution_rules": ( + ".op_schema", + "schema_with_sheet_resolution_rules", + ), +} + + +def _resolve_lazy_export(name: str) -> object: + module_name, attr_name = _LAZY_EXPORTS[name] + value = getattr(import_module(module_name, __name__), attr_name) + globals()[name] = value + return value + + +def __getattr__(name: str) -> object: + if name not in _LAZY_EXPORTS: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + return _resolve_lazy_export(name) + + +def __dir__() -> list[str]: + return sorted(set(globals()) | set(__all__)) diff --git a/tasks/feature_spec.md b/tasks/feature_spec.md index f4bb9dc3..66a34ea5 100644 --- a/tasks/feature_spec.md +++ b/tasks/feature_spec.md @@ -167,3 +167,69 @@ - `not-needed` - rationale: this is a wording-only follow-up under the existing `ADR-0008` decision. + +## 2026-03-20 issue #108 CLI startup lazy import optimization + +### Goal + +- Reduce startup import cost for lightweight CLI paths such as `exstruct --help` and `exstruct ops list`. +- Keep the existing extraction and editing CLI contracts unchanged while delaying heavy implementation imports until routing is known. +- Preserve the current module-level monkeypatch surfaces used by the CLI test suite. + +### Public contract + +- `exstruct --help` and parser construction keep the current CLI syntax, help text, and exit behavior. +- `exstruct ops list` and `exstruct ops describe` keep their current output shape and exit behavior. +- Extraction invocations still call `process_excel(...)` with the same arguments and keep current file-not-found and auto page-break validation behavior. +- Public Python symbol names exported from `exstruct` and `exstruct.edit` remain unchanged; only their import timing changes. + +### Internal implementation guarantees + +- `src/exstruct/__init__.py` must not eagerly import extraction engine, IO, render, or model modules during package import; convenience functions may import those dependencies inside function bodies. +- `src/exstruct/edit/__init__.py` must not eagerly import editing service/runtime/model modules during package import; exported names should resolve lazily. +- `src/exstruct/cli/main.py` must route edit vs extraction commands before importing edit/extraction implementations. +- `src/exstruct/cli/edit.py` must not import `exstruct.mcp.validate_input` or editing execution helpers at module import time; command handlers load only the functionality they need. +- Existing CLI module patch points (`process_excel`, `get_com_availability`, `is_edit_subcommand`, `run_edit_cli`, `patch_workbook`, `make_workbook`, `resolve_top_level_sheet_for_payload`, `validate_input`) remain present as thin wrappers. + +### Scope and non-goals + +- In scope: + - `src/exstruct/__init__.py` + - `src/exstruct/edit/__init__.py` + - `src/exstruct/cli/main.py` + - `src/exstruct/cli/edit.py` + - targeted tests and one architecture note +- Out of scope: + - changing CLI syntax, help wording, or JSON contracts + - changing backend selection policy + - optimizing `exstruct validate` startup beyond removing it from the `ops` path + - refactoring `src/exstruct/mcp/__init__.py` unless required by failing tests on the `ops` path + +### Permanent destinations + +- `dev-docs/architecture/overview.md` + - Records that package `__init__` files and lightweight CLI startup paths must remain side-effect-free and defer heavy imports. +- `tasks/feature_spec.md` and `tasks/todo.md` + - Keep the temporary implementation/verification record for this issue. +- `dev-docs/adr/` + - No new ADR is planned; this issue changes import timing only and does not alter the public contract or policy. + +### Verification plan + +- `tests/cli/test_cli.py` + - help and extraction routing still behave the same + - lightweight startup paths do not eagerly load edit/extraction implementation modules +- `tests/cli/test_edit_cli.py` + - `ops list` / `ops describe` do not depend on extraction import paths + - existing monkeypatch-based tests still pass +- `tests/edit/test_architecture.py` or a focused startup test module + - `import exstruct` does not eagerly load extraction engine modules + - `import exstruct.cli.edit` does not eagerly load `exstruct.mcp` / `exstruct.mcp.extract_runner` +- `uv run pytest tests/cli/test_cli.py tests/cli/test_edit_cli.py tests/edit/test_architecture.py -q` +- `uv run task precommit-run` +- manual importtime sanity checks for `--help` and `ops list` + +### ADR verdict + +- `not-needed` +- rationale: this is a startup-focused internal refactor that preserves existing CLI/API contracts and backend policy. The durable guidance belongs in architecture notes rather than a new policy ADR. diff --git a/tasks/todo.md b/tasks/todo.md index 32ed7d7f..2aee8c17 100644 --- a/tasks/todo.md +++ b/tasks/todo.md @@ -118,3 +118,37 @@ - `uv run pytest tests/cli/test_cli.py -q` - `uv run task precommit-run` - `git diff --check` + +## 2026-03-20 issue #108 CLI startup lazy import optimization + +### Planning + +- [x] Confirm issue `#108` details with `gh issue view 108`. +- [x] Inspect current import paths in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, `src/exstruct/cli/main.py`, and `src/exstruct/cli/edit.py`. +- [x] Classify ADR need for the startup optimization work. +- [x] Add the issue `#108` working spec to `tasks/feature_spec.md`. +- [x] Refactor `src/exstruct/__init__.py` to defer heavy imports while preserving exported symbol names. +- [x] Refactor `src/exstruct/edit/__init__.py` to defer heavy imports while preserving exported symbol names. +- [x] Refactor `src/exstruct/cli/main.py` so edit/extraction implementations load only after routing is known. +- [x] Refactor `src/exstruct/cli/edit.py` so `ops` commands avoid extraction-path imports and handler-specific dependencies load lazily. +- [x] Add regression tests for startup import isolation and existing CLI behavior. +- [x] Update `dev-docs/architecture/overview.md` with the lightweight-startup import rule. +- [x] Run targeted pytest for CLI/startup coverage. +- [x] Run `uv run task precommit-run`. +- [x] Record final verification and retention notes in this Review section. + +### Review + +- `src/exstruct/__init__.py` now keeps the public export surface but resolves heavy extraction/runtime symbols lazily, so importing `exstruct` no longer front-loads extraction engine modules for CLI startup. +- `src/exstruct/edit/__init__.py` now resolves editing exports lazily, which lets CLI code import edit submodules without paying the full patch-service import cost up front. +- `src/exstruct/cli/main.py` now keeps monkeypatch-compatible wrappers for `process_excel`, `get_com_availability`, `is_edit_subcommand`, and `run_edit_cli`, but loads the underlying implementations only after routing demands them. +- `src/exstruct/cli/edit.py` now keeps monkeypatch-compatible wrappers for `patch_workbook`, `make_workbook`, `resolve_top_level_sheet_for_payload`, and `validate_input`, while `ops` commands load only schema metadata and avoid dragging the extraction path into startup. +- `tests/cli/test_cli_lazy_imports.py` now locks the startup boundary with subprocess `sys.modules` probes for `import exstruct`, `import exstruct.cli.main`, `import exstruct.cli.edit`, and `main(["ops", "list"])`. +- `dev-docs/architecture/overview.md` now records the durable rule that package `__init__` files and lightweight CLI startup paths must remain side-effect-free. +- Retention decision: + - No new ADR was added. The change preserves the public contract and only changes import timing, so the durable guidance lives in `dev-docs/architecture/overview.md`. + - The temporary working record for implementation order and verification remains limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. +- Verification: + - `uv run pytest tests/cli/test_cli.py tests/cli/test_edit_cli.py tests/cli/test_cli_lazy_imports.py tests/edit/test_architecture.py -q` + - `uv run task precommit-run` + - manual `-X importtime` sanity probe for `-m exstruct.cli.main --help` and `-m exstruct.cli.main ops list` diff --git a/tests/cli/test_cli_lazy_imports.py b/tests/cli/test_cli_lazy_imports.py new file mode 100644 index 00000000..267a1749 --- /dev/null +++ b/tests/cli/test_cli_lazy_imports.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import json +import subprocess +import sys +from typing import cast + + +def _run_python(code: str) -> subprocess.CompletedProcess[str]: + return subprocess.run( + [sys.executable, "-c", code], + check=False, + capture_output=True, + text=True, + ) + + +def _run_probe(code: str) -> dict[str, object]: + result = _run_python(code) + assert result.returncode == 0, result.stderr or result.stdout + return cast(dict[str, object], json.loads(result.stdout)) + + +def test_import_exstruct_stays_lightweight() -> None: + payload = _run_probe( + """ +import json +import sys +import exstruct +print(json.dumps({ + "core_integrate": "exstruct.core.integrate" in sys.modules, + "engine": "exstruct.engine" in sys.modules, + "pandas": "pandas" in sys.modules, + "openpyxl": "openpyxl" in sys.modules, + "xlwings": "xlwings" in sys.modules, +})) +""" + ) + + assert payload == { + "core_integrate": False, + "engine": False, + "pandas": False, + "openpyxl": False, + "xlwings": False, + } + + +def test_import_cli_main_does_not_load_edit_or_extraction_modules() -> None: + payload = _run_probe( + """ +import json +import sys +import exstruct.cli.main +print(json.dumps({ + "cli_edit": "exstruct.cli.edit" in sys.modules, + "mcp": "exstruct.mcp" in sys.modules, + "core_integrate": "exstruct.core.integrate" in sys.modules, + "engine": "exstruct.engine" in sys.modules, +})) +""" + ) + + assert payload == { + "cli_edit": False, + "mcp": False, + "core_integrate": False, + "engine": False, + } + + +def test_import_cli_edit_does_not_load_mcp_or_edit_execution_modules() -> None: + payload = _run_probe( + """ +import json +import sys +import exstruct.cli.edit +print(json.dumps({ + "mcp": "exstruct.mcp" in sys.modules, + "extract_runner": "exstruct.mcp.extract_runner" in sys.modules, + "edit_api": "exstruct.edit.api" in sys.modules, + "edit_service": "exstruct.edit.service" in sys.modules, +})) +""" + ) + + assert payload == { + "mcp": False, + "extract_runner": False, + "edit_api": False, + "edit_service": False, + } + + +def test_help_and_ops_list_keep_lightweight_import_boundaries() -> None: + payload = _run_probe( + """ +import io +import json +import sys +from contextlib import redirect_stderr, redirect_stdout + +from exstruct.cli.main import main + +stdout_buffer = io.StringIO() +stderr_buffer = io.StringIO() +with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): + try: + help_exit = main(["--help"]) + except SystemExit as exc: + help_exit = exc.code +help_text = stdout_buffer.getvalue() + +stdout_buffer = io.StringIO() +stderr_buffer = io.StringIO() +with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): + ops_exit = main(["ops", "list"]) +ops_payload = json.loads(stdout_buffer.getvalue()) + +print(json.dumps({ + "help_exit": help_exit, + "help_has_option": "--auto-page-breaks-dir" in help_text, + "ops_exit": ops_exit, + "ops_has_set_value": any(item["op"] == "set_value" for item in ops_payload["ops"]), + "mcp": "exstruct.mcp" in sys.modules, + "extract_runner": "exstruct.mcp.extract_runner" in sys.modules, + "core_integrate": "exstruct.core.integrate" in sys.modules, +})) +""" + ) + + assert payload["help_exit"] == 0 + assert payload["help_has_option"] is True + assert payload["ops_exit"] == 0 + assert payload["ops_has_set_value"] is True + assert payload["mcp"] is False + assert payload["extract_runner"] is False + assert payload["core_integrate"] is False From ffdff72b0fd5890957fa4128f207ccfeb10a05d1 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Sat, 21 Mar 2026 16:27:18 +0900 Subject: [PATCH 2/3] Address Codacy and review feedback for #108 --- src/exstruct/__init__.py | 138 +++++++++++++++----- src/exstruct/cli/edit.py | 169 +++++++++++++++--------- src/exstruct/cli/main.py | 5 + src/exstruct/edit/__init__.py | 199 +++++++++++++++++++---------- tasks/feature_spec.md | 37 ++++++ tasks/todo.md | 34 +++++ tests/cli/test_cli_lazy_imports.py | 67 +++++++++- 7 files changed, 477 insertions(+), 172 deletions(-) diff --git a/src/exstruct/__init__.py b/src/exstruct/__init__.py index 25b21a4b..8672fc97 100644 --- a/src/exstruct/__init__.py +++ b/src/exstruct/__init__.py @@ -2,7 +2,7 @@ from __future__ import annotations -from importlib import import_module +from collections.abc import Callable import logging from pathlib import Path from typing import TYPE_CHECKING, Literal, TextIO @@ -91,47 +91,101 @@ ExtractionMode = Literal["light", "libreoffice", "standard", "verbose"] -_LAZY_EXPORTS: dict[str, tuple[str, str]] = { - "ColorsOptions": (".engine", "ColorsOptions"), - "ConfigError": (".errors", "ConfigError"), - "DestinationOptions": (".engine", "DestinationOptions"), - "ExStructEngine": (".engine", "ExStructEngine"), - "ExstructError": (".errors", "ExstructError"), - "FilterOptions": (".engine", "FilterOptions"), - "FormatOptions": (".engine", "FormatOptions"), - "MissingDependencyError": (".errors", "MissingDependencyError"), - "OutputOptions": (".engine", "OutputOptions"), - "PrintArea": (".models", "PrintArea"), - "PrintAreaError": (".errors", "PrintAreaError"), - "PrintAreaView": (".models", "PrintAreaView"), - "RenderError": (".errors", "RenderError"), - "SerializationError": (".errors", "SerializationError"), - "StructOptions": (".engine", "StructOptions"), - "WorkbookData": (".models", "WorkbookData"), - "CellRow": (".models", "CellRow"), - "Chart": (".models", "Chart"), - "ChartSeries": (".models", "ChartSeries"), - "Shape": (".models", "Shape"), - "SheetData": (".models", "SheetData"), - "col_index_to_alpha": (".models", "col_index_to_alpha"), - "convert_row_keys_to_alpha": (".models", "convert_row_keys_to_alpha"), - "convert_sheet_keys_to_alpha": (".models", "convert_sheet_keys_to_alpha"), - "convert_workbook_keys_to_alpha": (".models", "convert_workbook_keys_to_alpha"), - "export_pdf": (".render", "export_pdf"), - "export_sheet_images": (".render", "export_sheet_images"), - "extract_workbook": (".core.integrate", "extract_workbook"), - "serialize_workbook": (".io", "serialize_workbook"), - "set_table_detection_params": (".core.cells", "set_table_detection_params"), +LazyExportLoader = Callable[[], object] + + +def _load_engine_attr(name: str) -> object: + from . import engine as engine_module + + return getattr(engine_module, name) + + +def _load_error_attr(name: str) -> object: + from . import errors as errors_module + + return getattr(errors_module, name) + + +def _load_model_attr(name: str) -> object: + from . import models as models_module + + return getattr(models_module, name) + + +def _load_render_attr(name: str) -> object: + from . import render as render_module + + return getattr(render_module, name) + + +def _load_io_attr(name: str) -> object: + from . import io as io_module + + return getattr(io_module, name) + + +def _load_core_cells_attr(name: str) -> object: + from .core import cells as cells_module + + return getattr(cells_module, name) + + +def _load_core_integrate_attr(name: str) -> object: + from .core import integrate as integrate_module + + return getattr(integrate_module, name) + + +_LAZY_EXPORTS: dict[str, LazyExportLoader] = { + "ColorsOptions": lambda: _load_engine_attr("ColorsOptions"), + "ConfigError": lambda: _load_error_attr("ConfigError"), + "DestinationOptions": lambda: _load_engine_attr("DestinationOptions"), + "ExStructEngine": lambda: _load_engine_attr("ExStructEngine"), + "ExstructError": lambda: _load_error_attr("ExstructError"), + "FilterOptions": lambda: _load_engine_attr("FilterOptions"), + "FormatOptions": lambda: _load_engine_attr("FormatOptions"), + "MissingDependencyError": lambda: _load_error_attr("MissingDependencyError"), + "OutputOptions": lambda: _load_engine_attr("OutputOptions"), + "PrintArea": lambda: _load_model_attr("PrintArea"), + "PrintAreaError": lambda: _load_error_attr("PrintAreaError"), + "PrintAreaView": lambda: _load_model_attr("PrintAreaView"), + "RenderError": lambda: _load_error_attr("RenderError"), + "SerializationError": lambda: _load_error_attr("SerializationError"), + "StructOptions": lambda: _load_engine_attr("StructOptions"), + "WorkbookData": lambda: _load_model_attr("WorkbookData"), + "CellRow": lambda: _load_model_attr("CellRow"), + "Chart": lambda: _load_model_attr("Chart"), + "ChartSeries": lambda: _load_model_attr("ChartSeries"), + "Shape": lambda: _load_model_attr("Shape"), + "SheetData": lambda: _load_model_attr("SheetData"), + "col_index_to_alpha": lambda: _load_model_attr("col_index_to_alpha"), + "convert_row_keys_to_alpha": lambda: _load_model_attr("convert_row_keys_to_alpha"), + "convert_sheet_keys_to_alpha": lambda: _load_model_attr( + "convert_sheet_keys_to_alpha" + ), + "convert_workbook_keys_to_alpha": lambda: _load_model_attr( + "convert_workbook_keys_to_alpha" + ), + "export_pdf": lambda: _load_render_attr("export_pdf"), + "export_sheet_images": lambda: _load_render_attr("export_sheet_images"), + "extract_workbook": lambda: _load_core_integrate_attr("extract_workbook"), + "serialize_workbook": lambda: _load_io_attr("serialize_workbook"), + "set_table_detection_params": lambda: _load_core_cells_attr( + "set_table_detection_params" + ), } def _resolve_lazy_export(name: str) -> object: - module_name, attr_name = _LAZY_EXPORTS[name] - value = getattr(import_module(module_name, __name__), attr_name) + value = _LAZY_EXPORTS[name]() globals()[name] = value return value +def _lazy_type(name: str) -> object: + return _resolve_lazy_export(name) + + def __getattr__(name: str) -> object: if name not in _LAZY_EXPORTS: raise AttributeError(f"module {__name__!r} has no attribute {name!r}") @@ -506,3 +560,19 @@ def process_excel( auto_page_breaks_dir=auto_page_breaks_dir, stream=stream, ) + + +def _patch_runtime_annotations() -> None: + annotations_map: dict[Callable[..., object], dict[str, str]] = { + extract: {"return": "_lazy_type('WorkbookData')"}, + export: {"data": "_lazy_type('WorkbookData')"}, + export_sheets: {"data": "_lazy_type('WorkbookData')"}, + export_sheets_as: {"data": "_lazy_type('WorkbookData')"}, + export_print_areas_as: {"data": "_lazy_type('WorkbookData')"}, + export_auto_page_breaks: {"data": "_lazy_type('WorkbookData')"}, + } + for function, function_annotations in annotations_map.items(): + function.__annotations__.update(function_annotations) + + +_patch_runtime_annotations() diff --git a/src/exstruct/cli/edit.py b/src/exstruct/cli/edit.py index 59bfb3cf..dca21ac8 100644 --- a/src/exstruct/cli/edit.py +++ b/src/exstruct/cli/edit.py @@ -5,14 +5,11 @@ import argparse from collections.abc import Callable from functools import lru_cache -from importlib import import_module import json from pathlib import Path import sys from typing import Any, cast, get_args -from pydantic import BaseModel, ValidationError - _EDIT_SUBCOMMANDS = frozenset({"patch", "make", "ops", "validate"}) _EXPLICIT_EDIT_TOKENS: dict[str, frozenset[str]] = { "patch": frozenset( @@ -49,44 +46,114 @@ } -def _load_symbol(module_name: str, attr_name: str) -> object: - return getattr(import_module(module_name), attr_name) +def _load_patch_workbook_impl() -> Callable[[object], object]: + from exstruct.edit.api import patch_workbook as patch_workbook_impl + + return cast(Callable[[object], object], patch_workbook_impl) + + +def _load_make_workbook_impl() -> Callable[[object], object]: + from exstruct.edit.api import make_workbook as make_workbook_impl + + return cast(Callable[[object], object], make_workbook_impl) + + +def _load_resolve_top_level_sheet_for_payload_impl() -> Callable[[object], object]: + from exstruct.edit.normalize import ( + resolve_top_level_sheet_for_payload as resolve_top_level_sheet_for_payload_impl, + ) + + return cast(Callable[[object], object], resolve_top_level_sheet_for_payload_impl) + + +def _load_validate_input_impl() -> Callable[[object], object]: + from exstruct.mcp.validate_input import validate_input as validate_input_impl + + return cast(Callable[[object], object], validate_input_impl) + + +def _load_on_conflict_policy_literal() -> object: + from exstruct.edit.types import OnConflictPolicy + + return OnConflictPolicy + + +def _load_patch_backend_literal() -> object: + from exstruct.edit.types import PatchBackend + + return PatchBackend + + +def _load_patch_request_model() -> Callable[..., object]: + from exstruct.edit.models import PatchRequest + + return cast(Callable[..., object], PatchRequest) + + +def _load_make_request_model() -> Callable[..., object]: + from exstruct.edit.models import MakeRequest + + return cast(Callable[..., object], MakeRequest) + + +def _load_patch_op_model() -> Callable[..., object]: + from exstruct.edit.models import PatchOp + + return cast(Callable[..., object], PatchOp) + + +def _load_list_patch_op_schemas() -> Callable[[], list[object]]: + from exstruct.edit.op_schema import list_patch_op_schemas + + return cast(Callable[[], list[object]], list_patch_op_schemas) + + +def _load_get_patch_op_schema() -> Callable[[str], object | None]: + from exstruct.edit.op_schema import get_patch_op_schema + + return cast(Callable[[str], object | None], get_patch_op_schema) + + +def _load_validate_input_request_model() -> Callable[..., object]: + from exstruct.mcp.validate_input import ValidateInputRequest + + return cast(Callable[..., object], ValidateInputRequest) + + +def _is_validation_error(exc: Exception) -> bool: + from pydantic import ValidationError + + return isinstance(exc, ValidationError) + + +def _is_cli_runtime_error(exc: Exception) -> bool: + return isinstance(exc, OSError | RuntimeError | ValueError) or _is_validation_error( + exc + ) def patch_workbook(request: object) -> object: """Compatibility wrapper that resolves patch execution lazily.""" - return cast( - Callable[[object], object], - _load_symbol("exstruct.edit.api", "patch_workbook"), - )(request) + return _load_patch_workbook_impl()(request) def make_workbook(request: object) -> object: """Compatibility wrapper that resolves make execution lazily.""" - return cast( - Callable[[object], object], - _load_symbol("exstruct.edit.api", "make_workbook"), - )(request) + return _load_make_workbook_impl()(request) def resolve_top_level_sheet_for_payload(payload: object) -> object: """Compatibility wrapper that resolves payload normalization lazily.""" - return cast( - Callable[[object], object], - _load_symbol("exstruct.edit.normalize", "resolve_top_level_sheet_for_payload"), - )(payload) + return _load_resolve_top_level_sheet_for_payload_impl()(payload) def validate_input(request: object) -> object: """Compatibility wrapper that resolves validate execution lazily.""" - return cast( - Callable[[object], object], - _load_symbol("exstruct.mcp.validate_input", "validate_input"), - )(request) + return _load_validate_input_impl()(request) def _literal_choices(literal_type: object) -> tuple[str, ...]: @@ -102,12 +169,12 @@ def _literal_choices(literal_type: object) -> tuple[str, ...]: @lru_cache(maxsize=1) def _on_conflict_choices() -> tuple[str, ...]: - return _literal_choices(_load_symbol("exstruct.edit.types", "OnConflictPolicy")) + return _literal_choices(_load_on_conflict_policy_literal()) @lru_cache(maxsize=1) def _backend_choices() -> tuple[str, ...]: - return _literal_choices(_load_symbol("exstruct.edit.types", "PatchBackend")) + return _literal_choices(_load_patch_backend_literal()) def is_edit_subcommand(argv: list[str]) -> bool: @@ -301,10 +368,6 @@ def _run_patch_command(args: argparse.Namespace) -> int: try: ops = _load_patch_ops(args.ops, sheet=args.sheet) - patch_request_model = cast( - Callable[..., object], - _load_symbol("exstruct.edit.models", "PatchRequest"), - ) request_kwargs: dict[str, Any] = { "xlsx_path": args.input, "ops": ops, @@ -319,9 +382,11 @@ def _run_patch_command(args: argparse.Namespace) -> int: if args.output is not None: request_kwargs["out_dir"] = args.output.parent request_kwargs["out_name"] = args.output.name - request = patch_request_model(**request_kwargs) + request = _load_patch_request_model()(**request_kwargs) result = cast(Any, patch_workbook(request)) - except (OSError, RuntimeError, ValidationError, ValueError) as exc: + except Exception as exc: + if not _is_cli_runtime_error(exc): + raise _print_error(exc) return 1 @@ -334,11 +399,7 @@ def _run_make_command(args: argparse.Namespace) -> int: try: ops = _load_patch_ops(args.ops, sheet=args.sheet) - make_request_model = cast( - Callable[..., object], - _load_symbol("exstruct.edit.models", "MakeRequest"), - ) - request = make_request_model( + request = _load_make_request_model()( out_path=args.output, ops=ops, sheet=args.sheet, @@ -350,7 +411,9 @@ def _run_make_command(args: argparse.Namespace) -> int: backend=args.backend, ) result = cast(Any, make_workbook(request)) - except (OSError, RuntimeError, ValidationError, ValueError) as exc: + except Exception as exc: + if not _is_cli_runtime_error(exc): + raise _print_error(exc) return 1 @@ -361,11 +424,7 @@ def _run_make_command(args: argparse.Namespace) -> int: def _run_ops_list_command(args: argparse.Namespace) -> int: """Execute the ops list subcommand.""" - list_patch_op_schemas = cast( - Callable[[], list[object]], - _load_symbol("exstruct.edit.op_schema", "list_patch_op_schemas"), - ) - schemas = cast(Any, list_patch_op_schemas()) + schemas = cast(Any, _load_list_patch_op_schemas()()) payload = { "ops": [ {"op": schema.op, "description": schema.description} for schema in schemas @@ -378,11 +437,7 @@ def _run_ops_list_command(args: argparse.Namespace) -> int: def _run_ops_describe_command(args: argparse.Namespace) -> int: """Execute the ops describe subcommand.""" - get_patch_op_schema = cast( - Callable[[str], object | None], - _load_symbol("exstruct.edit.op_schema", "get_patch_op_schema"), - ) - schema = cast(Any, get_patch_op_schema(args.op)) + schema = cast(Any, _load_get_patch_op_schema()(args.op)) if schema is None: _print_error(ValueError(f"Unknown patch operation: {args.op}")) return 1 @@ -394,18 +449,13 @@ def _run_validate_command(args: argparse.Namespace) -> int: """Execute the validate subcommand.""" try: - validate_input_request_model = cast( - Callable[..., object], - _load_symbol( - "exstruct.mcp.validate_input", - "ValidateInputRequest", - ), - ) result = cast( Any, - validate_input(validate_input_request_model(xlsx_path=args.input)), + validate_input(_load_validate_input_request_model()(xlsx_path=args.input)), ) - except (OSError, ValidationError, ValueError) as exc: + except Exception as exc: + if not _is_cli_runtime_error(exc): + raise _print_error(exc) return 1 @@ -429,10 +479,7 @@ def _load_patch_ops(source: str | None, *, sheet: str | None = None) -> list[Any resolved_ops = resolved_payload.get("ops") if not isinstance(resolved_ops, list): raise ValueError("Resolved patch ops payload must contain an ops list.") - patch_op_model = cast( - Callable[..., object], - _load_symbol("exstruct.edit.models", "PatchOp"), - ) + patch_op_model = _load_patch_op_model() return [patch_op_model(**op_payload) for op_payload in resolved_ops] @@ -453,10 +500,8 @@ def _print_json_payload(payload: object, *, pretty: bool) -> None: """Serialize one JSON payload to stdout.""" serializable: object - if isinstance(payload, BaseModel): - serializable = payload.model_dump(mode="json") - else: - serializable = payload + model_dump = getattr(payload, "model_dump", None) + serializable = model_dump(mode="json") if callable(model_dump) else payload print( json.dumps( serializable, diff --git a/src/exstruct/cli/main.py b/src/exstruct/cli/main.py index 82510909..5fd7062d 100644 --- a/src/exstruct/cli/main.py +++ b/src/exstruct/cli/main.py @@ -17,6 +17,7 @@ RunEditCliFn = Callable[[list[str]], int] ComAvailabilityFn = Callable[[], "ComAvailability"] LibreOfficeValidatorFn = Callable[..., Path] +_EDIT_SUBCOMMAND_NAMES = frozenset({"patch", "make", "ops", "validate"}) def _load_process_excel() -> ProcessExcelFn: @@ -56,6 +57,10 @@ def process_excel(*args: object, **kwargs: object) -> None: def is_edit_subcommand(argv: list[str]) -> bool: """Compatibility wrapper that resolves the edit router lazily.""" + if not argv: + return False + if argv[0] not in _EDIT_SUBCOMMAND_NAMES: + return False return _load_is_edit_subcommand()(argv) diff --git a/src/exstruct/edit/__init__.py b/src/exstruct/edit/__init__.py index 3feba1c7..f26b5bc7 100644 --- a/src/exstruct/edit/__init__.py +++ b/src/exstruct/edit/__init__.py @@ -2,7 +2,7 @@ from __future__ import annotations -from importlib import import_module +from collections.abc import Callable from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -131,85 +131,142 @@ "schema_with_sheet_resolution_rules", ] -_LAZY_EXPORTS: dict[str, tuple[str, str]] = { - "AlignmentSnapshot": (".models", "AlignmentSnapshot"), - "BorderSideSnapshot": (".models", "BorderSideSnapshot"), - "BorderSnapshot": (".models", "BorderSnapshot"), - "CHART_TYPE_ALIASES": (".chart_types", "CHART_TYPE_ALIASES"), - "CHART_TYPE_TO_COM_ID": (".chart_types", "CHART_TYPE_TO_COM_ID"), - "ColumnDimensionSnapshot": (".models", "ColumnDimensionSnapshot"), - "DesignSnapshot": (".models", "DesignSnapshot"), - "FillSnapshot": (".models", "FillSnapshot"), - "FontSnapshot": (".models", "FontSnapshot"), - "FormulaIssue": (".models", "FormulaIssue"), - "FormulaIssueCode": (".types", "FormulaIssueCode"), - "FormulaIssueLevel": (".types", "FormulaIssueLevel"), - "HorizontalAlignType": (".types", "HorizontalAlignType"), - "MakeRequest": (".models", "MakeRequest"), - "MergeStateSnapshot": (".models", "MergeStateSnapshot"), - "OnConflictPolicy": (".types", "OnConflictPolicy"), - "OpenpyxlEngineResult": (".models", "OpenpyxlEngineResult"), - "OpenpyxlWorksheetProtocol": (".models", "OpenpyxlWorksheetProtocol"), - "PATCH_OP_SPECS": (".specs", "PATCH_OP_SPECS"), - "PatchBackend": (".types", "PatchBackend"), - "PatchDiffItem": (".models", "PatchDiffItem"), - "PatchEngine": (".types", "PatchEngine"), - "PatchErrorDetail": (".models", "PatchErrorDetail"), - "PatchOp": (".models", "PatchOp"), - "PatchOpError": (".errors", "PatchOpError"), - "PatchOpSchema": (".op_schema", "PatchOpSchema"), - "PatchOpSpec": (".specs", "PatchOpSpec"), - "PatchOpType": (".types", "PatchOpType"), - "PatchRequest": (".models", "PatchRequest"), - "PatchResult": (".models", "PatchResult"), - "PatchStatus": (".types", "PatchStatus"), - "PatchValue": (".models", "PatchValue"), - "PatchValueKind": (".types", "PatchValueKind"), - "RowDimensionSnapshot": (".models", "RowDimensionSnapshot"), - "SUPPORTED_CHART_TYPES": (".chart_types", "SUPPORTED_CHART_TYPES"), - "SUPPORTED_CHART_TYPES_CSV": (".chart_types", "SUPPORTED_CHART_TYPES_CSV"), - "SUPPORTED_CHART_TYPES_SET": (".chart_types", "SUPPORTED_CHART_TYPES_SET"), - "VerticalAlignType": (".types", "VerticalAlignType"), - "XlwingsRangeProtocol": (".models", "XlwingsRangeProtocol"), - "alias_to_canonical_with_conflict_check": ( - ".normalize", - "alias_to_canonical_with_conflict_check", +LazyExportLoader = Callable[[], object] + + +def _load_api_attr(name: str) -> object: + from . import api as api_module + + return getattr(api_module, name) + + +def _load_chart_type_attr(name: str) -> object: + from . import chart_types as chart_types_module + + return getattr(chart_types_module, name) + + +def _load_error_attr(name: str) -> object: + from . import errors as errors_module + + return getattr(errors_module, name) + + +def _load_model_attr(name: str) -> object: + from . import models as models_module + + return getattr(models_module, name) + + +def _load_normalize_attr(name: str) -> object: + from . import normalize as normalize_module + + return getattr(normalize_module, name) + + +def _load_op_schema_attr(name: str) -> object: + from . import op_schema as op_schema_module + + return getattr(op_schema_module, name) + + +def _load_specs_attr(name: str) -> object: + from . import specs as specs_module + + return getattr(specs_module, name) + + +def _load_type_attr(name: str) -> object: + from . import types as types_module + + return getattr(types_module, name) + + +_LAZY_EXPORTS: dict[str, LazyExportLoader] = { + "AlignmentSnapshot": lambda: _load_model_attr("AlignmentSnapshot"), + "BorderSideSnapshot": lambda: _load_model_attr("BorderSideSnapshot"), + "BorderSnapshot": lambda: _load_model_attr("BorderSnapshot"), + "CHART_TYPE_ALIASES": lambda: _load_chart_type_attr("CHART_TYPE_ALIASES"), + "CHART_TYPE_TO_COM_ID": lambda: _load_chart_type_attr("CHART_TYPE_TO_COM_ID"), + "ColumnDimensionSnapshot": lambda: _load_model_attr("ColumnDimensionSnapshot"), + "DesignSnapshot": lambda: _load_model_attr("DesignSnapshot"), + "FillSnapshot": lambda: _load_model_attr("FillSnapshot"), + "FontSnapshot": lambda: _load_model_attr("FontSnapshot"), + "FormulaIssue": lambda: _load_model_attr("FormulaIssue"), + "FormulaIssueCode": lambda: _load_type_attr("FormulaIssueCode"), + "FormulaIssueLevel": lambda: _load_type_attr("FormulaIssueLevel"), + "HorizontalAlignType": lambda: _load_type_attr("HorizontalAlignType"), + "MakeRequest": lambda: _load_model_attr("MakeRequest"), + "MergeStateSnapshot": lambda: _load_model_attr("MergeStateSnapshot"), + "OnConflictPolicy": lambda: _load_type_attr("OnConflictPolicy"), + "OpenpyxlEngineResult": lambda: _load_model_attr("OpenpyxlEngineResult"), + "OpenpyxlWorksheetProtocol": lambda: _load_model_attr("OpenpyxlWorksheetProtocol"), + "PATCH_OP_SPECS": lambda: _load_specs_attr("PATCH_OP_SPECS"), + "PatchBackend": lambda: _load_type_attr("PatchBackend"), + "PatchDiffItem": lambda: _load_model_attr("PatchDiffItem"), + "PatchEngine": lambda: _load_type_attr("PatchEngine"), + "PatchErrorDetail": lambda: _load_model_attr("PatchErrorDetail"), + "PatchOp": lambda: _load_model_attr("PatchOp"), + "PatchOpError": lambda: _load_error_attr("PatchOpError"), + "PatchOpSchema": lambda: _load_op_schema_attr("PatchOpSchema"), + "PatchOpSpec": lambda: _load_specs_attr("PatchOpSpec"), + "PatchOpType": lambda: _load_type_attr("PatchOpType"), + "PatchRequest": lambda: _load_model_attr("PatchRequest"), + "PatchResult": lambda: _load_model_attr("PatchResult"), + "PatchStatus": lambda: _load_type_attr("PatchStatus"), + "PatchValue": lambda: _load_model_attr("PatchValue"), + "PatchValueKind": lambda: _load_type_attr("PatchValueKind"), + "RowDimensionSnapshot": lambda: _load_model_attr("RowDimensionSnapshot"), + "SUPPORTED_CHART_TYPES": lambda: _load_chart_type_attr("SUPPORTED_CHART_TYPES"), + "SUPPORTED_CHART_TYPES_CSV": lambda: _load_chart_type_attr( + "SUPPORTED_CHART_TYPES_CSV" + ), + "SUPPORTED_CHART_TYPES_SET": lambda: _load_chart_type_attr( + "SUPPORTED_CHART_TYPES_SET" + ), + "VerticalAlignType": lambda: _load_type_attr("VerticalAlignType"), + "XlwingsRangeProtocol": lambda: _load_model_attr("XlwingsRangeProtocol"), + "alias_to_canonical_with_conflict_check": lambda: _load_normalize_attr( + "alias_to_canonical_with_conflict_check" + ), + "build_missing_sheet_message": lambda: _load_normalize_attr( + "build_missing_sheet_message" + ), + "build_patch_op_error_message": lambda: _load_normalize_attr( + "build_patch_op_error_message" + ), + "build_patch_tool_mini_schema": lambda: _load_op_schema_attr( + "build_patch_tool_mini_schema" + ), + "coerce_patch_ops": lambda: _load_normalize_attr("coerce_patch_ops"), + "get_alias_map_for_op": lambda: _load_specs_attr("get_alias_map_for_op"), + "get_patch_op_schema": lambda: _load_op_schema_attr("get_patch_op_schema"), + "list_patch_op_schemas": lambda: _load_op_schema_attr("list_patch_op_schemas"), + "make_workbook": lambda: _load_api_attr("make_workbook"), + "normalize_chart_type": lambda: _load_chart_type_attr("normalize_chart_type"), + "normalize_draw_grid_border_range": lambda: _load_normalize_attr( + "normalize_draw_grid_border_range" ), - "build_missing_sheet_message": (".normalize", "build_missing_sheet_message"), - "build_patch_op_error_message": ( - ".normalize", - "build_patch_op_error_message", + "normalize_patch_op_aliases": lambda: _load_normalize_attr( + "normalize_patch_op_aliases" ), - "build_patch_tool_mini_schema": (".op_schema", "build_patch_tool_mini_schema"), - "coerce_patch_ops": (".normalize", "coerce_patch_ops"), - "get_alias_map_for_op": (".specs", "get_alias_map_for_op"), - "get_patch_op_schema": (".op_schema", "get_patch_op_schema"), - "list_patch_op_schemas": (".op_schema", "list_patch_op_schemas"), - "make_workbook": (".api", "make_workbook"), - "normalize_chart_type": (".chart_types", "normalize_chart_type"), - "normalize_draw_grid_border_range": ( - ".normalize", - "normalize_draw_grid_border_range", + "normalize_top_level_sheet": lambda: _load_normalize_attr( + "normalize_top_level_sheet" ), - "normalize_patch_op_aliases": (".normalize", "normalize_patch_op_aliases"), - "normalize_top_level_sheet": (".normalize", "normalize_top_level_sheet"), - "parse_patch_op_json": (".normalize", "parse_patch_op_json"), - "patch_workbook": (".api", "patch_workbook"), - "resolve_chart_type_id": (".chart_types", "resolve_chart_type_id"), - "resolve_top_level_sheet_for_payload": ( - ".normalize", - "resolve_top_level_sheet_for_payload", + "parse_patch_op_json": lambda: _load_normalize_attr("parse_patch_op_json"), + "patch_workbook": lambda: _load_api_attr("patch_workbook"), + "resolve_chart_type_id": lambda: _load_chart_type_attr("resolve_chart_type_id"), + "resolve_top_level_sheet_for_payload": lambda: _load_normalize_attr( + "resolve_top_level_sheet_for_payload" ), - "schema_with_sheet_resolution_rules": ( - ".op_schema", - "schema_with_sheet_resolution_rules", + "schema_with_sheet_resolution_rules": lambda: _load_op_schema_attr( + "schema_with_sheet_resolution_rules" ), } def _resolve_lazy_export(name: str) -> object: - module_name, attr_name = _LAZY_EXPORTS[name] - value = getattr(import_module(module_name, __name__), attr_name) + value = _LAZY_EXPORTS[name]() globals()[name] = value return value diff --git a/tasks/feature_spec.md b/tasks/feature_spec.md index 66a34ea5..fd47463b 100644 --- a/tasks/feature_spec.md +++ b/tasks/feature_spec.md @@ -233,3 +233,40 @@ - `not-needed` - rationale: this is a startup-focused internal refactor that preserves existing CLI/API contracts and backend policy. The durable guidance belongs in architecture notes rather than a new policy ADR. + +## 2026-03-20 issue #108 review and Codacy follow-up + +### Goal + +- Resolve the 3 Codacy `non-literal-import` findings on PR `#112` without regressing the lazy-import startup work. +- Address the substantive PR review comments about runtime annotation introspection and unnecessary eager imports on lightweight CLI paths. +- Keep the public CLI and Python export surface unchanged while tightening the internal implementation. + +### Public contract + +- `typing.get_type_hints(exstruct.extract)` and the other public convenience helpers in `src/exstruct/__init__.py` must keep resolving runtime-visible exported model types after the lazy-import refactor. +- `exstruct --help` and extraction-style argv that are clearly not edit subcommands must not import `exstruct.cli.edit`. +- Importing `exstruct.cli.edit` for routing/help-only purposes must not eagerly import `pydantic`. +- Public exports from `exstruct` and `exstruct.edit` remain unchanged; only the internal lazy-loader structure changes to satisfy static analysis. + +### Constraints + +- Do not undo the startup optimization by eagerly importing `exstruct.models`, `exstruct.edit.models`, or `pydantic` at module import time. +- Replace generic non-literal `import_module()` helpers with explicit literal import paths or literal loader functions so Codacy/Semgrep no longer flags them. +- Keep the existing monkeypatch-compatible wrappers in `src/exstruct/cli/main.py` and `src/exstruct/cli/edit.py`. + +### Verification plan + +- `tests/cli/test_cli_lazy_imports.py` + - `import exstruct.cli.edit` does not eagerly load `pydantic` + - `main(["--help"])` does not import `exstruct.cli.edit` + - `typing.get_type_hints(exstruct.extract)` resolves `WorkbookData` successfully +- `tests/cli/test_edit_cli.py` + - existing edit CLI behavior still passes with the new explicit loaders +- `uv run pytest tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/cli/test_cli.py -q` +- `uv run task precommit-run` + +### ADR verdict + +- `not-needed` +- rationale: this is a follow-up implementation hardening and static-analysis cleanup under the existing issue `#108` design, not a new policy decision. diff --git a/tasks/todo.md b/tasks/todo.md index 2aee8c17..a18d57fd 100644 --- a/tasks/todo.md +++ b/tasks/todo.md @@ -152,3 +152,37 @@ - `uv run pytest tests/cli/test_cli.py tests/cli/test_edit_cli.py tests/cli/test_cli_lazy_imports.py tests/edit/test_architecture.py -q` - `uv run task precommit-run` - manual `-X importtime` sanity probe for `-m exstruct.cli.main --help` and `-m exstruct.cli.main ops list` + +## 2026-03-20 issue #108 review and Codacy follow-up + +### Planning + +- [x] Retrieve PR `#112` Codacy findings and review comments with `scripts/codacy_issues.py` and `gh`. +- [x] Classify which findings are substantive and confirm the current implementation gaps locally. +- [x] Add the follow-up spec and task record to `tasks/feature_spec.md` and `tasks/todo.md`. +- [x] Replace the generic lazy-import helpers in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, and `src/exstruct/cli/edit.py` with explicit literal loaders. +- [x] Restore runtime-resolvable type hints for public helpers in `src/exstruct/__init__.py` without eagerly importing `exstruct.models`. +- [x] Add a fast path in `src/exstruct/cli/main.py` so non-edit argv does not import `exstruct.cli.edit`. +- [x] Remove the top-level `pydantic` import from `src/exstruct/cli/edit.py`. +- [x] Add or update regression tests for startup import boundaries and runtime type hints. +- [x] Run targeted pytest for CLI follow-up coverage. +- [x] Run `uv run task precommit-run`. +- [x] Update this Review section with the final verification result and retention decision. + +### Review + +- Codacy's three `non-literal-import` findings were not exploitable security bugs in practice, because the module targets were fixed by code rather than user input. Even so, the finding was operationally valid for CI, so the generic loaders were replaced with explicit literal loader functions in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, and `src/exstruct/cli/edit.py`. +- The PR review about runtime type hints was valid. `typing.get_type_hints(exstruct.extract)` regressed with `NameError` after the lazy-import refactor, so `src/exstruct/__init__.py` now patches the affected public helper annotations to resolve exported model types through `_lazy_type(...)` only when runtime introspection asks for them. +- The PR review about `cli.main` routing was valid. `src/exstruct/cli/main.py` now fast-fails obvious non-edit argv before importing `exstruct.cli.edit`, so `exstruct --help` and extraction-style argv no longer pay the edit-module import cost. +- The PR review about `pydantic` eager import in `src/exstruct/cli/edit.py` was valid for routing/help-only paths. The module now defers `pydantic` loading until an actual validation-error check happens and serializes JSON payloads via `model_dump` duck typing. +- `tests/cli/test_cli_lazy_imports.py` now locks the new boundaries: `import exstruct.cli.edit` keeps `pydantic` unloaded, `main(["--help"])` keeps `exstruct.cli.edit` unloaded, and `typing.get_type_hints(exstruct.extract)` resolves `WorkbookData` successfully. +- Retention decision: + - No new ADR or permanent spec migration was needed. This follow-up only hardens the existing issue `#108` implementation and review expectations under the already-recorded lightweight-startup rule in `dev-docs/architecture/overview.md`. + - The temporary working notes for this follow-up can remain limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. +- Verification: + - `python scripts/codacy_issues.py --pr 112 --min-level Error` + - `gh pr view 112 --json number,title,reviewDecision,reviews,comments,files,url,headRefName,baseRefName` + - `gh api repos/harumiWeb/exstruct/pulls/112/comments` + - `uv run pytest tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/cli/test_cli.py -q` + - `uv run task precommit-run` + - manual `uv run python` probes for `typing.get_type_hints(exstruct.extract)` and `main(["--help"])` import boundaries diff --git a/tests/cli/test_cli_lazy_imports.py b/tests/cli/test_cli_lazy_imports.py index 267a1749..e3058615 100644 --- a/tests/cli/test_cli_lazy_imports.py +++ b/tests/cli/test_cli_lazy_imports.py @@ -76,6 +76,7 @@ def test_import_cli_edit_does_not_load_mcp_or_edit_execution_modules() -> None: import sys import exstruct.cli.edit print(json.dumps({ + "pydantic": "pydantic" in sys.modules, "mcp": "exstruct.mcp" in sys.modules, "extract_runner": "exstruct.mcp.extract_runner" in sys.modules, "edit_api": "exstruct.edit.api" in sys.modules, @@ -85,6 +86,7 @@ def test_import_cli_edit_does_not_load_mcp_or_edit_execution_modules() -> None: ) assert payload == { + "pydantic": False, "mcp": False, "extract_runner": False, "edit_api": False, @@ -92,7 +94,37 @@ def test_import_cli_edit_does_not_load_mcp_or_edit_execution_modules() -> None: } -def test_help_and_ops_list_keep_lightweight_import_boundaries() -> None: +def test_public_type_hints_resolve_lazy_exports_at_runtime() -> None: + payload = _run_probe( + """ +import json +import sys +import typing + +import exstruct + +before = { + "models": "exstruct.models" in sys.modules, + "pydantic": "pydantic" in sys.modules, +} +hints = typing.get_type_hints(exstruct.extract) + +print(json.dumps({ + "before": before, + "return_module": hints["return"].__module__, + "return_name": hints["return"].__name__, + "after_models": "exstruct.models" in sys.modules, +})) +""" + ) + + assert payload["before"] == {"models": False, "pydantic": False} + assert payload["return_module"] == "exstruct.models" + assert payload["return_name"] == "WorkbookData" + assert payload["after_models"] is True + + +def test_help_path_keeps_lightweight_import_boundaries() -> None: payload = _run_probe( """ import io @@ -111,6 +143,33 @@ def test_help_and_ops_list_keep_lightweight_import_boundaries() -> None: help_exit = exc.code help_text = stdout_buffer.getvalue() +print(json.dumps({ + "help_exit": help_exit, + "help_has_option": "--auto-page-breaks-dir" in help_text, + "cli_edit": "exstruct.cli.edit" in sys.modules, + "mcp": "exstruct.mcp" in sys.modules, + "core_integrate": "exstruct.core.integrate" in sys.modules, +})) +""" + ) + + assert payload["help_exit"] == 0 + assert payload["help_has_option"] is True + assert payload["cli_edit"] is False + assert payload["mcp"] is False + assert payload["core_integrate"] is False + + +def test_ops_list_keeps_mcp_and_extraction_lightweight() -> None: + payload = _run_probe( + """ +import io +import json +import sys +from contextlib import redirect_stderr, redirect_stdout + +from exstruct.cli.main import main + stdout_buffer = io.StringIO() stderr_buffer = io.StringIO() with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer): @@ -118,10 +177,9 @@ def test_help_and_ops_list_keep_lightweight_import_boundaries() -> None: ops_payload = json.loads(stdout_buffer.getvalue()) print(json.dumps({ - "help_exit": help_exit, - "help_has_option": "--auto-page-breaks-dir" in help_text, "ops_exit": ops_exit, "ops_has_set_value": any(item["op"] == "set_value" for item in ops_payload["ops"]), + "cli_edit": "exstruct.cli.edit" in sys.modules, "mcp": "exstruct.mcp" in sys.modules, "extract_runner": "exstruct.mcp.extract_runner" in sys.modules, "core_integrate": "exstruct.core.integrate" in sys.modules, @@ -129,10 +187,9 @@ def test_help_and_ops_list_keep_lightweight_import_boundaries() -> None: """ ) - assert payload["help_exit"] == 0 - assert payload["help_has_option"] is True assert payload["ops_exit"] == 0 assert payload["ops_has_set_value"] is True + assert payload["cli_edit"] is True assert payload["mcp"] is False assert payload["extract_runner"] is False assert payload["core_integrate"] is False From 369587cd6a5dafc6c3f242e8f2a5d6b16efa9f6f Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Sat, 21 Mar 2026 16:53:40 +0900 Subject: [PATCH 3/3] Restore validate CLI error boundary for #108 --- src/exstruct/cli/edit.py | 6 +++++- tasks/feature_spec.md | 32 ++++++++++++++++++++++++++++++++ tasks/todo.md | 29 +++++++++++++++++++++++++++++ tests/cli/test_edit_cli.py | 15 +++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/src/exstruct/cli/edit.py b/src/exstruct/cli/edit.py index dca21ac8..9d60f6c1 100644 --- a/src/exstruct/cli/edit.py +++ b/src/exstruct/cli/edit.py @@ -132,6 +132,10 @@ def _is_cli_runtime_error(exc: Exception) -> bool: ) +def _is_validate_cli_error(exc: Exception) -> bool: + return isinstance(exc, OSError | ValueError) or _is_validation_error(exc) + + def patch_workbook(request: object) -> object: """Compatibility wrapper that resolves patch execution lazily.""" @@ -454,7 +458,7 @@ def _run_validate_command(args: argparse.Namespace) -> int: validate_input(_load_validate_input_request_model()(xlsx_path=args.input)), ) except Exception as exc: - if not _is_cli_runtime_error(exc): + if not _is_validate_cli_error(exc): raise _print_error(exc) return 1 diff --git a/tasks/feature_spec.md b/tasks/feature_spec.md index fd47463b..306b3752 100644 --- a/tasks/feature_spec.md +++ b/tasks/feature_spec.md @@ -270,3 +270,35 @@ - `not-needed` - rationale: this is a follow-up implementation hardening and static-analysis cleanup under the existing issue `#108` design, not a new policy decision. + +## 2026-03-21 issue #108 review follow-up: validate runtime error scope + +### Goal + +- Restore the original `validate` subcommand exception boundary after the lazy-loader refactor in `src/exstruct/cli/edit.py`. +- Keep the patch/make commands catching `RuntimeError` while ensuring `validate` does not silently absorb it. + +### Public contract + +- `patch` and `make` continue to convert backend/runtime failures in `(OSError, RuntimeError, ValidationError, ValueError)` into `Error: ...` stderr output with exit code `1`. +- `validate` keeps its narrower historical contract and only converts `(OSError, ValidationError, ValueError)` into CLI error output. +- If `validate_input(...)` raises `RuntimeError`, the exception must still propagate rather than being turned into a handled CLI error. + +### Constraints + +- Do not broaden this follow-up into another startup optimization pass. +- Keep the current lazy import boundary for `pydantic` and validation helpers intact. +- Do not change the behavior of `patch` and `make` while narrowing `validate`. + +### Verification plan + +- `tests/cli/test_edit_cli.py` + - `validate` still returns handled CLI errors for `OSError` + - `validate` propagates `RuntimeError` +- `uv run pytest tests/cli/test_edit_cli.py -q` +- `uv run task precommit-run` + +### ADR verdict + +- `not-needed` +- rationale: this is a narrow behavior-restoration follow-up inside the existing edit CLI contract, not a new design decision. diff --git a/tasks/todo.md b/tasks/todo.md index a18d57fd..a073707a 100644 --- a/tasks/todo.md +++ b/tasks/todo.md @@ -186,3 +186,32 @@ - `uv run pytest tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/cli/test_cli.py -q` - `uv run task precommit-run` - manual `uv run python` probes for `typing.get_type_hints(exstruct.extract)` and `main(["--help"])` import boundaries + +## 2026-03-21 issue #108 review follow-up: validate runtime error scope + +### Planning + +- [x] Retrieve the new PR `#112` review comments and classify which ones are substantively valid. +- [x] Confirm locally whether `isinstance(exc, OSError | RuntimeError | ValueError)` is actually invalid on the supported Python runtime. +- [x] Add the working spec and task record for this follow-up. +- [x] Narrow `validate` exception handling in `src/exstruct/cli/edit.py` back to the original `(OSError, ValidationError, ValueError)` scope. +- [x] Add a regression test that proves `validate` still propagates `RuntimeError`. +- [x] Run targeted pytest for `tests/cli/test_edit_cli.py`. +- [x] Run `uv run task precommit-run`. +- [x] Update this Review section with the final verification result and retention decision. + +### Review + +- The new Devin review finding was valid: the shared `_is_cli_runtime_error(...)` helper widened `_run_validate_command(...)` to catch `RuntimeError`, which changed the historical validate-subcommand contract. +- The new Copilot review finding was not valid on the supported runtime. A direct `uv run python` probe confirmed that `isinstance(OSError(), OSError | RuntimeError | ValueError)` evaluates successfully on Python `3.11`, so no change was made for that comment. +- `src/exstruct/cli/edit.py` now uses a separate `_is_validate_cli_error(...)` helper so `patch` / `make` still catch `RuntimeError` while `validate` only catches `(OSError, ValidationError, ValueError)` as before. +- `tests/cli/test_edit_cli.py` now includes a regression test proving that `validate` propagates `RuntimeError` instead of converting it to handled CLI stderr output. +- Retention decision: + - No new ADR or permanent spec migration was needed. This follow-up only restores the pre-existing validate CLI error boundary inside the current edit CLI design. + - The temporary working notes for this review follow-up can remain limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. +- Verification: + - `gh api repos/harumiWeb/exstruct/pulls/112/comments` + - `gh api graphql -f query='query { repository(owner:"harumiWeb", name:"exstruct") { pullRequest(number: 112) { reviewThreads(first: 30) { nodes { id isResolved isOutdated comments(first: 20) { nodes { id author { login } body path url createdAt } } } } } } }'` + - `uv run python` probe for `isinstance(OSError(), OSError | RuntimeError | ValueError)` + - `uv run pytest tests/cli/test_edit_cli.py -q` + - `uv run task precommit-run` diff --git a/tests/cli/test_edit_cli.py b/tests/cli/test_edit_cli.py index 1fed5fc4..218d6e33 100644 --- a/tests/cli/test_edit_cli.py +++ b/tests/cli/test_edit_cli.py @@ -427,6 +427,21 @@ def _raise_os_error(_request: object) -> object: assert "Error: boom" in result.stderr +def test_validate_cli_propagates_runtime_error( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + path = tmp_path / "input.xlsx" + path.write_bytes(b"x") + + def _raise_runtime_error(_request: object) -> object: + raise RuntimeError("boom") + + monkeypatch.setattr(edit_cli_module, "validate_input", _raise_runtime_error) + + with pytest.raises(RuntimeError, match="boom"): + _run_cli(["validate", "--input", str(path)]) + + def test_extraction_help_mentions_editing_commands() -> None: help_text = build_parser().format_help()