From c3b49f49678ea1ffc485091b1002eccc9073f943 Mon Sep 17 00:00:00 2001 From: harumiWeb Date: Sat, 21 Mar 2026 18:26:42 +0900 Subject: [PATCH] Release v0.7.1 --- CHANGELOG.md | 18 +++ docs/release-notes/v0.7.1.md | 43 ++++++ mkdocs.yml | 1 + pyproject.toml | 2 +- tasks/feature_spec.md | 268 +++++------------------------------ tasks/todo.md | 194 +++---------------------- uv.lock | 2 +- 7 files changed, 113 insertions(+), 415 deletions(-) create mode 100644 docs/release-notes/v0.7.1.md diff --git a/CHANGELOG.md b/CHANGELOG.md index b43e08c..7e20b8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,24 @@ All notable changes to this project are documented in this file. This changelog ## [Unreleased] +## [0.7.1] - 2026-03-21 + +### Added + +- Added regression coverage for extraction CLI runtime validation and lightweight import boundaries across `exstruct`, `exstruct.engine`, `exstruct.cli.main`, and `exstruct.cli.edit`. + +### Changed + +- Changed the extraction CLI so `--auto-page-breaks-dir` is always listed in help output and validated only when the flag is requested at runtime. +- Changed CLI and package import behavior so `exstruct --help`, `exstruct ops list`, `import exstruct`, and `import exstruct.engine` defer heavy extraction, edit, and rendering imports until needed. + +### Fixed + +- Fixed parser and help startup side effects by removing COM availability probing during extraction CLI parser construction. +- Fixed lazy-export follow-ups so public runtime type hints resolve correctly while keeping exported symbol names stable. +- Fixed edit CLI routing so non-edit argv and lightweight edit paths avoid unnecessary imports such as `exstruct.cli.edit` and `pydantic`. +- Fixed the `validate` subcommand error boundary so `RuntimeError` is no longer converted into handled CLI stderr output. + ## [0.7.0] - 2026-03-19 ### Added diff --git a/docs/release-notes/v0.7.1.md b/docs/release-notes/v0.7.1.md new file mode 100644 index 0000000..7f58a9c --- /dev/null +++ b/docs/release-notes/v0.7.1.md @@ -0,0 +1,43 @@ +# v0.7.1 Release Notes + +This patch release publishes the CLI and package import startup optimization work +completed under issues `#107`, `#108`, and `#109`. + +## Highlights + +- The extraction CLI now always shows `--auto-page-breaks-dir` in help output + and validates support only when the option is actually used at runtime. +- Lightweight CLI paths now avoid unnecessary heavy imports: + - `exstruct --help` + - extraction-style argv that do not route to edit commands + - `exstruct ops list` +- Public package imports are lighter: + - `import exstruct` + - `import exstruct.engine` + - exported names from `exstruct` and `exstruct.edit` remain compatible while + loading implementation modules lazily +- Regression coverage now locks the startup boundary and validates that + lightweight import paths do not eagerly load extraction, edit, MCP, render, + or `pydantic` dependencies. + +## Startup impact + +Compared with `v0.7.0`, local median startup timings on the same Python +environment improved by approximately: + +- `python -m exstruct.cli.main --help`: `2195 ms -> 64 ms` (`34.4x` faster) +- `import exstruct`: `1039 ms -> 55 ms` (`19.0x` faster) +- `python -m exstruct.cli.main ops list`: `1143 ms -> 207 ms` (`5.5x` faster) +- `import exstruct.engine`: `1005 ms -> 223 ms` (`4.5x` faster) + +These measurements were taken locally against `v0.7.0` and the current +`v0.7.1` code using the same virtual environment and direct source imports, so +exact numbers will vary by machine and startup conditions. + +## Notes + +- No new CLI commands were added in `v0.7.1`. +- MCP tool names and payload shapes remain compatible in `v0.7.1`. +- Backend selection policy remains `auto` / `com` / `openpyxl`. +- The edit CLI `validate` subcommand once again propagates `RuntimeError` + instead of converting it into handled CLI error output. diff --git a/mkdocs.yml b/mkdocs.yml index a153829..28f239d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -28,6 +28,7 @@ nav: - MCP Server: mcp.md - Concept / Why ExStruct?: concept.md - Release Notes: + - v0.7.1: release-notes/v0.7.1.md - v0.7.0: release-notes/v0.7.0.md - v0.6.1: release-notes/v0.6.1.md - v0.6.0: release-notes/v0.6.0.md diff --git a/pyproject.toml b/pyproject.toml index f4d1184..dd43697 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "exstruct" -version = "0.7.0" +version = "0.7.1" description = "Excel to structured JSON (tables, shapes, charts) for LLM/RAG pipelines" readme = "README.md" license = { file = "LICENSE" } diff --git a/tasks/feature_spec.md b/tasks/feature_spec.md index 306b375..39a7426 100644 --- a/tasks/feature_spec.md +++ b/tasks/feature_spec.md @@ -43,262 +43,58 @@ - `not-needed` - rationale: this was release preparation and task-log retention cleanup. The policy decisions already live in `ADR-0006`, `ADR-0007`, and the editing specs. -## 2026-03-20 issue #107 extraction CLI startup optimization +## 2026-03-21 v0.7.1 release closeout ### Goal -- Stop probing Excel COM availability while building the extraction CLI parser. -- Keep `--auto-page-breaks-dir` visible in help output on every host. -- Validate `--auto-page-breaks-dir` only when the user actually requests it at execution time. -- Return an explicit CLI error when auto page-break export is requested from an unsupported mode or unsupported runtime. +- Publish the `v0.7.1` release-prep artifacts for the CLI and package import startup optimization work delivered through issues `#107`, `#108`, and `#109`. +- Collapse the temporary issue and review logs for `#107` and `#108` after confirming that the durable contract and design rationale already live in permanent documentation. +- Keep a compact closeout record that states where permanent information now lives and how the release-prep work was verified. -### Public contract +### Public contract summary -- `build_parser()` and `exstruct --help` must not call the COM availability probe. -- `--auto-page-breaks-dir` is always listed in extraction CLI help output. -- `--auto-page-breaks-dir` runtime behavior: - - `mode="libreoffice"` keeps the existing `ConfigError` path and combined-error precedence. - - `mode="light"` is rejected explicitly by the CLI with a message that auto page-break export requires `standard` or `verbose` with Excel COM. - - `mode="standard"` / `mode="verbose"` trigger COM availability probing only when the flag is present. - - When COM is unavailable, the CLI exits non-zero and prints an actionable message that names the flag and includes the availability reason when present. -- Existing `--pdf` / `--image` runtime behavior is unchanged in this task. +- `--auto-page-breaks-dir` is always listed in extraction CLI help output and validated only when the flag is requested at runtime. +- `exstruct --help`, `exstruct ops list`, non-edit CLI routing, `import exstruct`, and `import exstruct.engine` now defer heavy imports until execution actually needs them. +- Public exported symbol names from `exstruct` and `exstruct.edit` remain stable; only import timing changed. +- The edit CLI `validate` subcommand keeps its narrow historical error boundary and must still propagate `RuntimeError`. +- No new CLI commands, MCP payload shapes, or backend-selection policy changes are introduced in this closeout. ### Permanent destinations -- `dev-docs/adr/` - - `ADR-0008` records the extraction CLI policy change: runtime capability validation instead of parser-time environment probing. +- `CHANGELOG.md` + - Holds the `0.7.1` `Added` / `Changed` / `Fixed` summary for the public release. - `docs/` - - `docs/cli.md` becomes the canonical public CLI contract for always-visible `--auto-page-breaks-dir` and runtime validation wording. + - `docs/release-notes/v0.7.1.md` records the user-facing release narrative for issues `#107`, `#108`, and `#109`. + - `mkdocs.yml` keeps the canonical `Release Notes` navigation entry for `v0.7.1`. + - `docs/cli.md` remains the canonical public CLI contract for extraction help/runtime validation behavior. - `README.md` and `README.ja.md` - - Update quick-start examples and current-behavior prose so they no longer claim the flag is hidden on unsupported hosts. + - Retain the public-facing wording for extraction runtime validation and CLI behavior that shipped with issue `#107`. - `dev-docs/specs/` - - `dev-docs/specs/excel-extraction.md` should keep the internal guarantee that auto page-break extraction is COM-only and tied to runtime validation. -- `tasks/feature_spec.md` and `tasks/todo.md` - - Retain only the temporary working record, verification, and migration notes for this issue. - -### Constraints - -- Do not broaden the task into a Python API or MCP contract change. -- Do not rewrite historical release notes that describe the old behavior at the time they shipped. -- Preserve existing file-not-found behavior and existing `libreoffice` combined-error precedence. - -### Verification plan - -- `tests/cli/test_cli.py` - - Help output always includes `--auto-page-breaks-dir`. - - Parser/help generation does not call `get_com_availability()`. - - `mode="light"` + `--auto-page-breaks-dir` fails with a clear runtime error. - - `mode="standard"` / `mode="verbose"` + `--auto-page-breaks-dir` + unavailable COM fails with a clear runtime error. - - Existing `libreoffice` rejection tests still pass. -- Targeted pytest for the CLI extraction test module. -- `uv run task build-docs` -- `uv run task precommit-run` - -### ADR verdict - -- `required` -- rationale: this changes the public extraction CLI contract for help visibility and execution-time validation of a COM-only flag, and it sets a reusable policy for future capability-gated CLI features. - -## 2026-03-20 issue #107 review follow-up: libreoffice auto page-break fast-fail - -### Goal - -- Align `mode="libreoffice"` handling for `--auto-page-breaks-dir` with the new CLI-side fast-fail policy. -- Preserve the existing LibreOffice single-error and combined-error precedence from the shared validator. -- Prove that the CLI rejects the invalid request before `process_excel()` runs. - -### Public contract - -- `--mode libreoffice --auto-page-breaks-dir ...` fails in the CLI layer without calling `process_excel()`. -- When `--pdf` or `--image` is also present, the CLI keeps the existing combined LibreOffice error message precedence. -- This follow-up does not change the already documented `standard` / `verbose` COM-runtime validation policy. - -### Permanent destinations - -- No new permanent destination is required beyond the documents already updated for issue `#107`. -- The durable contract remains in `docs/cli.md`, `README.md`, `README.ja.md`, `dev-docs/specs/excel-extraction.md`, and `dev-docs/adr/ADR-0008-extraction-cli-runtime-capability-validation.md`. - -### Constraints - -- Reuse the existing LibreOffice validator instead of duplicating message composition in the CLI. -- Keep parser/help no-probe behavior unchanged. - -### Verification plan - -- `tests/cli/test_cli.py` - - `mode="libreoffice"` + `--auto-page-breaks-dir` rejects before `process_excel()`. - - `mode="libreoffice"` + rendering + `--auto-page-breaks-dir` keeps the combined error and also rejects before `process_excel()`. -- Targeted pytest for `tests/cli/test_cli.py`. - -### ADR verdict - -- `not-needed` -- rationale: this is a corrective follow-up that aligns implementation with the already-recorded `ADR-0008` policy rather than creating a new architectural decision. - -## 2026-03-20 issue #107 review follow-up: wording and help-text clarity - -### Goal - -- Resolve the PR review wording nits in tracked documentation. -- Make the extraction CLI help text for `--auto-page-breaks-dir` match the runtime contract already documented elsewhere. - -### Public contract - -- The help text for `--auto-page-breaks-dir` states that it writes one file per auto page-break area, follows `--format`, and requires `--mode standard` or `--mode verbose` with Excel COM. -- This follow-up does not change runtime behavior; it only tightens wording and help-text clarity. - -### Permanent destinations - -- No new permanent destination is required. -- The durable wording lives in `src/exstruct/cli/main.py`, `dev-docs/adr/ADR-0008-extraction-cli-runtime-capability-validation.md`, and the existing issue `#107` task notes. - -### Constraints - -- Keep the change limited to wording/help-text clarity; do not change runtime validation or expand scope beyond the reviewed lines. - -### Verification plan - -- `tests/cli/test_cli.py` - - Help output still includes `--auto-page-breaks-dir`. - - Help output includes the clarified runtime-contract wording. -- `uv run pytest tests/cli/test_cli.py -q` - -### ADR verdict - -- `not-needed` -- rationale: this is a wording-only follow-up under the existing `ADR-0008` decision. - -## 2026-03-20 issue #108 CLI startup lazy import optimization - -### Goal - -- Reduce startup import cost for lightweight CLI paths such as `exstruct --help` and `exstruct ops list`. -- Keep the existing extraction and editing CLI contracts unchanged while delaying heavy implementation imports until routing is known. -- Preserve the current module-level monkeypatch surfaces used by the CLI test suite. - -### Public contract - -- `exstruct --help` and parser construction keep the current CLI syntax, help text, and exit behavior. -- `exstruct ops list` and `exstruct ops describe` keep their current output shape and exit behavior. -- Extraction invocations still call `process_excel(...)` with the same arguments and keep current file-not-found and auto page-break validation behavior. -- Public Python symbol names exported from `exstruct` and `exstruct.edit` remain unchanged; only their import timing changes. - -### Internal implementation guarantees - -- `src/exstruct/__init__.py` must not eagerly import extraction engine, IO, render, or model modules during package import; convenience functions may import those dependencies inside function bodies. -- `src/exstruct/edit/__init__.py` must not eagerly import editing service/runtime/model modules during package import; exported names should resolve lazily. -- `src/exstruct/cli/main.py` must route edit vs extraction commands before importing edit/extraction implementations. -- `src/exstruct/cli/edit.py` must not import `exstruct.mcp.validate_input` or editing execution helpers at module import time; command handlers load only the functionality they need. -- Existing CLI module patch points (`process_excel`, `get_com_availability`, `is_edit_subcommand`, `run_edit_cli`, `patch_workbook`, `make_workbook`, `resolve_top_level_sheet_for_payload`, `validate_input`) remain present as thin wrappers. - -### Scope and non-goals - -- In scope: - - `src/exstruct/__init__.py` - - `src/exstruct/edit/__init__.py` - - `src/exstruct/cli/main.py` - - `src/exstruct/cli/edit.py` - - targeted tests and one architecture note -- Out of scope: - - changing CLI syntax, help wording, or JSON contracts - - changing backend selection policy - - optimizing `exstruct validate` startup beyond removing it from the `ops` path - - refactoring `src/exstruct/mcp/__init__.py` unless required by failing tests on the `ops` path - -### Permanent destinations - + - `dev-docs/specs/excel-extraction.md` remains the canonical internal guarantee for extraction CLI runtime validation. - `dev-docs/architecture/overview.md` - - Records that package `__init__` files and lightweight CLI startup paths must remain side-effect-free and defer heavy imports. -- `tasks/feature_spec.md` and `tasks/todo.md` - - Keep the temporary implementation/verification record for this issue. + - Records the durable lightweight-startup rule for package `__init__` files, CLI routing, and `exstruct.engine`. - `dev-docs/adr/` - - No new ADR is planned; this issue changes import timing only and does not alter the public contract or policy. - -### Verification plan - -- `tests/cli/test_cli.py` - - help and extraction routing still behave the same - - lightweight startup paths do not eagerly load edit/extraction implementation modules -- `tests/cli/test_edit_cli.py` - - `ops list` / `ops describe` do not depend on extraction import paths - - existing monkeypatch-based tests still pass -- `tests/edit/test_architecture.py` or a focused startup test module - - `import exstruct` does not eagerly load extraction engine modules - - `import exstruct.cli.edit` does not eagerly load `exstruct.mcp` / `exstruct.mcp.extract_runner` -- `uv run pytest tests/cli/test_cli.py tests/cli/test_edit_cli.py tests/edit/test_architecture.py -q` -- `uv run task precommit-run` -- manual importtime sanity checks for `--help` and `ops list` - -### ADR verdict - -- `not-needed` -- rationale: this is a startup-focused internal refactor that preserves existing CLI/API contracts and backend policy. The durable guidance belongs in architecture notes rather than a new policy ADR. - -## 2026-03-20 issue #108 review and Codacy follow-up - -### Goal - -- Resolve the 3 Codacy `non-literal-import` findings on PR `#112` without regressing the lazy-import startup work. -- Address the substantive PR review comments about runtime annotation introspection and unnecessary eager imports on lightweight CLI paths. -- Keep the public CLI and Python export surface unchanged while tightening the internal implementation. - -### Public contract - -- `typing.get_type_hints(exstruct.extract)` and the other public convenience helpers in `src/exstruct/__init__.py` must keep resolving runtime-visible exported model types after the lazy-import refactor. -- `exstruct --help` and extraction-style argv that are clearly not edit subcommands must not import `exstruct.cli.edit`. -- Importing `exstruct.cli.edit` for routing/help-only purposes must not eagerly import `pydantic`. -- Public exports from `exstruct` and `exstruct.edit` remain unchanged; only the internal lazy-loader structure changes to satisfy static analysis. - -### Constraints - -- Do not undo the startup optimization by eagerly importing `exstruct.models`, `exstruct.edit.models`, or `pydantic` at module import time. -- Replace generic non-literal `import_module()` helpers with explicit literal import paths or literal loader functions so Codacy/Semgrep no longer flags them. -- Keep the existing monkeypatch-compatible wrappers in `src/exstruct/cli/main.py` and `src/exstruct/cli/edit.py`. - -### Verification plan - -- `tests/cli/test_cli_lazy_imports.py` - - `import exstruct.cli.edit` does not eagerly load `pydantic` - - `main(["--help"])` does not import `exstruct.cli.edit` - - `typing.get_type_hints(exstruct.extract)` resolves `WorkbookData` successfully -- `tests/cli/test_edit_cli.py` - - existing edit CLI behavior still passes with the new explicit loaders -- `uv run pytest tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/cli/test_cli.py -q` -- `uv run task precommit-run` - -### ADR verdict - -- `not-needed` -- rationale: this is a follow-up implementation hardening and static-analysis cleanup under the existing issue `#108` design, not a new policy decision. - -## 2026-03-21 issue #108 review follow-up: validate runtime error scope - -### Goal - -- Restore the original `validate` subcommand exception boundary after the lazy-loader refactor in `src/exstruct/cli/edit.py`. -- Keep the patch/make commands catching `RuntimeError` while ensuring `validate` does not silently absorb it. - -### Public contract - -- `patch` and `make` continue to convert backend/runtime failures in `(OSError, RuntimeError, ValidationError, ValueError)` into `Error: ...` stderr output with exit code `1`. -- `validate` keeps its narrower historical contract and only converts `(OSError, ValidationError, ValueError)` into CLI error output. -- If `validate_input(...)` raises `RuntimeError`, the exception must still propagate rather than being turned into a handled CLI error. + - `ADR-0008` remains the canonical policy source for runtime capability validation in the extraction CLI. +- `tasks/feature_spec.md` and `tasks/todo.md` + - Retain only the release-closeout records plus verification, not the detailed issue-by-issue implementation log. ### Constraints -- Do not broaden this follow-up into another startup optimization pass. -- Keep the current lazy import boundary for `pydantic` and validation helpers intact. -- Do not change the behavior of `patch` and `make` while narrowing `validate`. +- `README.md` and `docs/index.md` do not gain direct release-note links; `mkdocs.yml` stays the canonical navigation route. +- `uv.lock` is not fully regenerated; only the editable `exstruct` package version is aligned to `0.7.1`. +- This closeout does not add a new ADR or new permanent spec document; it only points to the existing permanent sources for the shipped behavior. -### Verification plan +### Verification -- `tests/cli/test_edit_cli.py` - - `validate` still returns handled CLI errors for `OSError` - - `validate` propagates `RuntimeError` -- `uv run pytest tests/cli/test_edit_cli.py -q` +- `uv run pytest tests/cli/test_cli.py tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/edit/test_architecture.py -q` +- `uv run task build-docs` - `uv run task precommit-run` +- `rg -n "0\.7\.1|v0\.7\.1" CHANGELOG.md mkdocs.yml docs/release-notes/v0.7.1.md` +- `rg -n '^version = "0\.7\.1"$' pyproject.toml uv.lock` +- `rg -n "^## " tasks/feature_spec.md tasks/todo.md` +- `git diff --check -- CHANGELOG.md docs/release-notes/v0.7.1.md mkdocs.yml pyproject.toml uv.lock tasks/feature_spec.md tasks/todo.md` ### ADR verdict - `not-needed` -- rationale: this is a narrow behavior-restoration follow-up inside the existing edit CLI contract, not a new design decision. +- rationale: this was release preparation and task-log retention cleanup. The shipped policy decisions already live in `ADR-0008`, the extraction docs/specs, and the architecture note. diff --git a/tasks/todo.md b/tasks/todo.md index a073707..c7c3f67 100644 --- a/tasks/todo.md +++ b/tasks/todo.md @@ -27,191 +27,31 @@ - `rg -n "^## " tasks/feature_spec.md tasks/todo.md` - `git diff --check -- CHANGELOG.md docs/release-notes/v0.7.0.md mkdocs.yml pyproject.toml uv.lock tasks/feature_spec.md tasks/todo.md dev-docs/architecture/overview.md` -## 2026-03-20 issue #107 extraction CLI startup optimization +## 2026-03-21 v0.7.1 release closeout ### Planning -- [x] Confirm issue `#107` details with `gh issue view 107`. -- [x] Read current extraction CLI code, related docs/specs, and relevant tests. -- [x] Classify ADR need for the public CLI contract change. -- [x] Add the issue `#107` working spec to `tasks/feature_spec.md`. -- [x] Add or update the ADR for extraction CLI runtime capability validation. -- [x] Refactor extraction CLI parser construction so it never probes COM availability. -- [x] Always register `--auto-page-breaks-dir` in extraction CLI help. -- [x] Add runtime validation for `--auto-page-breaks-dir` covering `light` mode and unavailable COM on `standard` / `verbose`. -- [x] Keep existing `libreoffice` rejection and combined-error precedence intact. -- [x] Update extraction CLI tests for no-probe help and runtime validation. -- [x] Update current user docs and internal specs for the new CLI contract. -- [x] Run targeted pytest for extraction CLI coverage. +- [x] Add the `0.7.1` changelog entry with `Added` / `Changed` / `Fixed`. +- [x] Create `docs/release-notes/v0.7.1.md` for issues `#107`, `#108`, and `#109`. +- [x] Add `v0.7.1` to the `Release Notes` nav in `mkdocs.yml`. +- [x] Align the local package version in `pyproject.toml` and the editable `exstruct` package entry in `uv.lock` to `0.7.1`. +- [x] Compress the detailed `#107` / `#108` working logs in `tasks/feature_spec.md` and `tasks/todo.md` into this release-closeout record. +- [x] Run `uv run pytest tests/cli/test_cli.py tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/edit/test_architecture.py -q`. - [x] Run `uv run task build-docs`. - [x] Run `uv run task precommit-run`. -- [x] Review task/spec retention and record permanent destinations in the Review section. +- [x] Run the release-prep `rg` and `git diff --check` consistency checks. ### Review -- Extraction CLI parser construction is now side-effect free: `build_parser()` always registers `--auto-page-breaks-dir`, and COM probing happens only when that flag is requested at runtime from a supported mode. -- `mode="light"` now fails explicitly for `--auto-page-breaks-dir`, while `mode="libreoffice"` keeps the existing core validation and combined-error precedence. -- Permanent destinations: - - `dev-docs/adr/ADR-0008-extraction-cli-runtime-capability-validation.md` records the policy change and why parser-time probing is forbidden. - - `docs/cli.md`, `README.md`, and `README.ja.md` now describe the always-visible flag and execution-time validation contract. - - `dev-docs/specs/excel-extraction.md` now records the internal guarantee that the extraction CLI validates auto page-break export at runtime instead of parser construction. -- ADR checks: - - `adr-linter`: no high/medium/low findings for `ADR-0008`. - - `adr-reviewer`: `ready`, no findings. - - `adr-reconciler`: no policy drift, no stale references, two low-evidence findings addressed by adding `verbose` and `main(["--help"])` regression tests. - - `adr-indexer`: index artifacts were synchronized manually from the ADR source text (`README.md`, `index.yaml`, `decision-map.md`). +- `CHANGELOG.md`, `docs/release-notes/v0.7.1.md`, `mkdocs.yml`, `pyproject.toml`, and `uv.lock` now describe and label the `0.7.1` release consistently around the CLI/package import optimization work from issues `#107`, `#108`, and `#109`. +- The release narrative explicitly documents the public behavior deltas that shipped after `v0.7.0`: runtime validation for `--auto-page-breaks-dir`, lighter startup/import behavior for CLI and package entrypoints, preserved exported symbol names, and the restored `validate` error boundary. +- Historical implementation and review logs for issues `#107` and `#108` were intentionally removed from `tasks/feature_spec.md` and `tasks/todo.md` after permanent information was classified and retained in `CHANGELOG.md`, `docs/release-notes/v0.7.1.md`, `docs/cli.md`, `README.md`, `README.ja.md`, `dev-docs/specs/excel-extraction.md`, `dev-docs/architecture/overview.md`, and `ADR-0008`. +- No new `dev-docs/specs/` or `dev-docs/adr/` migration was required for this closeout; the existing CLI docs, architecture note, extraction spec, and `ADR-0008` remain the canonical permanent sources for the shipped behavior. - Verification: - - `gh issue view 107 --json number,title,body,labels,assignees,state,url` - - `uv run pytest tests/cli/test_cli.py tests/cli/test_edit_cli.py -q` + - `uv run pytest tests/cli/test_cli.py tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/edit/test_architecture.py -q` - `uv run task build-docs` - `uv run task precommit-run` - - `git diff --check` - -## 2026-03-20 issue #107 review follow-up: libreoffice auto page-break fast-fail - -### Planning - -- [x] Re-read the CLI review comment and compare it with `src/exstruct/cli/main.py` and the shared LibreOffice validator. -- [x] Update `tasks/feature_spec.md` with the follow-up contract and verification scope. -- [x] Make the CLI reject `--mode libreoffice --auto-page-breaks-dir` before `process_excel()`. -- [x] Preserve the existing combined LibreOffice error precedence when rendering flags are also present. -- [x] Add regression tests that prove the CLI rejects these requests before `process_excel()` runs. -- [x] Run targeted pytest for `tests/cli/test_cli.py`. -- [x] Update this Review section with the final validation result and retention decision. - -### Review - -- The review finding was valid: `_validate_auto_page_breaks_request()` treated `mode="light"` as a CLI-side fast-fail but let `mode="libreoffice"` fall through to the engine layer, which made responsibility and failure timing inconsistent. -- `src/exstruct/cli/main.py` now reuses `validate_libreoffice_process_request(...)` for `--auto-page-breaks-dir` in `mode="libreoffice"`, so the CLI rejects invalid requests before `process_excel()` while preserving the existing single-error and combined-error message precedence. -- `tests/cli/test_cli.py` now proves both `libreoffice + --auto-page-breaks-dir` and `libreoffice + --pdf + --auto-page-breaks-dir` fail before `process_excel()` runs, and that these paths do not probe COM availability. -- Retention decision: - - No new permanent document was needed. This follow-up only brought the implementation back into alignment with the policy already recorded in `ADR-0008`, `docs/cli.md`, `README.md`, `README.ja.md`, and `dev-docs/specs/excel-extraction.md`. - - The temporary working notes for this follow-up can stay limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. -- Verification: - - `uv run pytest tests/cli/test_cli.py -q` - - `uv run task precommit-run` - - `git diff --check` - -## 2026-03-20 issue #107 review follow-up: wording and help-text clarity - -### Planning - -- [x] Retrieve the PR review comments with `gh` and classify which findings are substantive. -- [x] Confirm the wording nits in `tasks/todo.md` and `dev-docs/adr/ADR-0008-extraction-cli-runtime-capability-validation.md`. -- [x] Clarify the `--auto-page-breaks-dir` help text in `src/exstruct/cli/main.py` so it matches the runtime contract. -- [x] Update CLI help tests for the clarified wording. -- [x] Run targeted pytest for `tests/cli/test_cli.py`. -- [x] Run `uv run task precommit-run`. -- [x] Record the review outcome and retention decision here. - -### Review - -- The explicit PR review findings were valid but minor: `tasks/todo.md` and `dev-docs/adr/ADR-0008-extraction-cli-runtime-capability-validation.md` each had a hyphenation nit (`low-evidence`, `side-effect-free`). -- A separate suppressed Copilot note about `--auto-page-breaks-dir` help text was also substantively valid: the old string mentioned only LibreOffice rejection and omitted that output files follow `--format`, while the actual runtime contract also rejects `light` and requires `standard`/`verbose` with Excel COM. -- `src/exstruct/cli/main.py` now states the fuller contract in the argument help text, and `tests/cli/test_cli.py` now checks for the clarified help wording without depending on exact `argparse` line wrapping. -- Retention decision: - - No new ADR or spec migration was needed. The durable contract remains in `ADR-0008`, `docs/cli.md`, and the README files; this follow-up only aligns wording and help text with that existing policy. - - The temporary working record can stay limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. -- Verification: - - `gh pr view 111 --json number,title,reviewDecision,reviews,comments,files,url` - - `gh api repos/harumiWeb/exstruct/pulls/111/comments` - - `uv run pytest tests/cli/test_cli.py -q` - - `uv run task precommit-run` - - `git diff --check` - -## 2026-03-20 issue #108 CLI startup lazy import optimization - -### Planning - -- [x] Confirm issue `#108` details with `gh issue view 108`. -- [x] Inspect current import paths in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, `src/exstruct/cli/main.py`, and `src/exstruct/cli/edit.py`. -- [x] Classify ADR need for the startup optimization work. -- [x] Add the issue `#108` working spec to `tasks/feature_spec.md`. -- [x] Refactor `src/exstruct/__init__.py` to defer heavy imports while preserving exported symbol names. -- [x] Refactor `src/exstruct/edit/__init__.py` to defer heavy imports while preserving exported symbol names. -- [x] Refactor `src/exstruct/cli/main.py` so edit/extraction implementations load only after routing is known. -- [x] Refactor `src/exstruct/cli/edit.py` so `ops` commands avoid extraction-path imports and handler-specific dependencies load lazily. -- [x] Add regression tests for startup import isolation and existing CLI behavior. -- [x] Update `dev-docs/architecture/overview.md` with the lightweight-startup import rule. -- [x] Run targeted pytest for CLI/startup coverage. -- [x] Run `uv run task precommit-run`. -- [x] Record final verification and retention notes in this Review section. - -### Review - -- `src/exstruct/__init__.py` now keeps the public export surface but resolves heavy extraction/runtime symbols lazily, so importing `exstruct` no longer front-loads extraction engine modules for CLI startup. -- `src/exstruct/edit/__init__.py` now resolves editing exports lazily, which lets CLI code import edit submodules without paying the full patch-service import cost up front. -- `src/exstruct/cli/main.py` now keeps monkeypatch-compatible wrappers for `process_excel`, `get_com_availability`, `is_edit_subcommand`, and `run_edit_cli`, but loads the underlying implementations only after routing demands them. -- `src/exstruct/cli/edit.py` now keeps monkeypatch-compatible wrappers for `patch_workbook`, `make_workbook`, `resolve_top_level_sheet_for_payload`, and `validate_input`, while `ops` commands load only schema metadata and avoid dragging the extraction path into startup. -- `tests/cli/test_cli_lazy_imports.py` now locks the startup boundary with subprocess `sys.modules` probes for `import exstruct`, `import exstruct.cli.main`, `import exstruct.cli.edit`, and `main(["ops", "list"])`. -- `dev-docs/architecture/overview.md` now records the durable rule that package `__init__` files and lightweight CLI startup paths must remain side-effect-free. -- Retention decision: - - No new ADR was added. The change preserves the public contract and only changes import timing, so the durable guidance lives in `dev-docs/architecture/overview.md`. - - The temporary working record for implementation order and verification remains limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. -- Verification: - - `uv run pytest tests/cli/test_cli.py tests/cli/test_edit_cli.py tests/cli/test_cli_lazy_imports.py tests/edit/test_architecture.py -q` - - `uv run task precommit-run` - - manual `-X importtime` sanity probe for `-m exstruct.cli.main --help` and `-m exstruct.cli.main ops list` - -## 2026-03-20 issue #108 review and Codacy follow-up - -### Planning - -- [x] Retrieve PR `#112` Codacy findings and review comments with `scripts/codacy_issues.py` and `gh`. -- [x] Classify which findings are substantive and confirm the current implementation gaps locally. -- [x] Add the follow-up spec and task record to `tasks/feature_spec.md` and `tasks/todo.md`. -- [x] Replace the generic lazy-import helpers in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, and `src/exstruct/cli/edit.py` with explicit literal loaders. -- [x] Restore runtime-resolvable type hints for public helpers in `src/exstruct/__init__.py` without eagerly importing `exstruct.models`. -- [x] Add a fast path in `src/exstruct/cli/main.py` so non-edit argv does not import `exstruct.cli.edit`. -- [x] Remove the top-level `pydantic` import from `src/exstruct/cli/edit.py`. -- [x] Add or update regression tests for startup import boundaries and runtime type hints. -- [x] Run targeted pytest for CLI follow-up coverage. -- [x] Run `uv run task precommit-run`. -- [x] Update this Review section with the final verification result and retention decision. - -### Review - -- Codacy's three `non-literal-import` findings were not exploitable security bugs in practice, because the module targets were fixed by code rather than user input. Even so, the finding was operationally valid for CI, so the generic loaders were replaced with explicit literal loader functions in `src/exstruct/__init__.py`, `src/exstruct/edit/__init__.py`, and `src/exstruct/cli/edit.py`. -- The PR review about runtime type hints was valid. `typing.get_type_hints(exstruct.extract)` regressed with `NameError` after the lazy-import refactor, so `src/exstruct/__init__.py` now patches the affected public helper annotations to resolve exported model types through `_lazy_type(...)` only when runtime introspection asks for them. -- The PR review about `cli.main` routing was valid. `src/exstruct/cli/main.py` now fast-fails obvious non-edit argv before importing `exstruct.cli.edit`, so `exstruct --help` and extraction-style argv no longer pay the edit-module import cost. -- The PR review about `pydantic` eager import in `src/exstruct/cli/edit.py` was valid for routing/help-only paths. The module now defers `pydantic` loading until an actual validation-error check happens and serializes JSON payloads via `model_dump` duck typing. -- `tests/cli/test_cli_lazy_imports.py` now locks the new boundaries: `import exstruct.cli.edit` keeps `pydantic` unloaded, `main(["--help"])` keeps `exstruct.cli.edit` unloaded, and `typing.get_type_hints(exstruct.extract)` resolves `WorkbookData` successfully. -- Retention decision: - - No new ADR or permanent spec migration was needed. This follow-up only hardens the existing issue `#108` implementation and review expectations under the already-recorded lightweight-startup rule in `dev-docs/architecture/overview.md`. - - The temporary working notes for this follow-up can remain limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. -- Verification: - - `python scripts/codacy_issues.py --pr 112 --min-level Error` - - `gh pr view 112 --json number,title,reviewDecision,reviews,comments,files,url,headRefName,baseRefName` - - `gh api repos/harumiWeb/exstruct/pulls/112/comments` - - `uv run pytest tests/cli/test_cli_lazy_imports.py tests/cli/test_edit_cli.py tests/cli/test_cli.py -q` - - `uv run task precommit-run` - - manual `uv run python` probes for `typing.get_type_hints(exstruct.extract)` and `main(["--help"])` import boundaries - -## 2026-03-21 issue #108 review follow-up: validate runtime error scope - -### Planning - -- [x] Retrieve the new PR `#112` review comments and classify which ones are substantively valid. -- [x] Confirm locally whether `isinstance(exc, OSError | RuntimeError | ValueError)` is actually invalid on the supported Python runtime. -- [x] Add the working spec and task record for this follow-up. -- [x] Narrow `validate` exception handling in `src/exstruct/cli/edit.py` back to the original `(OSError, ValidationError, ValueError)` scope. -- [x] Add a regression test that proves `validate` still propagates `RuntimeError`. -- [x] Run targeted pytest for `tests/cli/test_edit_cli.py`. -- [x] Run `uv run task precommit-run`. -- [x] Update this Review section with the final verification result and retention decision. - -### Review - -- The new Devin review finding was valid: the shared `_is_cli_runtime_error(...)` helper widened `_run_validate_command(...)` to catch `RuntimeError`, which changed the historical validate-subcommand contract. -- The new Copilot review finding was not valid on the supported runtime. A direct `uv run python` probe confirmed that `isinstance(OSError(), OSError | RuntimeError | ValueError)` evaluates successfully on Python `3.11`, so no change was made for that comment. -- `src/exstruct/cli/edit.py` now uses a separate `_is_validate_cli_error(...)` helper so `patch` / `make` still catch `RuntimeError` while `validate` only catches `(OSError, ValidationError, ValueError)` as before. -- `tests/cli/test_edit_cli.py` now includes a regression test proving that `validate` propagates `RuntimeError` instead of converting it to handled CLI stderr output. -- Retention decision: - - No new ADR or permanent spec migration was needed. This follow-up only restores the pre-existing validate CLI error boundary inside the current edit CLI design. - - The temporary working notes for this review follow-up can remain limited to this section in `tasks/feature_spec.md` and `tasks/todo.md`. -- Verification: - - `gh api repos/harumiWeb/exstruct/pulls/112/comments` - - `gh api graphql -f query='query { repository(owner:"harumiWeb", name:"exstruct") { pullRequest(number: 112) { reviewThreads(first: 30) { nodes { id isResolved isOutdated comments(first: 20) { nodes { id author { login } body path url createdAt } } } } } } }'` - - `uv run python` probe for `isinstance(OSError(), OSError | RuntimeError | ValueError)` - - `uv run pytest tests/cli/test_edit_cli.py -q` - - `uv run task precommit-run` + - `rg -n "0\.7\.1|v0\.7\.1" CHANGELOG.md mkdocs.yml docs/release-notes/v0.7.1.md` + - `rg -n '^version = "0\.7\.1"$' pyproject.toml uv.lock` + - `rg -n "^## " tasks/feature_spec.md tasks/todo.md` + - `git diff --check -- CHANGELOG.md docs/release-notes/v0.7.1.md mkdocs.yml pyproject.toml uv.lock tasks/feature_spec.md tasks/todo.md` diff --git a/uv.lock b/uv.lock index 3fcde34..c6a77f3 100644 --- a/uv.lock +++ b/uv.lock @@ -651,7 +651,7 @@ wheels = [ [[package]] name = "exstruct" -version = "0.7.0" +version = "0.7.1" source = { editable = "." } dependencies = [ { name = "defusedxml" },