From afaccff39cab29aaac0d89dacc22a2c834bfd04d Mon Sep 17 00:00:00 2001 From: Ramesh Padmanabhaiah Date: Tue, 16 Jun 2026 11:44:00 -0700 Subject: [PATCH] Add bb document inspection commands --- .ai-context/COMMANDS.md | 4 ++ .ai-context/STATUS.md | 6 +- CHANGELOG.md | 2 + README.md | 4 ++ src/bankbuddy/bb/cli.py | 114 +++++++++++++++++++++++++++++++++ src/bankbuddy/bb/dao.py | 45 +++++++++++++ src/bankbuddy/bb/documents.py | 46 +++++++++++++ src/bankbuddy/bb/storage.py | 33 ++++++++++ tests/test_bb_documents_cli.py | 69 ++++++++++++++++++++ 9 files changed, 321 insertions(+), 2 deletions(-) diff --git a/.ai-context/COMMANDS.md b/.ai-context/COMMANDS.md index 0ad29a6..1119feb 100644 --- a/.ai-context/COMMANDS.md +++ b/.ai-context/COMMANDS.md @@ -37,6 +37,8 @@ bb init bb status bb documents import --dry-run --file path/to/document.pdf bb documents import --file path/to/document.pdf +bb documents list +bb documents show 1 ``` `bb` is the side-by-side command surface for new `BB_` schema work. It should @@ -45,6 +47,8 @@ surfaces remain available. Use `bb init` to apply current migrations and prepare v2 `financial/` storage roots for the active data home. `bb documents import` is parser-free: it hashes the explicit file, records v2 document/object metadata, and copies the canonical object into managed storage. +Use `bb documents list` and `bb documents show DOCUMENT_ID` to inspect imported +v2 documents without opening SQLite directly. ## BankBuddy CLI diff --git a/.ai-context/STATUS.md b/.ai-context/STATUS.md index ab4fdde..e8bf69b 100644 --- a/.ai-context/STATUS.md +++ b/.ai-context/STATUS.md @@ -15,8 +15,8 @@ section is `Unreleased`. - Banking CLI commands for banks, accounts, statement refs, imports, transactions, categories, reports, exports, storage migration, and status. - Side-by-side `bb` CLI for v2 financial intelligence initialization, - foundation status, storage readiness, generic document import, and `BB_` - schema visibility. 
+ foundation status, storage readiness, generic document import, document + inventory inspection, and `BB_` schema visibility. - Supported banking imports for Bank of America PDF/CSV, Apple Card PDF, ICICI `.xls`, and HDFC `.xls`. - Statement inventory and statement coverage auditing. @@ -35,6 +35,8 @@ section is `Unreleased`. without depending on the legacy `bankbuddy` command surface. - Generic `bb documents import` for parser-free explicit-file intake into `BB_DOCUMENT` and `BB_DOCUMENT_OBJECT` with SHA-256 canonical storage keys. +- `bb documents list/show` for read-only inspection of generic v2 document and + canonical object metadata. - Prospective relicensing from MIT to AGPL-3.0-or-later. - Canonical data-home layout with `database/`, `bank/`, and `tax/` directories. - First TaxBuddy CLI slice and `tax_documents` metadata index. diff --git a/CHANGELOG.md b/CHANGELOG.md index 16c92ff..d630e2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,8 @@ and versions are tracked in the repo-root `VERSION` file. ### Added +- Added `bb documents list` and `bb documents show DOCUMENT_ID` for read-only + inspection of generic v2 document records and canonical object metadata. - Added `bb documents import --dry-run --file ...` and `bb documents import --file ...` for parser-free v2 document intake with SHA-256 canonical object storage and idempotent duplicate handling. diff --git a/README.md b/README.md index 4cd444d..06fe8e3 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,8 @@ bb init bb status bb documents import --dry-run --file path/to/document.pdf bb documents import --file path/to/document.pdf +bb documents list +bb documents show 1 bankbuddy --help bankbuddy status bankbuddy init @@ -170,6 +172,8 @@ Use `bb documents import` for parser-free v2 document intake. It hashes the file, records a `BB_DOCUMENT`, stores a canonical object under `financial/canonical`, and leaves bank-specific parsing or inference for later workflows. 
+Use `bb documents list` and `bb documents show DOCUMENT_ID` to inspect the +generic v2 document inventory and canonical object metadata. Switch the current shell by exporting `BANKBUDDY_ENV`: diff --git a/src/bankbuddy/bb/cli.py b/src/bankbuddy/bb/cli.py index 7135584..629038b 100644 --- a/src/bankbuddy/bb/cli.py +++ b/src/bankbuddy/bb/cli.py @@ -10,7 +10,10 @@ from bankbuddy import __version__ from bankbuddy.database import initialize_database from bankbuddy.bb.documents import DocumentImportError +from bankbuddy.bb.documents import DocumentSummary +from bankbuddy.bb.documents import get_document_summary from bankbuddy.bb.documents import import_document +from bankbuddy.bb.documents import list_documents from bankbuddy.bb.documents import plan_document_import from bankbuddy.paths import resolve_app_paths from bankbuddy.bb.storage import ensure_financial_storage_dirs @@ -169,6 +172,31 @@ def documents() -> None: """Manage v2 documents.""" +@documents.command("list") +@click.pass_context +def documents_list(ctx: click.Context) -> None: + """List imported v2 documents.""" + + runtime = runtime_from_context(ctx) + paths = resolve_app_paths(environment=runtime.environment) + rows = list_documents(paths) + render_document_table(rows) + + +@documents.command("show") +@click.argument("document_id", type=int) +@click.pass_context +def documents_show(ctx: click.Context, document_id: int) -> None: + """Show one imported v2 document.""" + + runtime = runtime_from_context(ctx) + paths = resolve_app_paths(environment=runtime.environment) + summary = get_document_summary(paths, document_id) + if summary is None: + raise click.ClickException(f"Document not found: {document_id}") + render_document_summary(summary) + + @documents.command("import") @click.option("--dry-run", is_flag=True, help="Plan the import without writes.") @click.option( @@ -230,6 +258,92 @@ def _print_document_import_plan(plan, *, dry_run: bool) -> None: click.echo(f"Canonical object: 
{plan.canonical_relative_path}")
 
 
+def render_document_table(rows: list[DocumentSummary]) -> None:
+    """Render v2 documents as a compact pretty table."""
+
+    table = [
+        [
+            str(row.document.document_id),
+            row.document.original_file_name,
+            row.document.document_status,
+            str(row.canonical_object.byte_size)
+            if row.canonical_object and row.canonical_object.byte_size is not None
+            else "-",
+            row.canonical_object.media_type if row.canonical_object else "-",
+            row.document.file_hash[:12],
+        ]
+        for row in rows
+    ]
+    render_pretty_table(
+        ["ID", "File", "Status", "Size", "Media Type", "SHA-256"],
+        table,
+        align_right={0, 3},
+    )
+
+
+def render_document_summary(summary: DocumentSummary) -> None:
+    """Render one v2 document detail view."""
+
+    document = summary.document
+    canonical_object = summary.canonical_object
+    click.echo(f"Document ID: {document.document_id}")
+    click.echo(f"Original file: {document.original_file_name}")
+    click.echo(f"SHA-256: {document.file_hash}")
+    click.echo(f"Status: {document.document_status}")
+    click.echo(f"Type: {_display_value(document.document_type)}")
+    click.echo(f"Jurisdiction: {_display_value(document.jurisdiction_code)}")
+    click.echo(f"Tax year: {_display_value(document.tax_year)}")
+    if canonical_object is None:
+        click.echo("Canonical object ID: -")
+        click.echo("Canonical object: -")
+        return
+    click.echo(f"Canonical object ID: {canonical_object.document_object_id}")
+    click.echo(f"Canonical object: financial/canonical/{canonical_object.object_key}")
+    click.echo(f"Media type: {_display_value(canonical_object.media_type)}")
+    click.echo(f"Size: {_display_value(canonical_object.byte_size)} bytes")
+
+
+def render_pretty_table(
+    headers: list[str],
+    rows: list[list[str]],
+    *,
+    align_right: set[int] | None = None,
+) -> None:
+    """Render a small pretty table with vertical separators; rows may be empty."""
+
+    align_right = align_right or set()
+    widths = [
+        max([len(header), *(len(row[index]) for row in rows)])
+        for index, header in enumerate(headers)
+    ]
+    click.echo(_pretty_row(headers, widths, align_right=set()))
+    click.echo("-+-".join("-" * width for width in widths))
+    for row in rows:
+        click.echo(_pretty_row(row, widths, align_right=align_right))
+
+
+def _pretty_row(
+    values: list[str],
+    widths: list[int],
+    *,
+    align_right: set[int],
+) -> str:
+    cells = []
+    for index, value in enumerate(values):
+        width = widths[index]
+        if index in align_right:
+            cells.append(value.rjust(width))
+        else:
+            cells.append(value.ljust(width))
+    return " | ".join(cells)
+
+
+def _display_value(value) -> str:
+    if value is None:
+        return "-"
+    return str(value)
+
+
 def _v2_storage_ready(paths) -> bool:
     return all(
         path.is_dir()
diff --git a/src/bankbuddy/bb/dao.py b/src/bankbuddy/bb/dao.py
index e3dfe18..7d0f615 100644
--- a/src/bankbuddy/bb/dao.py
+++ b/src/bankbuddy/bb/dao.py
@@ -162,6 +162,51 @@ def find_document_by_hash(self, file_hash: str) -> DocumentRecord | None:
             return None
         return _document_from_row(row)
 
+    def get_document(self, document_id: int) -> DocumentRecord | None:
+        """Return one v2 document by id."""
+
+        row = self._conn.execute(
+            """
+            select
+                document_id,
+                file_hash,
+                original_file_name,
+                canonical_file_name,
+                source_uri,
+                document_type,
+                jurisdiction_code,
+                tax_year,
+                document_status
+            from BB_DOCUMENT
+            where document_id = ?
+ """, + (document_id,), + ).fetchone() + if row is None: + return None + return _document_from_row(row) + + def list_documents(self) -> list[DocumentRecord]: + """Return v2 documents ordered by id.""" + + rows = self._conn.execute( + """ + select + document_id, + file_hash, + original_file_name, + canonical_file_name, + source_uri, + document_type, + jurisdiction_code, + tax_year, + document_status + from BB_DOCUMENT + order by document_id + """ + ).fetchall() + return [_document_from_row(row) for row in rows] + def create_entity(self, record: EntityCreate) -> EntityRecord: """Create a v2 entity row.""" diff --git a/src/bankbuddy/bb/documents.py b/src/bankbuddy/bb/documents.py index 839aadd..61b6d86 100644 --- a/src/bankbuddy/bb/documents.py +++ b/src/bankbuddy/bb/documents.py @@ -44,6 +44,14 @@ class DocumentImportResult: duplicate: bool +@dataclass(frozen=True) +class DocumentSummary: + """Read-only document summary with canonical object metadata.""" + + document: DocumentRecord + canonical_object: DocumentObjectRecord | None + + class DocumentImportError(ValueError): """Raised when a generic document import cannot be planned or completed.""" @@ -125,6 +133,44 @@ def import_document(paths: AppPaths, source_path: Path) -> DocumentImportResult: ) +def list_documents(paths: AppPaths) -> list[DocumentSummary]: + """Return imported v2 documents with canonical object metadata.""" + + if not paths.database.exists(): + return [] + + with connect_database(paths) as conn: + documents = FinancialIntelligenceDAO(conn) + storage = FinancialStorageDAO(conn) + return [ + DocumentSummary( + document=document, + canonical_object=storage.find_canonical_document_object( + document.document_id + ), + ) + for document in documents.list_documents() + ] + + +def get_document_summary(paths: AppPaths, document_id: int) -> DocumentSummary | None: + """Return one imported v2 document with canonical object metadata.""" + + if not paths.database.exists(): + return None + + with 
connect_database(paths) as conn: + documents = FinancialIntelligenceDAO(conn) + storage = FinancialStorageDAO(conn) + document = documents.get_document(document_id) + if document is None: + return None + return DocumentSummary( + document=document, + canonical_object=storage.find_canonical_document_object(document_id), + ) + + def hash_file(path: Path) -> str: """Return the SHA-256 hex digest for a local file.""" diff --git a/src/bankbuddy/bb/storage.py b/src/bankbuddy/bb/storage.py index 8120c74..3fd60c6 100644 --- a/src/bankbuddy/bb/storage.py +++ b/src/bankbuddy/bb/storage.py @@ -144,6 +144,39 @@ def find_document_object( return None return _document_object_from_row(row) + def find_canonical_document_object( + self, + document_id: int, + ) -> DocumentObjectRecord | None: + """Return the canonical object for a document when one exists.""" + + row = self._conn.execute( + """ + select + BB_DOCUMENT_OBJECT.document_object_id, + BB_DOCUMENT_OBJECT.document_id, + BB_STORAGE_ROOT.storage_root_code, + BB_DOCUMENT_OBJECT.object_key, + BB_DOCUMENT_OBJECT.object_role, + BB_DOCUMENT_OBJECT.content_hash, + BB_DOCUMENT_OBJECT.byte_size, + BB_DOCUMENT_OBJECT.media_type, + BB_DOCUMENT_OBJECT.original_file_name, + BB_DOCUMENT_OBJECT.storage_root_id + from BB_DOCUMENT_OBJECT + join BB_STORAGE_ROOT using (storage_root_id) + where + BB_DOCUMENT_OBJECT.document_id = ? 
+ and BB_DOCUMENT_OBJECT.object_role = 'canonical' + order by BB_DOCUMENT_OBJECT.document_object_id + limit 1 + """, + (document_id,), + ).fetchone() + if row is None: + return None + return _document_object_from_row(row) + def create_document_view( self, record: DocumentViewCreate, diff --git a/tests/test_bb_documents_cli.py b/tests/test_bb_documents_cli.py index efab9a9..bdd5125 100644 --- a/tests/test_bb_documents_cli.py +++ b/tests/test_bb_documents_cli.py @@ -116,3 +116,72 @@ def test_bb_documents_import_is_idempotent_for_existing_hash(tmp_path) -> None: assert document_count == 1 assert object_count == 1 + + +def test_bb_documents_list_outputs_imported_documents(tmp_path) -> None: + home = tmp_path / "home" + first_source = tmp_path / "statement.pdf" + second_source = tmp_path / "letter.txt" + first_source.write_bytes(b"%PDF-1.4 placeholder") + second_source.write_text("plain text document", encoding="utf-8") + first_hash = sha256(first_source.read_bytes()).hexdigest() + second_hash = sha256(second_source.read_bytes()).hexdigest() + runner = CliRunner() + env = {"BANKBUDDY_HOME": str(home)} + runner.invoke(main, ["documents", "import", "--file", str(first_source)], env=env) + runner.invoke(main, ["documents", "import", "--file", str(second_source)], env=env) + + result = runner.invoke(main, ["documents", "list"], env=env) + + assert result.exit_code == 0 + assert "ID | File | Status | Size | Media Type | SHA-256" in ( + result.output + ) + assert f" 1 | statement.pdf | active | 20 | application/pdf | {first_hash[:12]}" in ( + result.output + ) + assert f" 2 | letter.txt | active | 19 | text/plain | {second_hash[:12]}" in ( + result.output + ) + + +def test_bb_documents_show_outputs_document_and_object_details(tmp_path) -> None: + home = tmp_path / "home" + source = tmp_path / "statement.pdf" + source_bytes = b"%PDF-1.4 placeholder" + source.write_bytes(source_bytes) + file_hash = sha256(source_bytes).hexdigest() + runner = CliRunner() + env = 
{"BANKBUDDY_HOME": str(home)} + runner.invoke(main, ["documents", "import", "--file", str(source)], env=env) + + result = runner.invoke(main, ["documents", "show", "1"], env=env) + + assert result.exit_code == 0 + assert "Document ID: 1" in result.output + assert "Original file: statement.pdf" in result.output + assert f"SHA-256: {file_hash}" in result.output + assert "Status: active" in result.output + assert "Type: -" in result.output + assert "Jurisdiction: -" in result.output + assert "Tax year: -" in result.output + assert "Canonical object ID: 1" in result.output + assert ( + f"Canonical object: financial/canonical/sha256/{file_hash[:2]}/" + f"{file_hash[2:4]}/{file_hash}.pdf" + ) in result.output + assert "Media type: application/pdf" in result.output + assert "Size: 20 bytes" in result.output + + +def test_bb_documents_show_fails_for_missing_document(tmp_path) -> None: + home = tmp_path / "home" + + result = CliRunner().invoke( + main, + ["documents", "show", "999"], + env={"BANKBUDDY_HOME": str(home)}, + ) + + assert result.exit_code == 1 + assert "Document not found: 999" in result.output