diff --git a/.ai-context/COMMANDS.md b/.ai-context/COMMANDS.md index 1119feb..3c468ae 100644 --- a/.ai-context/COMMANDS.md +++ b/.ai-context/COMMANDS.md @@ -38,7 +38,9 @@ bb status bb documents import --dry-run --file path/to/document.pdf bb documents import --file path/to/document.pdf bb documents list +bb documents list --type bank_statement --jurisdiction US --tax-year 2025 bb documents show 1 +bb documents update 1 --type bank_statement --jurisdiction US --tax-year 2025 ``` `bb` is the side-by-side command surface for new `BB_` schema work. It should @@ -49,6 +51,8 @@ prepare v2 `financial/` storage roots for the active data home. document/object metadata, and copies the canonical object into managed storage. Use `bb documents list` and `bb documents show DOCUMENT_ID` to inspect imported v2 documents without opening SQLite directly. +Use `bb documents update DOCUMENT_ID` to set document metadata. `bb documents +list` supports metadata filters for type, jurisdiction, tax year, and status. ## BankBuddy CLI diff --git a/.ai-context/STATUS.md b/.ai-context/STATUS.md index e8bf69b..04d0285 100644 --- a/.ai-context/STATUS.md +++ b/.ai-context/STATUS.md @@ -16,7 +16,8 @@ section is `Unreleased`. transactions, categories, reports, exports, storage migration, and status. - Side-by-side `bb` CLI for v2 financial intelligence initialization, foundation status, storage readiness, generic document import, document - inventory inspection, and `BB_` schema visibility. + inventory inspection, metadata edits, metadata filters, and `BB_` schema + visibility. - Supported banking imports for Bank of America PDF/CSV, Apple Card PDF, ICICI `.xls`, and HDFC `.xls`. - Statement inventory and statement coverage auditing. @@ -37,6 +38,8 @@ section is `Unreleased`. `BB_DOCUMENT` and `BB_DOCUMENT_OBJECT` with SHA-256 canonical storage keys. - `bb documents list/show` for read-only inspection of generic v2 document and canonical object metadata. +- `bb documents update` and metadata filters on `bb documents list` for manual + classification by type, jurisdiction, tax year, and status. - Prospective relicensing from MIT to AGPL-3.0-or-later. - Canonical data-home layout with `database/`, `bank/`, and `tax/` directories. - First TaxBuddy CLI slice and `tax_documents` metadata index. diff --git a/CHANGELOG.md b/CHANGELOG.md index d630e2d..03aca7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,8 @@ and versions are tracked in the repo-root `VERSION` file. ### Added +- Added `bb documents update DOCUMENT_ID` for v2 document metadata edits and + metadata filters on `bb documents list`. - Added `bb documents list` and `bb documents show DOCUMENT_ID` for read-only inspection of generic v2 document records and canonical object metadata. - Added `bb documents import --dry-run --file ...` and diff --git a/README.md b/README.md index 06fe8e3..6704e21 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,9 @@ bb status bb documents import --dry-run --file path/to/document.pdf bb documents import --file path/to/document.pdf bb documents list +bb documents list --type bank_statement --jurisdiction US --tax-year 2025 bb documents show 1 +bb documents update 1 --type bank_statement --jurisdiction US --tax-year 2025 bankbuddy --help bankbuddy status bankbuddy init @@ -174,6 +176,10 @@ file, records a `BB_DOCUMENT`, stores a canonical object under workflows. Use `bb documents list` and `bb documents show DOCUMENT_ID` to inspect the generic v2 document inventory and canonical object metadata. +Use `bb documents update DOCUMENT_ID` to classify imported generic documents +with metadata such as type, jurisdiction, tax year, and status. `bb documents +list` accepts the same metadata filters so you can inspect a focused document +set without opening SQLite. Switch the current shell by exporting `BANKBUDDY_ENV`: diff --git a/src/bankbuddy/bb/cli.py b/src/bankbuddy/bb/cli.py index 629038b..ab9835c 100644 --- a/src/bankbuddy/bb/cli.py +++ b/src/bankbuddy/bb/cli.py @@ -9,12 +9,15 @@ from bankbuddy import __version__ from bankbuddy.database import initialize_database +from bankbuddy.bb.documents import DOCUMENT_STATUSES from bankbuddy.bb.documents import DocumentImportError +from bankbuddy.bb.documents import DocumentMetadataError from bankbuddy.bb.documents import DocumentSummary from bankbuddy.bb.documents import get_document_summary from bankbuddy.bb.documents import import_document from bankbuddy.bb.documents import list_documents from bankbuddy.bb.documents import plan_document_import +from bankbuddy.bb.documents import update_document_metadata from bankbuddy.paths import resolve_app_paths from bankbuddy.bb.storage import ensure_financial_storage_dirs from bankbuddy.runtime import CliRuntime @@ -173,13 +176,38 @@ def documents() -> None: @documents.command("list") +@click.option("--type", "document_type", help="Filter by document type.") +@click.option("--jurisdiction", "jurisdiction_code", help="Filter by jurisdiction code.") +@click.option( + "--tax-year", + type=click.IntRange(1000, 9999), + help="Filter by four-digit tax year.", +) +@click.option( + "--status", + "document_status", + type=click.Choice(DOCUMENT_STATUSES), + help="Filter by document status.", +) @click.pass_context -def documents_list(ctx: click.Context) -> None: +def documents_list( + ctx: click.Context, + document_type: str | None, + jurisdiction_code: str | None, + tax_year: int | None, + document_status: str | None, +) -> None: """List imported v2 documents.""" runtime = runtime_from_context(ctx) paths = resolve_app_paths(environment=runtime.environment) - rows = list_documents(paths) + rows = list_documents( + paths, + document_type=document_type, + jurisdiction_code=jurisdiction_code, + tax_year=tax_year, + document_status=document_status, + ) render_document_table(rows) @@ -197,6 +225,51 @@ def documents_show(ctx: click.Context, document_id: int) -> None: render_document_summary(summary) +@documents.command("update") +@click.argument("document_id", type=int) +@click.option("--type", "document_type", help="Set the document type.") +@click.option("--jurisdiction", "jurisdiction_code", help="Set the jurisdiction code.") +@click.option( + "--tax-year", + type=click.IntRange(1000, 9999), + help="Set the four-digit tax year.", +) +@click.option( + "--status", + "document_status", + type=click.Choice(DOCUMENT_STATUSES), + help="Set the document status.", +) +@click.pass_context +def documents_update( + ctx: click.Context, + document_id: int, + document_type: str | None, + jurisdiction_code: str | None, + tax_year: int | None, + document_status: str | None, +) -> None: + """Update one v2 document's metadata.""" + + runtime = runtime_from_context(ctx) + paths = resolve_app_paths(environment=runtime.environment) + try: + summary = update_document_metadata( + paths, + document_id, + document_type=document_type, + jurisdiction_code=jurisdiction_code, + tax_year=tax_year, + document_status=document_status, + ) + except DocumentMetadataError as exc: + raise click.ClickException(str(exc)) from exc + if summary is None: + raise click.ClickException(f"Document not found: {document_id}") + + render_document_summary(summary) + + @documents.command("import") @click.option("--dry-run", is_flag=True, help="Plan the import without writes.") @click.option( @@ -265,6 +338,9 @@ def render_document_table(rows: list[DocumentSummary]) -> None: [ str(row.document.document_id), row.document.original_file_name, + _display_value(row.document.document_type), + _display_value(row.document.jurisdiction_code), + _display_value(row.document.tax_year), row.document.document_status, str(row.canonical_object.byte_size) if row.canonical_object and row.canonical_object.byte_size is not None @@ -275,9 +351,19 @@ def render_document_table(rows: list[DocumentSummary]) -> None: for row in rows ] render_pretty_table( - ["ID", "File", "Status", "Size", "Media Type", "SHA-256"], + [ + "ID", + "File", + "Type", + "Jurisdiction", + "Tax Year", + "Status", + "Size", + "Media Type", + "SHA-256", + ], table, - align_right={0, 3}, + align_right={0, 4, 6}, ) diff --git a/src/bankbuddy/bb/dao.py b/src/bankbuddy/bb/dao.py index 7d0f615..ba7c4f3 100644 --- a/src/bankbuddy/bb/dao.py +++ b/src/bankbuddy/bb/dao.py @@ -6,6 +6,8 @@ from bankbuddy.bb.records import ( DocumentCreate, + DocumentListFilter, + DocumentMetadataUpdate, DocumentRecord, EntityAttributeCreate, EntityAttributeRecord, @@ -186,11 +188,32 @@ def get_document(self, document_id: int) -> DocumentRecord | None: return None return _document_from_row(row) - def list_documents(self) -> list[DocumentRecord]: + def list_documents( + self, + filters: DocumentListFilter | None = None, + ) -> list[DocumentRecord]: """Return v2 documents ordered by id.""" + filters = filters or DocumentListFilter() + conditions: list[str] = [] + params: list[object] = [] + if filters.document_type is not None: + conditions.append("document_type = ?") + params.append(filters.document_type) + if filters.jurisdiction_code is not None: + conditions.append("jurisdiction_code = ?") + params.append(filters.jurisdiction_code) + if filters.tax_year is not None: + conditions.append("tax_year = ?") + params.append(filters.tax_year) + if filters.document_status is not None: + conditions.append("document_status = ?") + params.append(filters.document_status) + where_clause = "" + if conditions: + where_clause = "where " + " and ".join(conditions) rows = self._conn.execute( - """ + f""" select document_id, file_hash, @@ -202,11 +225,65 @@ def list_documents(self) -> list[DocumentRecord]: tax_year, document_status from BB_DOCUMENT + {where_clause} order by document_id - """ + """, + params, ).fetchall() return [_document_from_row(row) for row in rows] + def update_document_metadata( + self, + document_id: int, + update: DocumentMetadataUpdate, + ) -> DocumentRecord | None: + """Update metadata fields on one v2 document.""" + + assignments: list[str] = [] + params: list[object] = [] + if update.document_type is not None: + assignments.append("document_type = ?") + params.append(update.document_type) + if update.jurisdiction_code is not None: + assignments.append("jurisdiction_code = ?") + params.append(update.jurisdiction_code) + if update.tax_year is not None: + assignments.append("tax_year = ?") + params.append(update.tax_year) + if update.document_status is not None: + assignments.append("document_status = ?") + params.append(update.document_status) + if not assignments: + return self.get_document(document_id) + + params.append(document_id) + cursor = self._conn.execute( + f""" + update BB_DOCUMENT + set + {", ".join(assignments)}, + updated_at = current_timestamp + where document_id = ? + """, + params, + ) + if cursor.rowcount == 0: + return None + return self.get_document(document_id) + + def jurisdiction_exists(self, jurisdiction_code: str) -> bool: + """Return whether a seeded jurisdiction exists.""" + + row = self._conn.execute( + """ + select 1 + from BB_JURISDICTION + where jurisdiction_code = ? + """, + (jurisdiction_code,), + ).fetchone() + return row is not None + def create_entity(self, record: EntityCreate) -> EntityRecord: """Create a v2 entity row.""" diff --git a/src/bankbuddy/bb/documents.py b/src/bankbuddy/bb/documents.py index 61b6d86..8ee5261 100644 --- a/src/bankbuddy/bb/documents.py +++ b/src/bankbuddy/bb/documents.py @@ -10,6 +10,8 @@ from bankbuddy.bb.dao import FinancialIntelligenceDAO from bankbuddy.bb.records import DocumentCreate +from bankbuddy.bb.records import DocumentListFilter +from bankbuddy.bb.records import DocumentMetadataUpdate from bankbuddy.bb.records import DocumentObjectCreate from bankbuddy.bb.records import DocumentObjectRecord from bankbuddy.bb.records import DocumentRecord @@ -22,6 +24,9 @@ from bankbuddy.paths import AppPaths +DOCUMENT_STATUSES = ("active", "archived", "duplicate", "failed") + + @dataclass(frozen=True) class DocumentImportPlan: """Dry-run-safe plan for importing one document.""" @@ -56,6 +61,10 @@ class DocumentImportError(ValueError): """Raised when a generic document import cannot be planned or completed.""" +class DocumentMetadataError(ValueError): + """Raised when document metadata cannot be updated.""" + + def plan_document_import(paths: AppPaths, source_path: Path) -> DocumentImportPlan: """Return a deterministic import plan without creating directories or rows.""" @@ -133,12 +142,25 @@ def import_document(paths: AppPaths, source_path: Path) -> DocumentImportResult: ) -def list_documents(paths: AppPaths) -> list[DocumentSummary]: +def list_documents( + paths: AppPaths, + *, + document_type: str | None = None, + jurisdiction_code: str | None = None, + tax_year: int | None = None, + document_status: str | None = None, +) -> list[DocumentSummary]: """Return imported v2 documents with canonical object metadata.""" if not paths.database.exists(): return [] + filters = DocumentListFilter( + document_type=_clean_text(document_type), + jurisdiction_code=_clean_jurisdiction_code(jurisdiction_code), + tax_year=tax_year, + document_status=document_status, + ) with connect_database(paths) as conn: documents = FinancialIntelligenceDAO(conn) storage = FinancialStorageDAO(conn) @@ -149,7 +171,7 @@ def list_documents(paths: AppPaths) -> list[DocumentSummary]: document.document_id ), ) - for document in documents.list_documents() + for document in documents.list_documents(filters) ] @@ -171,6 +193,46 @@ def get_document_summary(paths: AppPaths, document_id: int) -> DocumentSummary | ) +def update_document_metadata( + paths: AppPaths, + document_id: int, + *, + document_type: str | None = None, + jurisdiction_code: str | None = None, + tax_year: int | None = None, + document_status: str | None = None, +) -> DocumentSummary | None: + """Update one document's user-editable metadata.""" + + update = _build_metadata_update( + document_type=document_type, + jurisdiction_code=jurisdiction_code, + tax_year=tax_year, + document_status=document_status, + ) + if not paths.database.exists(): + return None + + with connect_database(paths) as conn: + documents = FinancialIntelligenceDAO(conn) + storage = FinancialStorageDAO(conn) + if ( + update.jurisdiction_code is not None + and not documents.jurisdiction_exists(update.jurisdiction_code) + ): + raise DocumentMetadataError( + f"Unknown jurisdiction code: {update.jurisdiction_code}" + ) + document = documents.update_document_metadata(document_id, update) + if document is None: + return None + conn.commit() + return DocumentSummary( + document=document, + canonical_object=storage.find_canonical_document_object(document_id), + ) + + def hash_file(path: Path) -> str: """Return the SHA-256 hex digest for a local file.""" @@ -188,6 +250,54 @@ def guess_media_type(path: Path) -> str: return media_type or "application/octet-stream" +def _build_metadata_update( + *, + document_type: str | None, + jurisdiction_code: str | None, + tax_year: int | None, + document_status: str | None, +) -> DocumentMetadataUpdate: + normalized_type = _clean_text(document_type) + normalized_jurisdiction = _clean_jurisdiction_code(jurisdiction_code) + normalized_status = _clean_text(document_status) + if normalized_status is not None and normalized_status not in DOCUMENT_STATUSES: + raise DocumentMetadataError(f"Unknown document status: {normalized_status}") + if tax_year is not None and (tax_year < 1000 or tax_year > 9999): + raise DocumentMetadataError("Tax year must be a four-digit year.") + if all( + value is None + for value in ( + normalized_type, + normalized_jurisdiction, + tax_year, + normalized_status, + ) + ): + raise DocumentMetadataError("At least one metadata option is required.") + return DocumentMetadataUpdate( + document_type=normalized_type, + jurisdiction_code=normalized_jurisdiction, + tax_year=tax_year, + document_status=normalized_status, + ) + + +def _clean_text(value: str | None) -> str | None: + if value is None: + return None + cleaned = value.strip() + if not cleaned: + return None + return cleaned + + +def _clean_jurisdiction_code(value: str | None) -> str | None: + cleaned = _clean_text(value) + if cleaned is None: + return None + return cleaned.upper() + + def _copy_canonical_object(source_path: Path, canonical_path: Path, file_hash: str) -> None: canonical_path.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(source_path, canonical_path) diff --git a/src/bankbuddy/bb/records.py b/src/bankbuddy/bb/records.py index c239f9a..d216076 100644 --- a/src/bankbuddy/bb/records.py +++ b/src/bankbuddy/bb/records.py @@ -71,6 +71,26 @@ class DocumentRecord(DocumentCreate): document_id: int = 0 +@dataclass(frozen=True) +class DocumentListFilter: + """Optional filters for listing v2 documents.""" + + document_type: str | None = None + jurisdiction_code: str | None = None + tax_year: int | None = None + document_status: str | None = None + + +@dataclass(frozen=True) +class DocumentMetadataUpdate: + """Optional metadata changes for one v2 document.""" + + document_type: str | None = None + jurisdiction_code: str | None = None + tax_year: int | None = None + document_status: str | None = None + + @dataclass(frozen=True) class StorageRootRecord: """A configured v2 document storage root.""" diff --git a/tests/test_bb_documents_cli.py b/tests/test_bb_documents_cli.py index bdd5125..a911523 100644 --- a/tests/test_bb_documents_cli.py +++ b/tests/test_bb_documents_cli.py @@ -134,16 +134,200 @@ def test_bb_documents_list_outputs_imported_documents(tmp_path) -> None: result = runner.invoke(main, ["documents", "list"], env=env) assert result.exit_code == 0 - assert "ID | File | Status | Size | Media Type | SHA-256" in ( - result.output + assert ( + "ID | File | Type | Jurisdiction | Tax Year | Status | Size | " + "Media Type | SHA-256" + ) in result.output + assert ( + f" 1 | statement.pdf | - | - | - | active | 20 | " + f"application/pdf | {first_hash[:12]}" + ) in result.output + assert ( + f" 2 | letter.txt | - | - | - | active | 19 | " + f"text/plain | {second_hash[:12]}" + ) in result.output + + +def test_bb_documents_update_sets_metadata_and_show_reflects_it(tmp_path) -> None: + home = tmp_path / "home" + source = tmp_path / "statement.pdf" + source.write_bytes(b"%PDF-1.4 placeholder") + runner = CliRunner() + env = {"BANKBUDDY_HOME": str(home)} + runner.invoke(main, ["documents", "import", "--file", str(source)], env=env) + + result = runner.invoke( + main, + [ + "documents", + "update", + "1", + "--type", + "bank_statement", + "--jurisdiction", + "us", + "--tax-year", + "2025", + "--status", + "archived", + ], + env=env, ) - assert f" 1 | statement.pdf | active | 20 | application/pdf | {first_hash[:12]}" in ( - result.output + + assert result.exit_code == 0 + assert "Document ID: 1" in result.output + assert "Type: bank_statement" in result.output + assert "Jurisdiction: US" in result.output + assert "Tax year: 2025" in result.output + assert "Status: archived" in result.output + + show = runner.invoke(main, ["documents", "show", "1"], env=env) + assert show.exit_code == 0 + assert "Type: bank_statement" in show.output + assert "Jurisdiction: US" in show.output + assert "Tax year: 2025" in show.output + assert "Status: archived" in show.output + + with connect_database(resolve_app_paths(home)) as conn: + document = conn.execute( + """ + select document_type, jurisdiction_code, tax_year, document_status + from BB_DOCUMENT + where document_id = 1 + """ + ).fetchone() + + assert document["document_type"] == "bank_statement" + assert document["jurisdiction_code"] == "US" + assert document["tax_year"] == 2025 + assert document["document_status"] == "archived" + + +def test_bb_documents_list_filters_by_metadata(tmp_path) -> None: + home = tmp_path / "home" + first_source = tmp_path / "statement.pdf" + second_source = tmp_path / "tax.pdf" + first_source.write_bytes(b"%PDF-1.4 statement") + second_source.write_bytes(b"%PDF-1.4 tax") + runner = CliRunner() + env = {"BANKBUDDY_HOME": str(home)} + runner.invoke(main, ["documents", "import", "--file", str(first_source)], env=env) + runner.invoke(main, ["documents", "import", "--file", str(second_source)], env=env) + runner.invoke( + main, + [ + "documents", + "update", + "1", + "--type", + "bank_statement", + "--jurisdiction", + "US", + "--tax-year", + "2025", + ], + env=env, ) - assert f" 2 | letter.txt | active | 19 | text/plain | {second_hash[:12]}" in ( - result.output + runner.invoke( + main, + [ + "documents", + "update", + "2", + "--type", + "tax_document", + "--jurisdiction", + "IN", + "--tax-year", + "2024", + "--status", + "archived", + ], + env=env, ) + result = runner.invoke( + main, + [ + "documents", + "list", + "--type", + "bank_statement", + "--jurisdiction", + "us", + "--tax-year", + "2025", + "--status", + "active", + ], + env=env, + ) + + assert result.exit_code == 0 + assert "statement.pdf" in result.output + assert "bank_statement" in result.output + assert "US" in result.output + assert "2025" in result.output + assert "tax.pdf" not in result.output + assert "tax_document" not in result.output + + +def test_bb_documents_update_requires_metadata_options(tmp_path) -> None: + home = tmp_path / "home" + source = tmp_path / "statement.pdf" + source.write_bytes(b"%PDF-1.4 placeholder") + runner = CliRunner() + env = {"BANKBUDDY_HOME": str(home)} + runner.invoke(main, ["documents", "import", "--file", str(source)], env=env) + + result = runner.invoke(main, ["documents", "update", "1"], env=env) + + assert result.exit_code == 1 + assert "At least one metadata option is required." in result.output + + +def test_bb_documents_update_fails_for_unknown_jurisdiction(tmp_path) -> None: + home = tmp_path / "home" + source = tmp_path / "statement.pdf" + source.write_bytes(b"%PDF-1.4 placeholder") + runner = CliRunner() + env = {"BANKBUDDY_HOME": str(home)} + runner.invoke(main, ["documents", "import", "--file", str(source)], env=env) + + result = runner.invoke( + main, + [ + "documents", + "update", + "1", + "--jurisdiction", + "zz", + ], + env=env, + ) + + assert result.exit_code == 1 + assert "Unknown jurisdiction code: ZZ" in result.output + + +def test_bb_documents_update_fails_for_missing_document(tmp_path) -> None: + home = tmp_path / "home" + + result = CliRunner().invoke( + main, + [ + "documents", + "update", + "999", + "--type", + "bank_statement", + ], + env={"BANKBUDDY_HOME": str(home)}, + ) + + assert result.exit_code == 1 + assert "Document not found: 999" in result.output + def test_bb_documents_show_outputs_document_and_object_details(tmp_path) -> None: home = tmp_path / "home"