Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .ai-context/COMMANDS.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ bb init
bb status
bb documents import --dry-run --file path/to/document.pdf
bb documents import --file path/to/document.pdf
bb documents list
bb documents show 1
```

`bb` is the side-by-side command surface for new `BB_` schema work. It should
Expand All @@ -45,6 +47,8 @@ surfaces remain available. Use `bb init` to apply current migrations and
prepare v2 `financial/` storage roots for the active data home.
`bb documents import` is parser-free: it hashes the explicit file, records v2
document/object metadata, and copies the canonical object into managed storage.
Use `bb documents list` and `bb documents show DOCUMENT_ID` to inspect imported
v2 documents without opening SQLite directly.

## BankBuddy CLI

Expand Down
6 changes: 4 additions & 2 deletions .ai-context/STATUS.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ section is `Unreleased`.
- Banking CLI commands for banks, accounts, statement refs, imports,
transactions, categories, reports, exports, storage migration, and status.
- Side-by-side `bb` CLI for v2 financial intelligence initialization,
foundation status, storage readiness, generic document import, and `BB_`
schema visibility.
foundation status, storage readiness, generic document import, document
inventory inspection, and `BB_` schema visibility.
- Supported banking imports for Bank of America PDF/CSV, Apple Card PDF, ICICI
`.xls`, and HDFC `.xls`.
- Statement inventory and statement coverage auditing.
Expand All @@ -35,6 +35,8 @@ section is `Unreleased`.
without depending on the legacy `bankbuddy` command surface.
- Generic `bb documents import` for parser-free explicit-file intake into
`BB_DOCUMENT` and `BB_DOCUMENT_OBJECT` with SHA-256 canonical storage keys.
- `bb documents list/show` for read-only inspection of generic v2 document and
canonical object metadata.
- Prospective relicensing from MIT to AGPL-3.0-or-later.
- Canonical data-home layout with `database/`, `bank/`, and `tax/` directories.
- First TaxBuddy CLI slice and `tax_documents` metadata index.
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ and versions are tracked in the repo-root `VERSION` file.

### Added

- Added `bb documents list` and `bb documents show DOCUMENT_ID` for read-only
inspection of generic v2 document records and canonical object metadata.
- Added `bb documents import --dry-run --file ...` and
`bb documents import --file ...` for parser-free v2 document intake with
SHA-256 canonical object storage and idempotent duplicate handling.
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ bb init
bb status
bb documents import --dry-run --file path/to/document.pdf
bb documents import --file path/to/document.pdf
bb documents list
bb documents show 1
bankbuddy --help
bankbuddy status
bankbuddy init
Expand Down Expand Up @@ -170,6 +172,8 @@ Use `bb documents import` for parser-free v2 document intake. It hashes the
file, records a `BB_DOCUMENT`, stores a canonical object under
`financial/canonical`, and leaves bank-specific parsing or inference for later
workflows.
Use `bb documents list` and `bb documents show DOCUMENT_ID` to inspect the
generic v2 document inventory and canonical object metadata.

Switch the current shell by exporting `BANKBUDDY_ENV`:

Expand Down
114 changes: 114 additions & 0 deletions src/bankbuddy/bb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
from bankbuddy import __version__
from bankbuddy.database import initialize_database
from bankbuddy.bb.documents import DocumentImportError
from bankbuddy.bb.documents import DocumentSummary
from bankbuddy.bb.documents import get_document_summary
from bankbuddy.bb.documents import import_document
from bankbuddy.bb.documents import list_documents
from bankbuddy.bb.documents import plan_document_import
from bankbuddy.paths import resolve_app_paths
from bankbuddy.bb.storage import ensure_financial_storage_dirs
Expand Down Expand Up @@ -169,6 +172,31 @@ def documents() -> None:
"""Manage v2 documents."""


@documents.command("list")
@click.pass_context
def documents_list(ctx: click.Context) -> None:
    """List imported v2 documents."""

    # Resolve application paths for the environment chosen on this invocation.
    environment = runtime_from_context(ctx).environment
    app_paths = resolve_app_paths(environment=environment)
    # One summary per document goes straight to the table renderer.
    render_document_table(list_documents(app_paths))


@documents.command("show")
@click.argument("document_id", type=int)
@click.pass_context
def documents_show(ctx: click.Context, document_id: int) -> None:
    """Show one imported v2 document."""

    # Resolve application paths for the environment chosen on this invocation.
    environment = runtime_from_context(ctx).environment
    app_paths = resolve_app_paths(environment=environment)
    found = get_document_summary(app_paths, document_id)
    if found is not None:
        render_document_summary(found)
        return
    # A missing document is reported as a regular CLI error.
    raise click.ClickException(f"Document not found: {document_id}")


@documents.command("import")
@click.option("--dry-run", is_flag=True, help="Plan the import without writes.")
@click.option(
Expand Down Expand Up @@ -230,6 +258,92 @@ def _print_document_import_plan(plan, *, dry_run: bool) -> None:
click.echo(f"Canonical object: {plan.canonical_relative_path}")


def render_document_table(rows: list[DocumentSummary]) -> None:
    """Render v2 documents as a compact pretty table.

    Each summary becomes one row holding the document id, original file
    name, status, canonical object size, canonical object media type, and
    the first 12 characters of the document hash. Canonical metadata that is
    missing (no canonical object, or a ``None`` size/media type) is shown as
    ``-``.

    Args:
        rows: Document summaries to display, in display order.
    """

    table = []
    for row in rows:
        canonical = row.canonical_object
        size = "-"
        media_type = "-"
        if canonical is not None:
            if canonical.byte_size is not None:
                size = str(canonical.byte_size)
            # Every cell must be a string because the table renderer measures
            # cells with len(); a None media type would raise TypeError there.
            if canonical.media_type is not None:
                media_type = str(canonical.media_type)
        table.append(
            [
                str(row.document.document_id),
                row.document.original_file_name,
                row.document.document_status,
                size,
                media_type,
                row.document.file_hash[:12],
            ]
        )
    render_pretty_table(
        ["ID", "File", "Status", "Size", "Media Type", "SHA-256"],
        table,
        align_right={0, 3},
    )


def render_document_summary(summary: DocumentSummary) -> None:
    """Render one v2 document detail view.

    Prints the document fields one per line, followed by the canonical
    object fields. Optional values that are ``None`` are shown as ``-``.
    When the document has no canonical object, placeholder lines are printed
    for it and the media type and size lines are omitted.

    Args:
        summary: The document and its canonical object, if any.
    """

    document = summary.document
    canonical_object = summary.canonical_object
    click.echo(f"Document ID: {document.document_id}")
    click.echo(f"Original file: {document.original_file_name}")
    click.echo(f"SHA-256: {document.file_hash}")
    click.echo(f"Status: {document.document_status}")
    click.echo(f"Type: {_display_value(document.document_type)}")
    click.echo(f"Jurisdiction: {_display_value(document.jurisdiction_code)}")
    click.echo(f"Tax year: {_display_value(document.tax_year)}")
    if canonical_object is None:
        click.echo("Canonical object ID: -")
        click.echo("Canonical object: -")
        return
    click.echo(f"Canonical object ID: {canonical_object.document_object_id}")
    click.echo(f"Canonical object: financial/canonical/{canonical_object.object_key}")
    click.echo(f"Media type: {_display_value(canonical_object.media_type)}")
    byte_size = canonical_object.byte_size
    if byte_size is None:
        # Avoid the misleading "Size: - bytes" when the size is unknown.
        click.echo("Size: -")
    else:
        click.echo(f"Size: {byte_size} bytes")


def render_pretty_table(
    headers: list[str],
    rows: list[list[str]],
    *,
    align_right: set[int] | None = None,
) -> None:
    """Render a small pretty table with vertical separators.

    Prints a header line, a dashed separator line, and one line per row.
    Each column is as wide as its longest cell, header included. With no
    rows, only the header and separator are printed.

    Args:
        headers: Column titles; their count defines the number of columns.
        rows: Table body; every row holds one string per column.
        align_right: Zero-based indexes of columns whose body cells are
            right-aligned. Header cells are always left-aligned.
    """

    align_right = align_right or set()
    # Give max() a single list that always contains the header length, so an
    # empty table does not collapse to max(<int>), which raises TypeError.
    widths = [
        max([len(header), *(len(row[index]) for row in rows)])
        for index, header in enumerate(headers)
    ]
    click.echo(_pretty_row(headers, widths, align_right=set()))
    click.echo("-+-".join("-" * width for width in widths))
    for row in rows:
        click.echo(_pretty_row(row, widths, align_right=align_right))


def _pretty_row(
values: list[str],
widths: list[int],
*,
align_right: set[int],
) -> str:
cells = []
for index, value in enumerate(values):
width = widths[index]
if index in align_right:
cells.append(value.rjust(width))
else:
cells.append(value.ljust(width))
return " | ".join(cells)


def _display_value(value) -> str:
if value is None:
return "-"
return str(value)


def _v2_storage_ready(paths) -> bool:
return all(
path.is_dir()
Expand Down
45 changes: 45 additions & 0 deletions src/bankbuddy/bb/dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,51 @@ def find_document_by_hash(self, file_hash: str) -> DocumentRecord | None:
return None
return _document_from_row(row)

def get_document(self, document_id: int) -> DocumentRecord | None:
    """Look up a single v2 document by its id.

    Returns the matching record, or ``None`` when no ``BB_DOCUMENT`` row has
    that id.
    """

    query = """
        select
            document_id,
            file_hash,
            original_file_name,
            canonical_file_name,
            source_uri,
            document_type,
            jurisdiction_code,
            tax_year,
            document_status
        from BB_DOCUMENT
        where document_id = ?
        """
    match = self._conn.execute(query, (document_id,)).fetchone()
    return _document_from_row(match) if match is not None else None

def list_documents(self) -> list[DocumentRecord]:
    """Fetch every v2 document, sorted by ascending document id."""

    query = """
        select
            document_id,
            file_hash,
            original_file_name,
            canonical_file_name,
            source_uri,
            document_type,
            jurisdiction_code,
            tax_year,
            document_status
        from BB_DOCUMENT
        order by document_id
        """
    records = []
    for fetched in self._conn.execute(query).fetchall():
        records.append(_document_from_row(fetched))
    return records

def create_entity(self, record: EntityCreate) -> EntityRecord:
"""Create a v2 entity row."""

Expand Down
46 changes: 46 additions & 0 deletions src/bankbuddy/bb/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ class DocumentImportResult:
duplicate: bool


@dataclass(frozen=True)
class DocumentSummary:
    """Immutable pairing of a document with its canonical object metadata."""

    # The document record itself.
    document: DocumentRecord
    # The document's canonical stored object, or None when it has none.
    canonical_object: DocumentObjectRecord | None


class DocumentImportError(ValueError):
"""Raised when a generic document import cannot be planned or completed."""

Expand Down Expand Up @@ -125,6 +133,44 @@ def import_document(paths: AppPaths, source_path: Path) -> DocumentImportResult:
)


def list_documents(paths: AppPaths) -> list[DocumentSummary]:
    """Collect every imported v2 document with its canonical object metadata.

    Returns an empty list when the database file does not exist.
    """

    if not paths.database.exists():
        return []

    summaries: list[DocumentSummary] = []
    with connect_database(paths) as conn:
        document_dao = FinancialIntelligenceDAO(conn)
        storage_dao = FinancialStorageDAO(conn)
        for record in document_dao.list_documents():
            # The canonical object is looked up per document.
            canonical = storage_dao.find_canonical_document_object(
                record.document_id
            )
            summaries.append(
                DocumentSummary(document=record, canonical_object=canonical)
            )
    return summaries


def get_document_summary(paths: AppPaths, document_id: int) -> DocumentSummary | None:
    """Fetch one imported v2 document together with its canonical object.

    Returns ``None`` when the database file does not exist or no document
    has the given id.
    """

    if not paths.database.exists():
        return None

    with connect_database(paths) as conn:
        document_dao = FinancialIntelligenceDAO(conn)
        storage_dao = FinancialStorageDAO(conn)
        record = document_dao.get_document(document_id)
        if record is not None:
            canonical = storage_dao.find_canonical_document_object(document_id)
            return DocumentSummary(document=record, canonical_object=canonical)
    return None


def hash_file(path: Path) -> str:
"""Return the SHA-256 hex digest for a local file."""

Expand Down
33 changes: 33 additions & 0 deletions src/bankbuddy/bb/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,39 @@ def find_document_object(
return None
return _document_object_from_row(row)

def find_canonical_document_object(
    self,
    document_id: int,
) -> DocumentObjectRecord | None:
    """Look up the canonical stored object belonging to a document.

    Only objects whose role is ``canonical`` are considered. If several
    exist, the one with the lowest object id is returned; ``None`` is
    returned when there is none.
    """

    query = """
        select
            BB_DOCUMENT_OBJECT.document_object_id,
            BB_DOCUMENT_OBJECT.document_id,
            BB_STORAGE_ROOT.storage_root_code,
            BB_DOCUMENT_OBJECT.object_key,
            BB_DOCUMENT_OBJECT.object_role,
            BB_DOCUMENT_OBJECT.content_hash,
            BB_DOCUMENT_OBJECT.byte_size,
            BB_DOCUMENT_OBJECT.media_type,
            BB_DOCUMENT_OBJECT.original_file_name,
            BB_DOCUMENT_OBJECT.storage_root_id
        from BB_DOCUMENT_OBJECT
        join BB_STORAGE_ROOT using (storage_root_id)
        where
            BB_DOCUMENT_OBJECT.document_id = ?
            and BB_DOCUMENT_OBJECT.object_role = 'canonical'
        order by BB_DOCUMENT_OBJECT.document_object_id
        limit 1
        """
    match = self._conn.execute(query, (document_id,)).fetchone()
    return _document_object_from_row(match) if match is not None else None

def create_document_view(
self,
record: DocumentViewCreate,
Expand Down
Loading
Loading