From 000c7fb5900cac24210a4694356f1c0285f93162 Mon Sep 17 00:00:00 2001 From: "Joseph T. French" Date: Sat, 25 Apr 2026 21:38:25 -0500 Subject: [PATCH 1/7] feat: Implement rendering projections and update fact retrieval logic for statement envelopes --- .../graphql/types/information_block.py | 48 +++ robosystems/models/api/information_block.py | 116 ++++++ .../operations/information_block/statement.py | 364 ++++++++++++++++-- .../test_statement_handlers.py | 38 +- 4 files changed, 528 insertions(+), 38 deletions(-) diff --git a/robosystems/graphql/types/information_block.py b/robosystems/graphql/types/information_block.py index 6f55d46f..486ce8d2 100644 --- a/robosystems/graphql/types/information_block.py +++ b/robosystems/graphql/types/information_block.py @@ -43,6 +43,15 @@ from robosystems.models.api.information_block import ( InformationModelResponse as PydanticInformationModel, ) +from robosystems.models.api.information_block import ( + RenderingLite as PydanticRendering, +) +from robosystems.models.api.information_block import ( + RenderingPeriodLite as PydanticRenderingPeriod, +) +from robosystems.models.api.information_block import ( + RenderingRowLite as PydanticRenderingRow, +) from robosystems.models.api.information_block import ( RuleLite as PydanticRule, ) @@ -52,9 +61,15 @@ from robosystems.models.api.information_block import ( RuleVariableLite as PydanticRuleVariable, ) +from robosystems.models.api.information_block import ( + ValidationLite as PydanticValidation, +) from robosystems.models.api.information_block import ( VerificationResultLite as PydanticVerificationResult, ) +from robosystems.models.api.information_block import ( + ViewProjections as PydanticViewProjections, +) # ── Leaf types — auto-derived from Pydantic ──────────────────────────────── @@ -115,6 +130,31 @@ class InformationBlockVerificationResult: """Persisted outcome of a rule evaluation.""" +@strawberry.experimental.pydantic.type(model=PydanticRenderingRow, all_fields=True) 
+class InformationBlockRenderingRow: + """One row of a server-side rendered statement.""" + + +@strawberry.experimental.pydantic.type(model=PydanticRenderingPeriod, all_fields=True) +class InformationBlockRenderingPeriod: + """One period column in a rendered statement.""" + + +@strawberry.experimental.pydantic.type(model=PydanticValidation, all_fields=True) +class InformationBlockValidation: + """Outcome of guard-rail validation on a rendered statement.""" + + +@strawberry.experimental.pydantic.type(model=PydanticRendering, all_fields=True) +class InformationBlockRendering: + """Pre-computed rendering projection — rows + periods + validation.""" + + +@strawberry.experimental.pydantic.type(model=PydanticViewProjections, all_fields=True) +class InformationBlockViewProjections: + """Charlie's six type-of View arms surfaced in the envelope.""" + + # Mechanics + template are exposed as ``scalars.JSON`` with a ``kind`` # discriminator embedded in the payload — see the module docstring for # why this is preferred over a typed Strawberry union. 
@@ -181,6 +221,8 @@ class InformationBlock: fact_set: InformationBlockFactSet | None verification_results: list[InformationBlockVerificationResult] + view: InformationBlockViewProjections + @classmethod def from_pydantic(cls, envelope: PydanticInformationBlock) -> InformationBlock: return cls( @@ -212,6 +254,7 @@ def from_pydantic(cls, envelope: PydanticInformationBlock) -> InformationBlock: InformationBlockVerificationResult.from_pydantic(vr) for vr in envelope.verification_results ], + view=InformationBlockViewProjections.from_pydantic(envelope.view), ) @@ -223,9 +266,14 @@ def from_pydantic(cls, envelope: PydanticInformationBlock) -> InformationBlock: "InformationBlockElement", "InformationBlockFact", "InformationBlockFactSet", + "InformationBlockRendering", + "InformationBlockRenderingPeriod", + "InformationBlockRenderingRow", "InformationBlockRule", "InformationBlockRuleTarget", "InformationBlockRuleVariable", + "InformationBlockValidation", "InformationBlockVerificationResult", + "InformationBlockViewProjections", "InformationModel", ] diff --git a/robosystems/models/api/information_block.py b/robosystems/models/api/information_block.py index 3730b9c2..30b8749d 100644 --- a/robosystems/models/api/information_block.py +++ b/robosystems/models/api/information_block.py @@ -485,6 +485,106 @@ class ArtifactResponse(BaseModel): mechanics: ArtifactMechanics +# ── View projections ─────────────────────────────────────────────────────── + + +class RenderingRowLite(BaseModel): + """One row of a server-side rendered statement. + + Mirrors :class:`FactRow` from the legacy + :mod:`robosystems.operations.roboledger.reports.fact_grid` but lives at + the API boundary so envelope consumers don't depend on the + fact-grid module. ``values`` is one entry per period column in + :class:`RenderingLite.periods`. 
+ """ + + model_config = ConfigDict(from_attributes=True) + + element_id: str + element_qname: str | None = None + element_name: str + classification: str | None = Field( + None, + description=( + "FASB elementsOfFinancialStatements trait identifier — 'asset', " + "'liability', 'equity', 'revenue', 'expense'. Surfaced so the " + "viewer can color-code or group rows without a follow-up trait " + "lookup." + ), + ) + balance_type: str | None = None + values: list[float | None] = Field(default_factory=list) + is_subtotal: bool = False + depth: int = 0 + + +class RenderingPeriodLite(BaseModel): + """One period column in a rendered statement.""" + + model_config = ConfigDict(from_attributes=True) + + start: date + end: date + label: str | None = None + + +class ValidationLite(BaseModel): + """Outcome of guard-rail validation on a rendered statement. + + Distinct from :class:`VerificationResultLite` (which surfaces the + rule-engine outcomes from ``public.verification_results``). This lite + type carries the synchronous guard-rail checks computed at + envelope-build time — accounting equation, totals foot, etc. + """ + + model_config = ConfigDict(from_attributes=True) + + passed: bool = True + checks: list[str] = Field(default_factory=list) + failures: list[str] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) + + +class RenderingLite(BaseModel): + """Pre-computed rendering projection of an Information Block. + + Computed server-side at envelope-build time for blocks where rendering + is deterministic (the statement family today; future block types add + their own rendering builders). The frontend's ``BlockView`` + ``Rendering`` projection consumes this directly — no client-side + rollup, depth computation, or calculation walk needed. 
+ """ + + model_config = ConfigDict(from_attributes=True) + + rows: list[RenderingRowLite] = Field(default_factory=list) + periods: list[RenderingPeriodLite] = Field(default_factory=list) + validation: ValidationLite | None = None + unmapped_count: int = 0 + + +class ViewProjections(BaseModel): + """Charlie's six ``type-of View`` arms, surfaced at the envelope boundary. + + Each projection is computed server-side at envelope-build time when + its source data is available. The frontend's ``BlockView`` dispatcher + routes to the projection component matching the user's selected view + mode; missing projections (those still in backlog) render as empty + states without breaking the dispatcher. + + Today: ``rendering`` is computed for the statement family. + Other arms (``fact_table``, ``model_structure``, ``verification_results``, + ``report_elements``, ``business_rules``) come online as their backend + support lands; ``fact_table`` is trivially derivable from + ``InformationBlockEnvelope.facts`` and may stay as a frontend-only + projection. + """ + + model_config = ConfigDict(from_attributes=True) + + rendering: RenderingLite | None = None + + # ── Envelope root ────────────────────────────────────────────────────────── @@ -544,6 +644,17 @@ class InformationBlockEnvelope(BaseModel): ) verification_results: list[VerificationResultLite] = Field(default_factory=list) + view: ViewProjections = Field( + default_factory=ViewProjections, + description=( + "Server-computed view projections (Charlie's six type-of View " + "arms). ``view.rendering`` carries pre-computed rows + periods + " + "validation for blocks where rendering is deterministic (the " + "statement family today). Other projections come online as " + "their backend support lands — see :class:`ViewProjections`." 
+ ), + ) + # ── Request models ───────────────────────────────────────────────────────── @@ -702,11 +813,16 @@ class EvaluateRulesResponse(BaseModel): "InformationBlockEnvelope", "InformationModelResponse", "MetricMechanics", + "RenderingLite", + "RenderingPeriodLite", + "RenderingRowLite", "RuleLite", "RuleTargetLite", "RuleVariableLite", "ScheduleMechanics", "StatementMechanics", "UpdateInformationBlockRequest", + "ValidationLite", "VerificationResultLite", + "ViewProjections", ] diff --git a/robosystems/operations/information_block/statement.py b/robosystems/operations/information_block/statement.py index df6dbeb6..53d94734 100644 --- a/robosystems/operations/information_block/statement.py +++ b/robosystems/operations/information_block/statement.py @@ -15,29 +15,57 @@ ``dispatch_create``/``update``/``delete`` handlers in the registry entry are the not-implemented stubs built by ``make_not_implemented_handler``. + +**Rendering projection.** As of Plan B (2026-04-25) the envelope +populates ``view.rendering`` with the server-computed statement grid +(rows + periods + validation). This replaces the legacy +``getStatement(reportId, structureType)`` REST path: frontend +``BlockView`` consumes ``envelope.view.rendering`` directly, no +client-side rollup or hierarchy walk needed. The pure in-memory rollup +helpers (``_build_rows``, ``_facts_to_balance_dict``, ``_natural_sign``) +are imported from +:mod:`robosystems.operations.roboledger.reports.fact_grid`; the +hierarchy + calculations + classifications are derived from the +already-loaded envelope atoms (no redundant SQL). 
""" from __future__ import annotations from collections.abc import Callable +from datetime import date from functools import partial from typing import TYPE_CHECKING -from sqlalchemy import select +from sqlalchemy import select, text from robosystems.models.api.information_block import ( ArtifactResponse, InformationBlockEnvelope, InformationModelResponse, + RenderingLite, + RenderingPeriodLite, + RenderingRowLite, StatementMechanics, + ValidationLite, + ViewProjections, ) -from robosystems.models.extensions.roboledger import Fact, Report +from robosystems.models.extensions import Association, Element +from robosystems.models.extensions.roboledger import Fact from robosystems.operations.information_block.envelope import ( association_to_connection, element_to_lite, fact_to_lite, load_base_envelope_atoms, ) +from robosystems.operations.roboledger.reports.fact_grid import ( + FactRow, + PeriodSpec, + ReportFact, + _Balance, # type: ignore[reportPrivateUsage] + _build_rows, # type: ignore[reportPrivateUsage] + _HierarchyNode, # type: ignore[reportPrivateUsage] +) +from robosystems.operations.roboledger.reports.guard_rails import validate_report if TYPE_CHECKING: from sqlalchemy.orm import Session @@ -67,16 +95,17 @@ def _build_statement_envelope( expected block_type — lets :func:`get_information_block` cleanly return nothing to the caller. - Surfaces facts from the **most recent** Report that has at least one - fact for this structure's elements. Scoping by element membership — - rather than taking the latest Report of any type — avoids an empty - envelope when a tenant's most recent report is for a different - statement (e.g. asking for the BS envelope when the newest report is - an IS). On the library sentinel the search_path is ``public`` and the - Report table is empty, so ``facts`` comes back empty, which is the - correct behaviour for the sentinel. 
A future revision of this - behaviour will replace the heuristic with explicit ``fact_set_id`` - selection once write paths stamp FactSet rows on every report. + **Read path (Plan B, Apr 2026).** Facts are loaded by `fact_set_id` — + the canonical Block-instance pin per Charlie's PDF synonymy ("Block + and Fact Set are synonyms"). ``load_base_envelope_atoms`` already + loads the latest FactSet for this Structure (ordered by + ``period_end`` desc, ``created_at`` desc); we filter facts by that + FactSet's id. On the library sentinel and on tenant graphs with no + generated reports the FactSet is null and ``facts`` comes back empty + — the correct behaviour for both. This replaces the prior + "latest Report touching these elements" heuristic; writes have + stamped both ``report_id`` and ``fact_set_id`` since Phase gamma.1 so + the switch is a strict simplification (one less query per envelope). """ atoms = load_base_envelope_atoms( session, structure_id, expected_block_type=block_type @@ -88,26 +117,17 @@ def _build_statement_envelope( element_ids = atoms.element_ids facts: list[Fact] = [] - if element_ids: - latest_report_id = session.execute( - select(Report.id) - .join(Fact, Fact.report_id == Report.id) - .where(Fact.element_id.in_(element_ids)) - .order_by(Report.created_at.desc()) - .limit(1) - ).scalar() - - if latest_report_id is not None: - facts = list( - session.execute( - select(Fact).where( - Fact.report_id == latest_report_id, - Fact.element_id.in_(element_ids), - ) + if element_ids and atoms.fact_set is not None: + facts = list( + session.execute( + select(Fact).where( + Fact.fact_set_id == atoms.fact_set.id, + Fact.element_id.in_(element_ids), ) - .scalars() - .all() ) + .scalars() + .all() + ) # Mechanics are read from the typed ``artifact_mechanics`` column when # populated; library-seeded rows that haven't been enriched fall back @@ -117,6 +137,18 @@ def _build_statement_envelope( else: mechanics = StatementMechanics(kind="statement_renderer") + # 
Compute the Rendering view projection from already-loaded atoms + + # the loaded facts. Adds one classification lookup query; everything + # else (hierarchy, calculations, root order) is derived in-memory. + rendering = _build_statement_rendering( + session, + elements=atoms.elements, + associations=atoms.associations, + facts=facts, + structure_id=structure.id, + block_type=block_type, + ) + display_name, _display_plural = STATEMENT_DISPLAY[block_type] return InformationBlockEnvelope( id=structure.id, @@ -145,7 +177,279 @@ def _build_statement_envelope( rules=atoms.rules, fact_set=atoms.fact_set, verification_results=atoms.verification_results, + view=ViewProjections(rendering=rendering), + ) + + +# ── Rendering projection — server-side computed at envelope build ───────── + + +def _build_statement_rendering( + session: Session, + *, + elements: list[Element], + associations: list[Association], + facts: list[Fact], + structure_id: str, + block_type: str, +) -> RenderingLite: + """Compute the Rendering view projection for a statement-family block. + + Reuses the pure in-memory rollup logic from + :mod:`robosystems.operations.roboledger.reports.fact_grid` (``_build_rows``) + and the guard-rail checks from + :mod:`robosystems.operations.roboledger.reports.guard_rails` + (``validate_report``). Hierarchy and calculations are derived from the + already-loaded ``associations`` (no redundant SQL); classifications are + fetched in a single trait-axis query. + + Empty-fact case: returns ``RenderingLite`` with an empty ``rows`` list + and an empty ``periods`` list — the envelope still validates and the + frontend renders an empty state. + """ + # 1) Empty-fact short-circuit — no rendering work to do, no + # classification query needed. + if not facts: + return RenderingLite(rows=[], periods=[], validation=None, unmapped_count=0) + + # 2) Look up element metadata + per-element classification (FASB + # elementsOfFinancialStatements trait axis). Single batched query. 
+  element_ids = [e.id for e in elements]
+  classification_by_id = _load_element_classifications(session, element_ids)
+  elements_by_id = {e.id: e for e in elements}
+
+  # 3) Derive periods from facts. Order by (period_end, period_start) so
+  #    comparative columns appear in chronological order.
+  period_keys: set[tuple[date, date]] = set()
+  for f in facts:
+    pe = f.period_end
+    ps = f.period_start if f.period_start is not None else pe
+    period_keys.add((ps, pe))
+  if not period_keys:
+    return RenderingLite(rows=[], periods=[], validation=None, unmapped_count=0)
+
+  ordered_periods = sorted(period_keys, key=lambda pk: (pk[1], pk[0]))
+  periods: list[PeriodSpec] = [
+    PeriodSpec(start=ps, end=pe, label="") for ps, pe in ordered_periods
+  ]
+
+  # 3b) Convert envelope Fact ORM rows to ReportFact dataclasses (the
+  #     shape ``_build_rows`` consumes downstream). Facts already carry
+  #     natural-sign values from generate_report_facts at write time, so
+  #     _build_rows is called with pre_signed=True.
+  report_facts: list[ReportFact] = []
+  for f in facts:
+    elem = elements_by_id.get(f.element_id)
+    if elem is None:
+      continue
+    pe = f.period_end
+    ps = f.period_start if f.period_start is not None else pe
+    report_facts.append(
+      ReportFact(
+        element_id=f.element_id,
+        element_qname=elem.qname or "",
+        element_name=elem.name,
+        classification=classification_by_id.get(f.element_id, ""),
+        balance_type=elem.balance_type or "debit",
+        value=float(f.value),
+        period_start=ps,
+        period_end=pe,
+        period_type=f.period_type,
+      )
+    )
+
+  # 4) Build hierarchy + calculations from already-loaded associations.
+  hierarchy = _build_hierarchy_from_atoms(
+    structure_id, elements_by_id, classification_by_id, associations
+  )
+  if not hierarchy:
+    return RenderingLite(rows=[], periods=[], validation=None, unmapped_count=0)
+
+  calculations = _calculations_from_associations(associations)
+
+  # 5) Per-period balance dicts (one per column).
+ period_balances = [ + _facts_to_balance_dict_for_period(report_facts, p.start, p.end) for p in periods + ] + + # 6) The pure in-memory rollup walker — produces FactRow per row with + # depth + is_subtotal + per-period values. + rows: list[FactRow] = _build_rows( + hierarchy, period_balances, calculations, pre_signed=True + ) + + # 7) Validation — guard-rail checks on the rendered rows. + validation_result = validate_report(block_type, rows) + + return RenderingLite( + rows=[ + RenderingRowLite( + element_id=r.element_id, + element_qname=r.element_qname, + element_name=r.element_name, + classification=r.classification or None, + balance_type=r.balance_type, + values=list(r.values), + is_subtotal=r.is_subtotal, + depth=r.depth, + ) + for r in rows + ], + periods=[ + RenderingPeriodLite(start=p.start, end=p.end, label=p.label or None) + for p in periods + ], + validation=ValidationLite( + passed=validation_result.passed, + checks=list(validation_result.checks), + failures=list(validation_result.failures), + warnings=list(validation_result.warnings), + ), + unmapped_count=0, + ) + + +def _facts_to_balance_dict_for_period( + facts: list[ReportFact], + period_start: date, + period_end: date, +) -> dict[str, _Balance]: + """Build a `_Balance` dict for one period from in-memory `ReportFact` rows. + + Mirrors :func:`fact_grid._facts_to_balance_dict` but lives here so the + envelope rendering path stays self-contained. Facts are already + natural-signed by ``generate_report_facts``, so ``balance_type`` is + set to "debit" to keep ``_build_rows`` from re-applying sign + conversion (it dispatches on balance_type when ``pre_signed=False``; + with ``pre_signed=True`` the balance is returned as-is). 
+ """ + balances: dict[str, _Balance] = {} + for fact in facts: + if fact.period_start == period_start and fact.period_end == period_end: + balances[fact.element_id] = _Balance( + element_id=fact.element_id, + qname=fact.element_qname, + name=fact.element_name, + classification=fact.classification, + balance_type="debit", + total_debits=0.0, + total_credits=0.0, + net_balance=fact.value, + ) + return balances + + +def _build_hierarchy_from_atoms( + structure_id: str, + elements_by_id: dict[str, Element], + classification_by_id: dict[str, str], + associations: list[Association], +) -> list[_HierarchyNode]: + """Build the presentation hierarchy from already-loaded atoms. + + Replaces the SQL-heavy ``fact_grid._load_reporting_structure``: works + off the associations + elements that ``load_base_envelope_atoms`` + already loaded, no extra queries. + + Roots are presentation associations whose ``from_element_id`` equals + the ``structure_id`` (the root-anchor convention seeded by + :mod:`robosystems.taxonomy.seed`). Children are walked depth-first + via the parent → child mapping, sorted by ``order_value``. + """ + # Presentation children grouped by parent_id (preserving order_value). + children_by_parent: dict[str, list[tuple[float, str]]] = {} + for a in associations: + if a.association_type != "presentation": + continue + if a.from_element_id is None or a.to_element_id is None: + continue + order = a.order_value if a.order_value is not None else float("inf") + children_by_parent.setdefault(a.from_element_id, []).append( + (order, a.to_element_id) + ) + + for parent_id in children_by_parent: + children_by_parent[parent_id].sort(key=lambda pair: pair[0]) + + # Roots = children of the structure_id anchor (sorted). 
+ root_ids = [child_id for _, child_id in children_by_parent.get(structure_id, [])] + + def _make_node(element_id: str, depth: int) -> _HierarchyNode: + elem = elements_by_id.get(element_id) + node = _HierarchyNode( + element_id=element_id, + qname=(elem.qname if elem is not None else None) or "", + name=elem.name if elem is not None else "", + classification=classification_by_id.get(element_id, ""), + balance_type=(elem.balance_type if elem is not None else None) or "debit", + is_abstract=bool(elem.is_abstract) if elem is not None else False, + depth=depth, + ) + for _order, child_id in children_by_parent.get(element_id, []): + # Skip self-references (defensive — root anchors live above). + if child_id == element_id: + continue + node.children.append(_make_node(child_id, depth + 1)) + return node + + return [_make_node(rid, 0) for rid in root_ids] + + +def _calculations_from_associations( + associations: list[Association], +) -> dict[str, list[tuple[str, float]]]: + """Project calculation associations into the dict shape `_build_rows` expects. + + Mirrors :func:`fact_grid._load_calculations` but works off the already- + loaded associations list — no SQL. + """ + calculations: dict[str, list[tuple[str, float]]] = {} + ordered = sorted( + associations, + key=lambda a: a.order_value if a.order_value is not None else float("inf"), + ) + for a in ordered: + if a.association_type != "calculation": + continue + if a.from_element_id is None or a.to_element_id is None: + continue + weight = a.weight if a.weight is not None else 1.0 + calculations.setdefault(a.from_element_id, []).append((a.to_element_id, weight)) + return calculations + + +def _load_element_classifications( + session: Session, element_ids: list[str] +) -> dict[str, str]: + """Fetch primary FASB elementsOfFinancialStatements trait per element. 
+ + Returns ``{element_id: identifier}`` where identifier is one of + ``'asset'`` / ``'liability'`` / ``'equity'`` / ``'revenue'`` / + ``'expense'`` (the FASB SFAC 6 axis). Elements with no primary trait + on this axis are absent from the dict — callers default to the empty + string for the resulting :class:`RenderingRowLite.classification`. + + Single batched query; mirrors the LEFT JOIN that the legacy + :func:`fact_grid._load_reporting_structure` does inline, but separates + the trait lookup from the hierarchy walk. + """ + if not element_ids: + return {} + placeholders = ", ".join(f":e{i}" for i in range(len(element_ids))) + params = {f"e{i}": eid for i, eid in enumerate(element_ids)} + rows = session.execute( + text(f""" + SELECT et.element_id, cls.identifier + FROM element_traits et + JOIN classifications cls + ON cls.id = et.trait_id + AND cls.category = 'elementsOfFinancialStatements' + WHERE et.is_primary = TRUE + AND et.element_id IN ({placeholders}) + """), + params, + ).all() + return {row.element_id: row.identifier for row in rows} def make_statement_handlers( diff --git a/tests/operations/information_block/test_statement_handlers.py b/tests/operations/information_block/test_statement_handlers.py index 45064f21..a241382f 100644 --- a/tests/operations/information_block/test_statement_handlers.py +++ b/tests/operations/information_block/test_statement_handlers.py @@ -256,7 +256,16 @@ def test_loads_elements_and_associations_from_library_seed(self) -> None: assert {e.id for e in envelope.elements} == {"elem_revenue", "elem_sales"} assert envelope.facts == [] - def test_facts_populated_from_most_recent_report(self) -> None: + def test_facts_populated_from_latest_fact_set(self) -> None: + """Plan B: envelope reads facts via the canonical fact_set_id pin. + + The previous behaviour scanned for the latest Report touching any of + the structure's elements and filtered by report_id. 
After Plan B + (Apr 2026) the read path uses the FactSet (= the Block instance per + Charlie's PDF) loaded by ``load_latest_fact_set_for_structure``, + eliminating one query and aligning with the canonical pin used by + Report Block items. + """ session = MagicMock() structure = _make_statement_structure( structure_id="struct_balance_sheet", @@ -285,6 +294,15 @@ def test_facts_populated_from_most_recent_report(self) -> None: element.balance_type = "debit" element.period_type = "instant" + fact_set = MagicMock() + fact_set.id = "fset_balance_sheet_2026q1" + fact_set.structure_id = "struct_balance_sheet" + fact_set.period_start = date(2026, 1, 1) + fact_set.period_end = date(2026, 3, 31) + fact_set.factset_type = "report" + fact_set.entity_id = "ent_demo" + fact_set.report_id = "rep_latest" + fact = MagicMock() fact.id = "fact_1" fact.element_id = "elem_cash" @@ -294,21 +312,22 @@ def test_facts_populated_from_most_recent_report(self) -> None: fact.period_type = "instant" fact.unit = "USD" fact.fact_scope = "in_scope" - fact.fact_set_id = None + fact.fact_set_id = "fset_balance_sheet_2026q1" - # Query order (1 association, report found): - # taxonomy → associations → elements → rules → classifications → - # fact_set → verification_results → latest_report_id → facts + # Query order (Plan B — fact_set_id read path): + # taxonomy → associations → elements → rules → assoc-classifications → + # fact_set → verification_results → facts (filtered by fact_set.id) → + # element-classifications (rendering projection trait lookup) session.execute.side_effect = [ _exec_result(scalar="US GAAP"), # taxonomy name _exec_result(scalars_all=[assoc]), # associations _exec_result(scalars_all=[element]), # elements _exec_result(scalars_all=[]), # rules _exec_result(all_rows=[]), # association classifications - _exec_result(scalar=None), # latest fact set → None + _exec_result(scalar=fact_set), # latest fact set _exec_result(scalars_all=[]), # verification results - 
_exec_result(scalar="rep_latest"), # latest_report_id - _exec_result(scalars_all=[fact]), # facts + _exec_result(scalars_all=[fact]), # facts (filtered by fact_set.id) + _exec_result(all_rows=[]), # element classifications (Plan B) ] build = statement_handlers.make_statement_handlers("balance_sheet") @@ -320,6 +339,9 @@ def test_facts_populated_from_most_recent_report(self) -> None: assert envelope.facts[0].element_id == "elem_cash" assert envelope.facts[0].value == 100_000.0 assert envelope.facts[0].period_end == date(2026, 3, 31) + assert envelope.facts[0].fact_set_id == "fset_balance_sheet_2026q1" + assert envelope.fact_set is not None + assert envelope.fact_set.id == "fset_balance_sheet_2026q1" @pytest.mark.parametrize( "block_type", From 0496b080d978b0d54747d3eace53526c96a9b44c Mon Sep 17 00:00:00 2001 From: "Joseph T. French" Date: Sat, 25 Apr 2026 22:45:18 -0500 Subject: [PATCH 2/7] feat: Implement report lifecycle and FactSet relationships in the reporting model --- .../versions/0006_report_package_lifecycle.py | 153 ++++++++++++++++++ .../adapters/sec/processors/classify.py | 38 ++++- .../adapters/sec/processors/xbrl_graph.py | 5 + .../models/extensions/roboledger/report.py | 57 ++++++- .../operations/extensions/materialize.py | 20 ++- .../operations/information_block/envelope.py | 38 ++++- .../operations/information_block/metric.py | 8 +- .../operations/information_block/reads.py | 33 +++- .../operations/information_block/schedule.py | 13 +- .../operations/information_block/statement.py | 14 +- .../operations/information_block/types.py | 8 +- robosystems/schemas/extensions/roboledger.py | 11 ++ .../operations/extensions/test_materialize.py | 1 + .../information_block/test_reads.py | 2 +- 14 files changed, 365 insertions(+), 36 deletions(-) create mode 100644 migrations/extensions/versions/0006_report_package_lifecycle.py diff --git a/migrations/extensions/versions/0006_report_package_lifecycle.py 
b/migrations/extensions/versions/0006_report_package_lifecycle.py new file mode 100644 index 00000000..31d36489 --- /dev/null +++ b/migrations/extensions/versions/0006_report_package_lifecycle.py @@ -0,0 +1,153 @@ +"""report package lifecycle + +Plan-C foundation. Adds the filing lifecycle (draft → under_review → +filed → archived) and restatement chain (supersedes_id / +superseded_by_id) to the existing ``reports`` table, so the Report +itself becomes the package container — no separate ReportBlock concept. +The Report is the unit that materialises to the graph, so unifying the +package + materialisation concerns onto one row keeps the graph version +chain in 1:1 correspondence with the OLTP chain. + +``filing_status`` is orthogonal to the existing ``generation_status``: +``generation_status`` tracks computation (pending → generating → +complete → published); ``filing_status`` tracks business state +(draft → under_review → filed → archived). Both are useful and +non-overlapping. + +``reports`` is tenant-scoped (schema-per-graph-id), so columns and +indexes are added in the public schema (ORM metadata anchor) and in +every existing tenant schema. New tenants pick up the columns via +``provision_tenant_schema`` from the SQLAlchemy model. + +Revision ID: 0006 +Revises: 0005 +Create Date: 2026-04-25 +""" + +from __future__ import annotations + +import sqlalchemy as sa +from alembic import op +from sqlalchemy import text + +from migrations.extensions.helpers import TenantOps, for_each_tenant_schema + +# revision identifiers, used by Alembic. 
+revision = "0006" +down_revision = "0005" +branch_labels = None +depends_on = None + + +_FILING_STATUS_CHECK = "filing_status IN ('draft', 'under_review', 'filed', 'archived')" + + +def _add_lifecycle_columns_in_tenant(conn, schema: str) -> None: + t = TenantOps(conn, schema) + + t.add_column("reports", "filing_status", "VARCHAR", nullable=False, default="'draft'") + t.add_column("reports", "filed_at", "TIMESTAMP") + t.add_column("reports", "filed_by", "VARCHAR") + t.add_column("reports", "supersedes_id", "VARCHAR") + t.add_column("reports", "superseded_by_id", "VARCHAR") + + t.add_check( + "reports", + f"check_{schema}_report_filing_status", + _FILING_STATUS_CHECK, + ) + + conn.execute( + text( + f"CREATE INDEX IF NOT EXISTS idx_{schema}_reports_filing_status " + f"ON {schema}.reports (filing_status)" + ) + ) + conn.execute( + text( + f"CREATE INDEX IF NOT EXISTS idx_{schema}_reports_supersedes " + f"ON {schema}.reports (supersedes_id)" + ) + ) + + t.add_foreign_key( + "reports", + f"fk_{schema}_reports_supersedes", + ["supersedes_id"], + "reports", + ["id"], + ) + t.add_foreign_key( + "reports", + f"fk_{schema}_reports_superseded_by", + ["superseded_by_id"], + "reports", + ["id"], + ) + + +def _drop_lifecycle_columns_in_tenant(conn, schema: str) -> None: + t = TenantOps(conn, schema) + t.drop_constraint("reports", f"fk_{schema}_reports_superseded_by") + t.drop_constraint("reports", f"fk_{schema}_reports_supersedes") + t.drop_index(f"idx_{schema}_reports_supersedes") + t.drop_index(f"idx_{schema}_reports_filing_status") + t.drop_constraint("reports", f"check_{schema}_report_filing_status") + t.drop_column("reports", "superseded_by_id") + t.drop_column("reports", "supersedes_id") + t.drop_column("reports", "filed_by") + t.drop_column("reports", "filed_at") + t.drop_column("reports", "filing_status") + + +def upgrade() -> None: + # Public schema — ORM metadata anchor; tenant schemas hold the data. 
+ op.add_column( + "reports", + sa.Column( + "filing_status", + sa.String(), + nullable=False, + server_default="draft", + ), + ) + op.add_column("reports", sa.Column("filed_at", sa.DateTime(), nullable=True)) + op.add_column("reports", sa.Column("filed_by", sa.String(), nullable=True)) + op.add_column("reports", sa.Column("supersedes_id", sa.String(), nullable=True)) + op.add_column("reports", sa.Column("superseded_by_id", sa.String(), nullable=True)) + op.create_check_constraint( + "check_report_filing_status", "reports", _FILING_STATUS_CHECK + ) + op.create_index( + "idx_reports_filing_status", "reports", ["filing_status"], unique=False + ) + op.create_index("idx_reports_supersedes", "reports", ["supersedes_id"], unique=False) + op.create_foreign_key( + "fk_reports_supersedes", "reports", "reports", ["supersedes_id"], ["id"] + ) + op.create_foreign_key( + "fk_reports_superseded_by", + "reports", + "reports", + ["superseded_by_id"], + ["id"], + ) + + conn = op.get_bind() + for_each_tenant_schema(conn, _add_lifecycle_columns_in_tenant) + + +def downgrade() -> None: + conn = op.get_bind() + for_each_tenant_schema(conn, _drop_lifecycle_columns_in_tenant) + + op.drop_constraint("fk_reports_superseded_by", "reports", type_="foreignkey") + op.drop_constraint("fk_reports_supersedes", "reports", type_="foreignkey") + op.drop_index("idx_reports_supersedes", table_name="reports") + op.drop_index("idx_reports_filing_status", table_name="reports") + op.drop_constraint("check_report_filing_status", "reports", type_="check") + op.drop_column("reports", "superseded_by_id") + op.drop_column("reports", "supersedes_id") + op.drop_column("reports", "filed_by") + op.drop_column("reports", "filed_at") + op.drop_column("reports", "filing_status") diff --git a/robosystems/adapters/sec/processors/classify.py b/robosystems/adapters/sec/processors/classify.py index caa36ae2..b260a1ef 100644 --- a/robosystems/adapters/sec/processors/classify.py +++ 
b/robosystems/adapters/sec/processors/classify.py @@ -512,6 +512,7 @@ class ClassifyResult: factsets_df: pd.DataFrame structure_factset_rels_df: pd.DataFrame factset_fact_rels_df: pd.DataFrame + report_factset_rels_df: pd.DataFrame def classify(self, output_dir: Path) -> ClassifyResult: """Run classification on a filing's parquet output. @@ -534,6 +535,7 @@ def classify(self, output_dir: Path) -> ClassifyResult: factsets_df=pd.DataFrame(), structure_factset_rels_df=pd.DataFrame(), factset_fact_rels_df=pd.DataFrame(), + report_factset_rels_df=pd.DataFrame(), ) # Check that required parquet files exist @@ -607,9 +609,12 @@ def classify(self, output_dir: Path) -> ClassifyResult: relationships.extend(semantic_rels) # Layer 3: Build structure-level FactSets - factsets_df, struct_fs_rels_df, fs_fact_rels_df = self._build_structure_factsets( - ctx - ) + ( + factsets_df, + struct_fs_rels_df, + fs_fact_rels_df, + report_fs_rels_df, + ) = self._build_structure_factsets(ctx) classifications_df = ( pd.DataFrame(classifications) if classifications else pd.DataFrame() @@ -640,6 +645,7 @@ def classify(self, output_dir: Path) -> ClassifyResult: factsets_df=factsets_df, structure_factset_rels_df=struct_fs_rels_df, factset_fact_rels_df=fs_fact_rels_df, + report_factset_rels_df=report_fs_rels_df, ) def _classify_semantic( @@ -729,19 +735,32 @@ def _classify_semantic( def _build_structure_factsets( self, ctx: TempLadybugContext - ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: """Build pre-computed FactSets per Structure. For each Structure, collects all Elements from its associations, then finds all Facts in the report that reference those elements. Creates a - FactSet per Structure as a rendering manifest. + FactSet per Structure as a rendering manifest. Each FactSet is also + linked back to the filing's Report so the package can be traversed + via ``REPORT_HAS_FACT_SET``. 
Returns: - (factsets_df, structure_factset_rels_df, factset_fact_rels_df) + (factsets_df, structure_factset_rels_df, factset_fact_rels_df, + report_factset_rels_df) """ factsets: list[dict] = [] structure_factset_rels: list[dict] = [] factset_fact_rels: list[dict] = [] + report_factset_rels: list[dict] = [] + + # SEC filings produce one Report per ingestion; pull its identifier + # so each new FactSet can emit a REPORT_HAS_FACT_SET edge. + report_ids: list[str] = [] + try: + report_rows = ctx.execute("MATCH (r:Report) RETURN r.identifier AS report_id") + report_ids = [row["report_id"] for row in report_rows if row.get("report_id")] + except Exception as e: + logger.debug(f"FactSet report lookup failed: {e}") # Get all structures and their elements (both FROM and TO) try: @@ -802,6 +821,8 @@ def _build_structure_factsets( fs_id = generate_uuid7() factsets.append({"identifier": fs_id}) structure_factset_rels.append({"from": structure_id, "to": fs_id}) + for report_id in report_ids: + report_factset_rels.append({"from": report_id, "to": fs_id}) for fact_row in facts: fact_id = fact_row.get("fact_id") @@ -815,5 +836,8 @@ def _build_structure_factsets( fs_fact_df = ( pd.DataFrame(factset_fact_rels) if factset_fact_rels else pd.DataFrame() ) + report_fs_df = ( + pd.DataFrame(report_factset_rels) if report_factset_rels else pd.DataFrame() + ) - return factsets_df, struct_fs_df, fs_fact_df + return factsets_df, struct_fs_df, fs_fact_df, report_fs_df diff --git a/robosystems/adapters/sec/processors/xbrl_graph.py b/robosystems/adapters/sec/processors/xbrl_graph.py index 3231cb3c..4d87a195 100644 --- a/robosystems/adapters/sec/processors/xbrl_graph.py +++ b/robosystems/adapters/sec/processors/xbrl_graph.py @@ -311,6 +311,11 @@ def classify_associations(self): result.factset_fact_rels_df, "relationships/FACT_SET_CONTAINS_FACT.parquet", ) + if not result.report_factset_rels_df.empty: + self.parquet_writer.write_dataframe( + result.report_factset_rels_df, + 
"relationships/REPORT_HAS_FACT_SET.parquet", + ) logger.info( f"Wrote {len(result.factsets_df)} structure FactSets " f"with {len(result.factset_fact_rels_df)} fact links" diff --git a/robosystems/models/extensions/roboledger/report.py b/robosystems/models/extensions/roboledger/report.py index d4b6a504..ee0d6893 100644 --- a/robosystems/models/extensions/roboledger/report.py +++ b/robosystems/models/extensions/roboledger/report.py @@ -1,14 +1,41 @@ -"""Report model — generated report configurations. +"""Report model — the package-mode container and unit of materialization. -Stores the configuration needed to produce a report. A report is tied to a -Taxonomy (which contains multiple Structures like IS, BS, CF). Facts are -generated for all mapped elements across all structures in the taxonomy. -References Report/Fact/FactSet nodes in the graph after materialization. +A Report is a named, period-scoped, lockable container for the FactSets +generated for one reporting period. It is the digital equivalent of a +signed financial-report package ("Q1 2026 Financial Statements") and +the atomic unit that materialises to the graph (one graph Report node ++ its FactSet/Fact nodes per Report row). + +The Report has two orthogonal lifecycles: + +* ``generation_status`` (pending → generating → complete → published) — + the *computation* lifecycle. Tracks whether facts have been generated. +* ``filing_status`` (draft → under_review → filed → archived) — the + *business* lifecycle. Tracks whether the package has been reviewed and + filed. ``filed`` is the immutable, locked state. + +Restatements create a new Report with ``supersedes_id`` pointing at the +prior filed version; the prior row's ``superseded_by_id`` closes the +link. The graph version chain follows the OLTP chain 1:1. 
+ +The package-mode viewer renders a Report by loading its attached +FactSets (one per Structure on the Report) and rehydrating each as an +``InformationBlockEnvelope`` via ``get_information_block_for_fact_set``. """ from datetime import UTC, datetime -from sqlalchemy import Boolean, Column, Date, DateTime, Float, Index, String +from sqlalchemy import ( + Boolean, + CheckConstraint, + Column, + Date, + DateTime, + Float, + ForeignKey, + Index, + String, +) from sqlalchemy.dialects.postgresql import JSONB from robosystems.db.extensions import ExtensionsBase @@ -20,6 +47,12 @@ class Report(ExtensionsBase): __table_args__ = ( Index("idx_reports_taxonomy", "taxonomy_id"), Index("idx_reports_status", "generation_status"), + Index("idx_reports_filing_status", "filing_status"), + Index("idx_reports_supersedes", "supersedes_id"), + CheckConstraint( + "filing_status IN ('draft', 'under_review', 'filed', 'archived')", + name="check_report_filing_status", + ), ) # Identity @@ -47,6 +80,18 @@ class Report(ExtensionsBase): last_generated = Column(DateTime, nullable=True) generation_status = Column(String, nullable=False, default="pending") + # Filing lifecycle — orthogonal to generation_status. ``filed`` is + # the immutable locked state; ``archived`` is for superseded versions. + filing_status = Column(String, nullable=False, default="draft") + filed_at = Column(DateTime, nullable=True) + filed_by = Column(String, nullable=True) + + # Restatement chain — restating a filed Report creates a new row with + # ``supersedes_id`` pointing at the prior version; the prior row's + # ``superseded_by_id`` closes the link. 
+ supersedes_id = Column(String, ForeignKey("reports.id"), nullable=True) + superseded_by_id = Column(String, ForeignKey("reports.id"), nullable=True) + # AI provenance ai_generated = Column(Boolean, nullable=False, default=False) ai_intent = Column(String, nullable=True) diff --git a/robosystems/operations/extensions/materialize.py b/robosystems/operations/extensions/materialize.py index bba6a67f..3c059c21 100644 --- a/robosystems/operations/extensions/materialize.py +++ b/robosystems/operations/extensions/materialize.py @@ -90,6 +90,7 @@ class MaterializeResult: "FACT_HAS_UNIT", "FACT_HAS_ENTITY", "STRUCTURE_HAS_FACT_SET", + "REPORT_HAS_FACT_SET", "FACT_SET_CONTAINS_FACT", # Investor layer "ENTITY_HAS_PORTFOLIO", @@ -157,6 +158,7 @@ class MaterializeResult: "FACT_HAS_UNIT": "roboledger", "FACT_HAS_ENTITY": "roboledger", "STRUCTURE_HAS_FACT_SET": "roboledger", + "REPORT_HAS_FACT_SET": "roboledger", "FACT_SET_CONTAINS_FACT": "roboledger", # roboinvestor edges "ENTITY_HAS_PORTFOLIO": "roboinvestor", @@ -937,12 +939,20 @@ def _staging_sql(graph_id: str, entity_id: str, connstr: str) -> dict[str, str]: tables["STRUCTURE_HAS_FACT_SET"] = f""" CREATE OR REPLACE TABLE STRUCTURE_HAS_FACT_SET AS - SELECT DISTINCT + SELECT structure_id AS src, - fact_set_id AS dst - FROM postgres_scan('{c}', '{s}', 'facts') - WHERE fact_set_id IS NOT NULL - AND structure_id IS NOT NULL + id AS dst + FROM postgres_scan('{c}', '{s}', 'fact_sets') + WHERE structure_id IS NOT NULL + """ + + tables["REPORT_HAS_FACT_SET"] = f""" + CREATE OR REPLACE TABLE REPORT_HAS_FACT_SET AS + SELECT + report_id AS src, + id AS dst + FROM postgres_scan('{c}', '{s}', 'fact_sets') + WHERE report_id IS NOT NULL """ tables["FACT_SET_CONTAINS_FACT"] = f""" diff --git a/robosystems/operations/information_block/envelope.py b/robosystems/operations/information_block/envelope.py index bd178eb9..914c04cf 100644 --- a/robosystems/operations/information_block/envelope.py +++ 
b/robosystems/operations/information_block/envelope.py @@ -205,6 +205,25 @@ def load_latest_fact_set_for_structure( return fact_set_to_lite(row) if row is not None else None +def load_fact_set_by_id_for_structure( + session: Session, structure_id: str, fact_set_id: str +) -> FactSetLite | None: + """Fetch a specific FactSet pinned to a Structure. + + Used by the Report Block rehydration path to load the exact FactSet + snapshot that a ``ReportBlockItem`` pins, instead of the latest one. + Returns ``None`` when the FactSet doesn't exist or doesn't belong to + the named Structure — callers surface that as a clean miss to the + envelope reader. + """ + row = session.execute( + select(FactSet).where( + FactSet.id == fact_set_id, FactSet.structure_id == structure_id + ) + ).scalar() + return fact_set_to_lite(row) if row is not None else None + + def rule_to_lite(rule: Rule) -> RuleLite: """Project a :class:`Rule` ORM row onto :class:`RuleLite`. @@ -309,7 +328,11 @@ class BaseEnvelopeAtoms: def load_base_envelope_atoms( - session: Session, structure_id: str, *, expected_block_type: str + session: Session, + structure_id: str, + *, + expected_block_type: str, + fact_set_id: str | None = None, ) -> BaseEnvelopeAtoms | None: """Load every atom shared by Information Block envelope builders. @@ -319,6 +342,11 @@ def load_base_envelope_atoms( order: Structure -> taxonomy name -> associations -> elements -> rules -> classifications -> fact_set -> verification_results, matching the order each individual handler used to inline. + + When ``fact_set_id`` is provided the atoms are pinned to that specific + FactSet (used by Report Block rehydration). Returns ``None`` if the + named FactSet doesn't belong to this Structure — callers treat the + pin mismatch as a clean miss. 
""" from robosystems.models.extensions import Taxonomy @@ -360,7 +388,12 @@ def load_base_envelope_atoms( session, [a.id for a in associations] ) - fact_set = load_latest_fact_set_for_structure(session, structure_id) + if fact_set_id is not None: + fact_set = load_fact_set_by_id_for_structure(session, structure_id, fact_set_id) + if fact_set is None: + return None + else: + fact_set = load_latest_fact_set_for_structure(session, structure_id) verification_results = load_verification_results_for_structure(session, structure_id) return BaseEnvelopeAtoms( @@ -384,6 +417,7 @@ def load_base_envelope_atoms( "fact_to_lite", "load_base_envelope_atoms", "load_classifications_for_associations", + "load_fact_set_by_id_for_structure", "load_latest_fact_set_for_structure", "load_rules_for_structure", "load_verification_results_for_structure", diff --git a/robosystems/operations/information_block/metric.py b/robosystems/operations/information_block/metric.py index 3c5e2efb..3b669ccc 100644 --- a/robosystems/operations/information_block/metric.py +++ b/robosystems/operations/information_block/metric.py @@ -35,17 +35,21 @@ def build_envelope( - session: Session, structure_id: str + session: Session, structure_id: str, fact_set_id: str | None = None ) -> InformationBlockEnvelope | None: """Reload a metric Structure and pack it into the Information Block envelope. Mechanics are read off the typed ``artifact_mechanics`` column; ``facts`` stays empty until the derivation evaluator is implemented. The envelope shape is stable so callers and UI can already render a - metric block as a placeholder. + metric block as a placeholder. ``fact_set_id`` is accepted for + signature parity with the registry contract; metric blocks have no + FactSet today so the pin is a no-op. 
""" from sqlalchemy import select + del fact_set_id # currently unused; metric FactSet wiring lands later + structure = session.get(Structure, structure_id) if structure is None or structure.structure_type != METRIC_BLOCK_TYPE: return None diff --git a/robosystems/operations/information_block/reads.py b/robosystems/operations/information_block/reads.py index befeade1..27eaedc7 100644 --- a/robosystems/operations/information_block/reads.py +++ b/robosystems/operations/information_block/reads.py @@ -18,16 +18,22 @@ from robosystems.models.api.information_block import InformationBlockEnvelope from robosystems.models.extensions import Structure +from robosystems.models.extensions.roboledger import FactSet from robosystems.operations.information_block import registry as registry_module def get_information_block( - session: Session, structure_id: str + session: Session, + structure_id: str, + fact_set_id: str | None = None, ) -> InformationBlockEnvelope | None: """Fetch one block by id, dispatching via the structure's block_type. Returns ``None`` when the structure doesn't exist or its type isn't - registered — callers map that to a GraphQL null / REST 404. + registered — callers map that to a GraphQL null / REST 404. When + ``fact_set_id`` is provided the envelope is rehydrated from that + specific FactSet instead of the latest one (used by Report Block + rehydration to surface a frozen snapshot). """ structure = session.get(Structure, structure_id) if structure is None: @@ -39,7 +45,27 @@ def get_information_block( # surface as ``None`` rather than raising — the envelope just can't # be built. return None - return entry.dispatch_build_envelope(session, structure_id) + return entry.dispatch_build_envelope(session, structure_id, fact_set_id) + + +def get_information_block_for_fact_set( + session: Session, fact_set_id: str +) -> InformationBlockEnvelope | None: + """Rehydrate the Information Block envelope pinned to a FactSet. 
+ + Looks up the FactSet's Structure, then dispatches the registered + block-type handler with the explicit fact_set pin. Returns ``None`` + when the FactSet doesn't exist, has no Structure, or its + ``structure_type`` isn't a registered block type. + + Used by the Report Block read path: each ReportBlockItem pins a + ``fact_set_id``; assembling the envelope for that item is exactly + this lookup. + """ + fact_set = session.get(FactSet, fact_set_id) + if fact_set is None or fact_set.structure_id is None: + return None + return get_information_block(session, fact_set.structure_id, fact_set_id) def list_information_blocks( @@ -118,5 +144,6 @@ def list_information_blocks( __all__ = [ "get_information_block", + "get_information_block_for_fact_set", "list_information_blocks", ] diff --git a/robosystems/operations/information_block/schedule.py b/robosystems/operations/information_block/schedule.py index aa667c88..e3d362b9 100644 --- a/robosystems/operations/information_block/schedule.py +++ b/robosystems/operations/information_block/schedule.py @@ -146,17 +146,24 @@ def _load_schedule_mechanics( def build_envelope( - session: Session, structure_id: str + session: Session, structure_id: str, fact_set_id: str | None = None ) -> InformationBlockEnvelope | None: """Reload a schedule Structure and pack its Information Block envelope. Returns ``None`` when the structure doesn't exist or isn't a schedule, so the generic reader can cleanly distinguish misses from errors. Mechanics are read from the typed ``artifact_mechanics`` column with - fallback to legacy ``metadata_`` JSONB. + fallback to legacy ``metadata_`` JSONB. ``fact_set_id`` pins the + envelope to a specific FactSet snapshot (Report Block rehydration); + facts are still read from the Structure's full set since schedule + facts are stamped with ``structure_id`` directly — the pin only + scopes the ``fact_set`` projection. 
""" atoms = load_base_envelope_atoms( - session, structure_id, expected_block_type=SCHEDULE_BLOCK_TYPE + session, + structure_id, + expected_block_type=SCHEDULE_BLOCK_TYPE, + fact_set_id=fact_set_id, ) if atoms is None: return None diff --git a/robosystems/operations/information_block/statement.py b/robosystems/operations/information_block/statement.py index 53d94734..3280fe20 100644 --- a/robosystems/operations/information_block/statement.py +++ b/robosystems/operations/information_block/statement.py @@ -86,6 +86,7 @@ def _build_statement_envelope( session: Session, structure_id: str, + fact_set_id: str | None = None, *, block_type: str, ) -> InformationBlockEnvelope | None: @@ -108,7 +109,10 @@ def _build_statement_envelope( the switch is a strict simplification (one less query per envelope). """ atoms = load_base_envelope_atoms( - session, structure_id, expected_block_type=block_type + session, + structure_id, + expected_block_type=block_type, + fact_set_id=fact_set_id, ) if atoms is None: return None @@ -454,13 +458,15 @@ def _load_element_classifications( def make_statement_handlers( block_type: str, -) -> Callable[[Session, str], InformationBlockEnvelope | None]: +) -> Callable[..., InformationBlockEnvelope | None]: """Build the envelope handler for one statement type. ``functools.partial`` binds the ``block_type`` keyword on the envelope - builder so the registry entry holds a two-argument callable matching + builder so the registry entry holds a callable matching :class:`BlockTypeRegistryEntry.dispatch_build_envelope`'s signature - ``(session, structure_id) -> envelope | None``. + ``(session, structure_id, fact_set_id?=None) -> envelope | None``. + The ``fact_set_id`` arg is used by Report Block rehydration to pin + the envelope to a specific FactSet snapshot. 
The create / update / delete handlers for statement block types are not built here — the registry installs not-implemented stubs via diff --git a/robosystems/operations/information_block/types.py b/robosystems/operations/information_block/types.py index 51afa5ec..10b44155 100644 --- a/robosystems/operations/information_block/types.py +++ b/robosystems/operations/information_block/types.py @@ -115,11 +115,13 @@ class BlockTypeRegistryEntry: Returns the deleted structure_id for the response envelope. Block types that don't support deletion raise :class:`NotImplementedError`.""" - dispatch_build_envelope: Callable[[Session, str], InformationBlockEnvelope | None] + dispatch_build_envelope: Callable[..., InformationBlockEnvelope | None] """Handler that reads the block and packs its envelope. Signature: - ``(session, structure_id) -> InformationBlockEnvelope | None``. + ``(session, structure_id, fact_set_id=None) -> InformationBlockEnvelope | None``. Returns None when the structure_id doesn't exist or its row doesn't - belong to this block type.""" + belong to this block type. ``fact_set_id`` pins the envelope to a + specific FactSet snapshot (used by Report Block rehydration); when + omitted the latest FactSet is used.""" surfaces_in_library: bool = False """When True, ``list_information_blocks`` surfaces this block type on diff --git a/robosystems/schemas/extensions/roboledger.py b/robosystems/schemas/extensions/roboledger.py index df4b9e11..5c0a3cc4 100644 --- a/robosystems/schemas/extensions/roboledger.py +++ b/robosystems/schemas/extensions/roboledger.py @@ -186,6 +186,17 @@ description="Structure has a pre-computed set of facts for rendering", properties=[], ), + # Report → FactSet (the package-mode container edge). A Report groups + # N FactSets — one per statement Structure produced for the period. + # Lets the graph traverse "give me the FactSets of this Report" in one + # hop, instead of the two-hop ``Report → Fact ← FactSet`` path. 
+ Relationship( + name="REPORT_HAS_FACT_SET", + from_node="Report", + to_node="FactSet", + description="Report contains FactSets (one per statement Structure)", + properties=[], + ), Relationship( name="REPORT_USES_TAXONOMY", from_node="Report", diff --git a/tests/operations/extensions/test_materialize.py b/tests/operations/extensions/test_materialize.py index 3535ee8e..3e147039 100644 --- a/tests/operations/extensions/test_materialize.py +++ b/tests/operations/extensions/test_materialize.py @@ -222,6 +222,7 @@ def test_relationship_categories(self): "FACT_HAS_UNIT", "FACT_HAS_ENTITY", "STRUCTURE_HAS_FACT_SET", + "REPORT_HAS_FACT_SET", "FACT_SET_CONTAINS_FACT", } # RoboInvestor edges (entity↔portfolio + security issuance + portfolio structure) diff --git a/tests/operations/information_block/test_reads.py b/tests/operations/information_block/test_reads.py index 88ef1aa8..376bf0fa 100644 --- a/tests/operations/information_block/test_reads.py +++ b/tests/operations/information_block/test_reads.py @@ -76,7 +76,7 @@ def test_dispatches_to_schedule_handler(self) -> None: with patch.dict(REGISTRY_PATH, {"schedule": patched}): result = get_information_block(session, "struct_1") assert result is expected - mock_build.assert_called_once_with(session, "struct_1") + mock_build.assert_called_once_with(session, "struct_1", None) # ── list_information_blocks ──────────────────────────────────────────────── From 4243254d60a004074b3a4404297c169eabade839 Mon Sep 17 00:00:00 2001 From: "Joseph T. 
French" Date: Sat, 25 Apr 2026 23:00:20 -0500 Subject: [PATCH 3/7] feat: Add report package handling and filing lifecycle operations --- robosystems/graphql/resolvers/ledger.py | 21 +++ robosystems/graphql/types/report_package.py | 110 ++++++++++++++++ .../models/api/extensions/report_package.py | 124 ++++++++++++++++++ robosystems/models/api/extensions/reports.py | 7 + .../operations/roboledger/commands/reports.py | 97 ++++++++++++++ .../operations/roboledger/reads/reports.py | 108 +++++++++++++++ .../extensions/roboledger/operations.py | 116 ++++++++++++++++ 7 files changed, 583 insertions(+) create mode 100644 robosystems/graphql/types/report_package.py create mode 100644 robosystems/models/api/extensions/report_package.py diff --git a/robosystems/graphql/resolvers/ledger.py b/robosystems/graphql/resolvers/ledger.py index 3e4325ce..1b4b7a27 100644 --- a/robosystems/graphql/resolvers/ledger.py +++ b/robosystems/graphql/resolvers/ledger.py @@ -57,6 +57,7 @@ TrialBalance, UnmappedElement, ) +from robosystems.graphql.types.report_package import ReportPackage from robosystems.operations.roboledger.fiscal_calendar import FiscalCalendarService from robosystems.operations.roboledger.reads import ( account_rollups as reads_account_rollups, @@ -657,6 +658,26 @@ def report( return None return Report.from_pydantic(response) + @strawberry.field + def report_package( + self, + info: Info[GraphQLContext, None], + report_id: str, + ) -> ReportPackage | None: + """Rehydrate a Report as a package — Report metadata + N rendered + Information Block envelopes (one per attached FactSet). Drives the + ``/reports/[id]`` package viewer; replaces the per-statement + ``getStatement`` round-trip path. 
+ """ + try: + with _open_session(info, "roboledger") as session: + response = reads_reports.get_report_package(session, report_id) + except (ValueError, ProgrammingError): + _raise_ledger_not_initialized() + if response is None: + return None + return ReportPackage.from_pydantic(response) + @strawberry.field def statement( self, diff --git a/robosystems/graphql/types/report_package.py b/robosystems/graphql/types/report_package.py new file mode 100644 index 00000000..4d2da23c --- /dev/null +++ b/robosystems/graphql/types/report_package.py @@ -0,0 +1,110 @@ +"""GraphQL types for the Report-package read. + +The Report is the package container; its items are its FactSets, each +rehydrated as an :class:`InformationBlock`. The resolver lives in +``resolvers/ledger.py`` next to the existing ``report`` field; types +are split into this file because they're hand-written rather than +auto-derived from Pydantic (the ``block`` field needs the manual +``InformationBlock.from_pydantic`` projection). 
+""" + +from __future__ import annotations + +import strawberry + +from robosystems.graphql.types.information_block import InformationBlock +from robosystems.models.api.extensions.report_package import ( + ReportPackageEnvelope as PydanticReportPackageEnvelope, +) +from robosystems.models.api.extensions.report_package import ( + ReportPackageItem as PydanticReportPackageItem, +) + + +@strawberry.type +class ReportPackageItem: + """One item in a Report package — a pinned FactSet rendered as an + ``InformationBlock`` envelope plus assembly metadata.""" + + fact_set_id: str + structure_id: str | None + display_order: int + block: InformationBlock + + @classmethod + def from_pydantic(cls, item: PydanticReportPackageItem) -> ReportPackageItem: + return cls( + fact_set_id=item.fact_set_id, + structure_id=item.structure_id, + display_order=item.display_order, + block=InformationBlock.from_pydantic(item.block), + ) + + +@strawberry.type +class ReportPackage: + """A Report rehydrated as a package — metadata + ordered rendered items.""" + + id: strawberry.ID + name: str + description: str | None + taxonomy_id: str + period_type: str + period_start: str | None + period_end: str | None + + generation_status: str + last_generated: str | None + + filing_status: str + filed_at: str | None + filed_by: str | None + + supersedes_id: str | None + superseded_by_id: str | None + + source_graph_id: str | None + source_report_id: str | None + shared_at: str | None + + entity_name: str | None + ai_generated: bool + created_at: str + created_by: str + + items: list[ReportPackageItem] + + @classmethod + def from_pydantic(cls, envelope: PydanticReportPackageEnvelope) -> ReportPackage: + return cls( + id=strawberry.ID(envelope.id), + name=envelope.name, + description=envelope.description, + taxonomy_id=envelope.taxonomy_id, + period_type=envelope.period_type, + period_start=envelope.period_start.isoformat() if envelope.period_start else None, + period_end=envelope.period_end.isoformat() if 
envelope.period_end else None, + generation_status=envelope.generation_status, + last_generated=( + envelope.last_generated.isoformat() if envelope.last_generated else None + ), + filing_status=envelope.filing_status, + filed_at=envelope.filed_at.isoformat() if envelope.filed_at else None, + filed_by=envelope.filed_by, + supersedes_id=envelope.supersedes_id, + superseded_by_id=envelope.superseded_by_id, + source_graph_id=envelope.source_graph_id, + source_report_id=envelope.source_report_id, + shared_at=envelope.shared_at.isoformat() if envelope.shared_at else None, + entity_name=envelope.entity_name, + ai_generated=envelope.ai_generated, + created_at=envelope.created_at.isoformat(), + created_by=envelope.created_by, + items=[ReportPackageItem.from_pydantic(it) for it in envelope.items], + ) + + +__all__ = [ + "ReportPackage", + "ReportPackageItem", +] diff --git a/robosystems/models/api/extensions/report_package.py b/robosystems/models/api/extensions/report_package.py new file mode 100644 index 00000000..849a3080 --- /dev/null +++ b/robosystems/models/api/extensions/report_package.py @@ -0,0 +1,124 @@ +"""Report package mode — request/response models. + +A Report is the package container (see +``models/extensions/roboledger/report.py`` and +``local/docs/specs/financial-viewer.md`` §"Report Block"). Its items +are its FactSets, found via ``fact_sets.report_id``. Reading a Report +in package mode rehydrates each FactSet into a full +``InformationBlockEnvelope`` so the frontend renders the package +without per-section refetches. + +Models live in their own file (instead of ``reports.py``) because +``InformationBlockEnvelope`` lives in ``models.api.information_block`` +and that module already pulls from ``models.api.extensions.schedules``; +importing the envelope at module load time of any file aggregated by +``models/api/extensions/__init__.py`` would re-create the circular +import that bit Phase 1. 
+""" + +from __future__ import annotations + +from datetime import date, datetime + +from pydantic import BaseModel, Field + +from robosystems.models.api.information_block import InformationBlockEnvelope + +# ── Request models ───────────────────────────────────────────────────────── + + +class FileReportRequest(BaseModel): + """Transition a Report to ``filed`` — locks the package. + + Acceptable from ``draft`` or ``under_review``. ``filed_by`` and + ``filed_at`` are stamped from the auth context + server clock; the + request itself carries no fields today (kept as a model for OpenAPI + shape consistency and to avoid breaking changes if we add fields). + """ + + report_id: str = Field(..., description="The Report to file.") + + +class TransitionFilingStatusRequest(BaseModel): + """Generic filing-status transition — escape hatch for non-file moves. + + Used for ``draft → under_review`` (submit for review) and + ``filed → archived`` (supersede / retire). Filing the package goes + through :class:`FileReportRequest` so ``filed_at`` / ``filed_by`` + audit fields land cleanly. + """ + + report_id: str + target_status: str = Field(..., description="under_review | archived") + + +# ── Response models ──────────────────────────────────────────────────────── + + +class ReportPackageItem(BaseModel): + """One item in a Report package — pinned envelope + ordering metadata.""" + + fact_set_id: str = Field(..., description="The FactSet snapshot pinned to this item.") + structure_id: str | None = Field( + None, + description=( + "The Structure shape this item renders. Comes from " + "``fact_sets.structure_id``; null only for legacy rows where the " + "FactSet wasn't structure-linked." + ), + ) + display_order: int = Field( + 0, + description=( + "Display position in the package. Derived from " + "``Structure.structure_type`` ordering (BS → IS → CF → Equity → " + "Schedule), with ties broken by ``fact_set.created_at``." 
+ ), + ) + block: InformationBlockEnvelope = Field( + ..., + description=( + "The rehydrated InformationBlockEnvelope for ``(structure_id, " + "fact_set_id)``. Pre-built server-side so package-mode renders " + "without per-item refetches." + ), + ) + + +class ReportPackageEnvelope(BaseModel): + """A Report rehydrated as a package — metadata + N rendered items.""" + + id: str + name: str + description: str | None = None + taxonomy_id: str + period_type: str + period_start: date | None = None + period_end: date | None = None + + # Generation lifecycle (computation) + generation_status: str + last_generated: datetime | None = None + + # Filing lifecycle (business) + filing_status: str + filed_at: datetime | None = None + filed_by: str | None = None + + # Restatement chain + supersedes_id: str | None = None + superseded_by_id: str | None = None + + # Sharing provenance (populated for received reports) + source_graph_id: str | None = None + source_report_id: str | None = None + shared_at: datetime | None = None + + # Authoring + entity_name: str | None = None + ai_generated: bool = False + created_at: datetime + created_by: str + + # Package items — one per FactSet attached to the Report. 
+ items: list[ReportPackageItem] = Field(default_factory=list) diff --git a/robosystems/models/api/extensions/reports.py b/robosystems/models/api/extensions/reports.py index 05ea1716..0c7d49a0 100644 --- a/robosystems/models/api/extensions/reports.py +++ b/robosystems/models/api/extensions/reports.py @@ -96,6 +96,13 @@ class ReportResponse(BaseModel): structures: list[StructureSummary] = Field(default_factory=list) # Entity context entity_name: str | None = None + # Filing lifecycle (orthogonal to ``generation_status``) + filing_status: str = "draft" + filed_at: datetime | None = None + filed_by: str | None = None + # Restatement chain + supersedes_id: str | None = None + superseded_by_id: str | None = None # Sharing provenance (populated for received reports) source_graph_id: str | None = None source_report_id: str | None = None diff --git a/robosystems/operations/roboledger/commands/reports.py b/robosystems/operations/roboledger/commands/reports.py index c3442cc6..cf0f6e4a 100644 --- a/robosystems/operations/roboledger/commands/reports.py +++ b/robosystems/operations/roboledger/commands/reports.py @@ -436,6 +436,103 @@ def regenerate_report( return resp + +# ── Filing lifecycle ────────────────────────────────────────────────────── + + +class InvalidFilingTransitionError(Exception): + """Raised when a filing-status transition isn't on the legal lifecycle graph.""" + + +# Legal transitions per the Plan-C lifecycle: +# draft ↔ under_review → filed → archived (archived is terminal) +# ``filed`` is reached via :func:`file_report` so audit fields land cleanly; +# this map covers the non-file moves available to the generic transition op. 
+_LEGAL_NON_FILE_TRANSITIONS: dict[str, set[str]] = { + "draft": {"under_review"}, + "under_review": {"draft"}, + "filed": {"archived"}, +} + + +class ReportNotFiledError(Exception): + """Raised when an op requires a ``filed`` Report and got something else.""" + + +def file_report(session: Session, report_id: str, filed_by: str) -> ReportResponse: + """Transition a Report to ``filed`` — locks the package. + + Allowed from ``draft`` or ``under_review``. Stamps ``filed_at`` and + ``filed_by`` for audit. Raises :class:`ReportNotFoundError` when the + Report doesn't exist and :class:`InvalidFilingTransitionError` when + the current status isn't a legal source for filing. + + ``filing_status`` is orthogonal to ``generation_status`` — filing a + Report doesn't require ``generation_status='complete'``, but a UI + built on top of this normally gates the action on completion. + """ + from datetime import UTC, datetime + + from robosystems.operations.roboledger.reads.reports import ( + load_structures, + report_to_response, + resolve_entity_name, + ) + + report_def = session.get(Report, report_id) + if report_def is None: + raise ReportNotFoundError(report_id) + + if report_def.filing_status not in {"draft", "under_review"}: + raise InvalidFilingTransitionError( + f"Report '{report_id}' is in '{report_def.filing_status}'; " + f"can only file from 'draft' or 'under_review'." + ) + + report_def.filing_status = "filed" + report_def.filed_at = datetime.now(UTC) + report_def.filed_by = filed_by + session.flush() + + structures = load_structures(session, report_def.taxonomy_id) + entity_name = resolve_entity_name(session, report_def) + return report_to_response(report_def, structures, entity_name) + + +def transition_filing_status( + session: Session, report_id: str, target_status: str +) -> ReportResponse: + """Move a Report along the non-file legs of the filing lifecycle. + + Use :func:`file_report` to reach ``filed`` (so audit fields land). 
+ Other transitions (submit for review, withdraw, archive) are routed + through here so the legal-transition graph stays in one place. + """ + from robosystems.operations.roboledger.reads.reports import ( + load_structures, + report_to_response, + resolve_entity_name, + ) + + report_def = session.get(Report, report_id) + if report_def is None: + raise ReportNotFoundError(report_id) + + legal_targets = _LEGAL_NON_FILE_TRANSITIONS.get(report_def.filing_status, set()) + if target_status not in legal_targets: + raise InvalidFilingTransitionError( + f"Report '{report_id}' cannot transition from " + f"'{report_def.filing_status}' to '{target_status}'. " + f"Legal targets from here: {sorted(legal_targets)}." + ) + + report_def.filing_status = target_status + session.flush() + + structures = load_structures(session, report_def.taxonomy_id) + entity_name = resolve_entity_name(session, report_def) + return report_to_response(report_def, structures, entity_name) + + def delete_report(session: Session, report_id: str, acting_user_id: str) -> bool: """Delete a report and its generated facts. 
diff --git a/robosystems/operations/roboledger/reads/reports.py b/robosystems/operations/roboledger/reads/reports.py index ad4457ef..be7a4902 100644 --- a/robosystems/operations/roboledger/reads/reports.py +++ b/robosystems/operations/roboledger/reads/reports.py @@ -9,10 +9,16 @@ from __future__ import annotations from datetime import date, timedelta +from typing import TYPE_CHECKING from sqlalchemy import select, text from sqlalchemy.orm import Session +if TYPE_CHECKING: + from robosystems.models.api.extensions.report_package import ( + ReportPackageEnvelope, + ) + from robosystems.models.api.extensions.reports import ( FactRowResponse, LiveFinancialStatementResponse, @@ -226,6 +232,11 @@ def report_to_response( last_generated=report_def.last_generated, structures=structures, entity_name=entity_name, + filing_status=report_def.filing_status, + filed_at=report_def.filed_at, + filed_by=report_def.filed_by, + supersedes_id=report_def.supersedes_id, + superseded_by_id=report_def.superseded_by_id, source_graph_id=report_def.source_graph_id, source_report_id=report_def.source_report_id, shared_at=report_def.shared_at, @@ -259,6 +270,103 @@ def get_report(session: Session, report_id: str) -> ReportResponse | None: return report_to_response(report_def, structures, entity_name) +# Display order for the package mode — drives ``ReportPackageItem.display_order`` +# when a Structure has no explicit ordering metadata of its own. New +# statement-family block types should be added here as they're seeded. +_STRUCTURE_TYPE_DISPLAY_ORDER: dict[str, int] = { + "balance_sheet": 1, + "income_statement": 2, + "cash_flow_statement": 3, + "equity_statement": 4, + "schedule": 100, +} + + +def get_report_package( + session: Session, report_id: str +) -> ReportPackageEnvelope | None: + """Rehydrate a Report as a package — metadata + N rendered items. 
+ + Loads the Report row, then queries its FactSets via + ``fact_sets.report_id`` (the Report owns its FactSets — see + ``models/extensions/roboledger/fact_set.py``). Each FactSet is + rehydrated into a full ``InformationBlockEnvelope`` via + :func:`get_information_block_for_fact_set` so the frontend renders + the package without per-section refetches. + + Returns ``None`` when the Report doesn't exist. Items are ordered by + ``Structure.structure_type`` (BS → IS → CF → Equity → Schedule) with + ties broken by ``fact_set.created_at``. + """ + from robosystems.models.api.extensions.report_package import ( + ReportPackageEnvelope, + ReportPackageItem, + ) + from robosystems.models.extensions.roboledger import FactSet + from robosystems.operations.information_block.reads import ( + get_information_block_for_fact_set, + ) + + report_def = session.get(Report, report_id) + if report_def is None: + return None + + entity_name = resolve_entity_name(session, report_def) + + # Load FactSets (with their Structures) attached to this Report. + rows = session.execute( + select(FactSet, Structure) + .join(Structure, Structure.id == FactSet.structure_id, isouter=True) + .where(FactSet.report_id == report_id) + .order_by(FactSet.created_at) + ).all() + + items: list[ReportPackageItem] = [] + for fs, structure in rows: + envelope = get_information_block_for_fact_set(session, fs.id) + if envelope is None: + # FactSets whose Structure isn't a registered block type are + # skipped — they're a real data row but the package mode has no + # way to render them. 
+ continue + structure_type = structure.structure_type if structure is not None else None + items.append( + ReportPackageItem( + fact_set_id=fs.id, + structure_id=fs.structure_id, + display_order=_STRUCTURE_TYPE_DISPLAY_ORDER.get(structure_type or "", 50), + block=envelope, + ) + ) + + items.sort(key=lambda it: (it.display_order, it.fact_set_id)) + + return ReportPackageEnvelope( + id=report_def.id, + name=report_def.name, + description=report_def.description, + taxonomy_id=report_def.taxonomy_id, + period_type=report_def.period_type, + period_start=report_def.period_start, + period_end=report_def.period_end, + generation_status=report_def.generation_status, + last_generated=report_def.last_generated, + filing_status=report_def.filing_status, + filed_at=report_def.filed_at, + filed_by=report_def.filed_by, + supersedes_id=report_def.supersedes_id, + superseded_by_id=report_def.superseded_by_id, + source_graph_id=report_def.source_graph_id, + source_report_id=report_def.source_report_id, + shared_at=report_def.shared_at, + entity_name=entity_name, + ai_generated=report_def.ai_generated, + created_at=report_def.created_at, + created_by=report_def.created_by, + items=items, + ) + + def get_statement( session: Session, report_id: str, structure_type: str ) -> StatementResponse | None: diff --git a/robosystems/routers/extensions/roboledger/operations.py b/robosystems/routers/extensions/roboledger/operations.py index 8a5b41b7..d85506fe 100644 --- a/robosystems/routers/extensions/roboledger/operations.py +++ b/robosystems/routers/extensions/roboledger/operations.py @@ -127,6 +127,10 @@ CreatePublishListRequest, UpdatePublishListRequest, ) +from robosystems.models.api.extensions.report_package import ( + FileReportRequest, + TransitionFilingStatusRequest, +) from robosystems.models.api.extensions.reports import ( CreateReportRequest, RegenerateReportRequest, @@ -268,6 +272,7 @@ update_publish_list as cmd_update_publish_list, ) from 
robosystems.operations.roboledger.commands.reports import ( + InvalidFilingTransitionError, NoEntityError, NotAuthorizedError, PublishListEmptyError, @@ -284,12 +289,18 @@ from robosystems.operations.roboledger.commands.reports import ( delete_report as cmd_delete_report, ) +from robosystems.operations.roboledger.commands.reports import ( + file_report as cmd_file_report, +) from robosystems.operations.roboledger.commands.reports import ( regenerate_report as cmd_regenerate_report, ) from robosystems.operations.roboledger.commands.reports import ( share_report as cmd_share_report, ) +from robosystems.operations.roboledger.commands.reports import ( + transition_filing_status as cmd_transition_filing_status, +) from robosystems.operations.roboledger.commands.schedules import ( ScheduleNotFoundError, ) @@ -1588,6 +1599,111 @@ def _runner(): return await _dispatch(ctx, _runner, cache) +@router.post( + "/file-report", + response_model=OperationEnvelope, + operation_id="opFileReport", + summary="File Report", + description=( + "Transitions the Report's filing_status to 'filed' — locks the package. " + "Allowed from 'draft' or 'under_review'. Stamps filed_at + filed_by." 
+ ), + tags=[_OP_TAG], + dependencies=[_RATE_LIMIT], + responses={**OPERATION_ERROR_RESPONSES}, +) +@endpoint_metrics_decorator( + "/extensions/roboledger/{graph_id}/operations/file-report", + method="POST", + business_event_type="ledger_file_report", +) +async def file_report_op( + body: FileReportRequest, + graph_id: str = Path(..., pattern=GRAPH_OR_SUBGRAPH_ID_PATTERN), + user: User = Depends(get_current_user_with_graph), + _ext: GraphExtensionContext = Depends(_require_roboledger), + idempotency_key: str | None = Header(None, alias="Idempotency-Key"), + cache: IdempotencyCache = Depends(get_idempotency_cache), +) -> OperationEnvelope: + ctx = _ctx( + graph_id=graph_id, + user_id=str(user.id), + op="file-report", + idempotency_key=idempotency_key, + body=body, + ) + + def _runner(): + try: + with extensions_session(graph_id) as session: + try: + return cmd_file_report(session, body.report_id, filed_by=str(user.id)) + except ReportNotFoundError: + raise HTTPException( + status_code=404, detail=f"Report '{body.report_id}' not found." + ) + except InvalidFilingTransitionError as e: + raise HTTPException(status_code=422, detail=str(e)) + except (ValueError, ProgrammingError): + raise _ledger_404() + + return await _dispatch(ctx, _runner, cache) + + +@router.post( + "/transition-filing-status", + response_model=OperationEnvelope, + operation_id="opTransitionFilingStatus", + summary="Transition Filing Status", + description=( + "Move a Report along the non-file legs of the filing lifecycle " + "(draft ↔ under_review, filed → archived). Use 'file-report' to " + "reach 'filed' so audit fields land cleanly." 
+ ), + tags=[_OP_TAG], + dependencies=[_RATE_LIMIT], + responses={**OPERATION_ERROR_RESPONSES}, +) +@endpoint_metrics_decorator( + "/extensions/roboledger/{graph_id}/operations/transition-filing-status", + method="POST", + business_event_type="ledger_transition_filing_status", +) +async def transition_filing_status_op( + body: TransitionFilingStatusRequest, + graph_id: str = Path(..., pattern=GRAPH_OR_SUBGRAPH_ID_PATTERN), + user: User = Depends(get_current_user_with_graph), + _ext: GraphExtensionContext = Depends(_require_roboledger), + idempotency_key: str | None = Header(None, alias="Idempotency-Key"), + cache: IdempotencyCache = Depends(get_idempotency_cache), +) -> OperationEnvelope: + ctx = _ctx( + graph_id=graph_id, + user_id=str(user.id), + op="transition-filing-status", + idempotency_key=idempotency_key, + body=body, + ) + + def _runner(): + try: + with extensions_session(graph_id) as session: + try: + return cmd_transition_filing_status( + session, body.report_id, body.target_status + ) + except ReportNotFoundError: + raise HTTPException( + status_code=404, detail=f"Report '{body.report_id}' not found." + ) + except InvalidFilingTransitionError as e: + raise HTTPException(status_code=422, detail=str(e)) + except (ValueError, ProgrammingError): + raise _ledger_404() + + return await _dispatch(ctx, _runner, cache) + + # ═══════════════════════════════════════════════════════════════════════════ # Publish Lists # ═══════════════════════════════════════════════════════════════════════════ From 9959969e4dea22b6b4b8da4d0431e94f88b9506b Mon Sep 17 00:00:00 2001 From: "Joseph T. 
French" Date: Sat, 25 Apr 2026 23:39:13 -0500 Subject: [PATCH 4/7] feat: Add tests for report filing lifecycle commands and package read functionality --- .../test_envelope_fact_set.py | 29 +++ .../information_block/test_reads.py | 61 +++++ .../commands/test_reports_filing.py | 182 +++++++++++++++ .../roboledger/reads/test_reports_package.py | 212 ++++++++++++++++++ 4 files changed, 484 insertions(+) create mode 100644 tests/operations/roboledger/commands/test_reports_filing.py create mode 100644 tests/operations/roboledger/reads/test_reports_package.py diff --git a/tests/operations/information_block/test_envelope_fact_set.py b/tests/operations/information_block/test_envelope_fact_set.py index ab57b8ab..93bb6dd4 100644 --- a/tests/operations/information_block/test_envelope_fact_set.py +++ b/tests/operations/information_block/test_envelope_fact_set.py @@ -14,6 +14,7 @@ from robosystems.models.api.information_block import FactSetLite from robosystems.operations.information_block.envelope import ( fact_set_to_lite, + load_fact_set_by_id_for_structure, load_latest_fact_set_for_structure, ) @@ -82,3 +83,31 @@ def test_projects_scalar_row_to_lite(self) -> None: assert lite is not None assert lite.id == "fs_2026Q1" assert lite.factset_type == "report" + + +class TestLoadFactSetByIdForStructure: + """Pin lookup for the Report-Block read path — match Structure + id, or None.""" + + def test_returns_none_when_no_match(self) -> None: + """The query joins on both id AND structure_id; a mismatched pin + surfaces as no row.""" + session = MagicMock() + result = MagicMock() + result.scalar.return_value = None + session.execute.return_value = result + + assert load_fact_set_by_id_for_structure(session, "struct_bs", "fs_wrong") is None + session.execute.assert_called_once() + + def test_projects_match_to_lite(self) -> None: + session = MagicMock() + result = MagicMock() + result.scalar.return_value = _make_fact_set( + fs_id="fs_pinned", structure_id="struct_bs" + ) + 
session.execute.return_value = result + + lite = load_fact_set_by_id_for_structure(session, "struct_bs", "fs_pinned") + assert lite is not None + assert lite.id == "fs_pinned" + assert lite.structure_id == "struct_bs" diff --git a/tests/operations/information_block/test_reads.py b/tests/operations/information_block/test_reads.py index 376bf0fa..9307dddf 100644 --- a/tests/operations/information_block/test_reads.py +++ b/tests/operations/information_block/test_reads.py @@ -17,6 +17,7 @@ ) from robosystems.operations.information_block.reads import ( get_information_block, + get_information_block_for_fact_set, list_information_blocks, ) from robosystems.operations.information_block.registry import SCHEDULE_BLOCK @@ -221,3 +222,63 @@ def test_list_surfaces_statement_blocks_on_tenant_graph(self) -> None: ) assert len(result) == 1 assert result[0].block_type == "balance_sheet" + + +# ── get_information_block_for_fact_set ───────────────────────────────────── + + +class TestGetInformationBlockForFactSet: + """The Report-Block read path: pin envelope rehydration to a FactSet id.""" + + def _patched_session( + self, + *, + fact_set: object | None, + structure: object | None = None, + ) -> MagicMock: + """Session whose ``get`` returns the FactSet first, Structure second.""" + session = MagicMock() + session.get.side_effect = [fact_set, structure] + return session + + def test_returns_none_when_fact_set_missing(self) -> None: + session = MagicMock() + session.get.return_value = None + assert get_information_block_for_fact_set(session, "fs_missing") is None + + def test_returns_none_when_fact_set_has_no_structure(self) -> None: + """Legacy FactSets with a null ``structure_id`` can't drive a handler.""" + fs = MagicMock() + fs.structure_id = None + session = MagicMock() + session.get.return_value = fs + assert get_information_block_for_fact_set(session, "fs_legacy") is None + + def test_returns_none_when_block_type_unregistered(self) -> None: + fs = MagicMock() + fs.id = 
"fs_01" + fs.structure_id = "struct_coa" + structure = MagicMock() + structure.structure_type = "chart_of_accounts" + session = self._patched_session(fact_set=fs, structure=structure) + + assert get_information_block_for_fact_set(session, "fs_01") is None + + def test_dispatches_with_fact_set_pin(self) -> None: + """The pin (fact_set_id) is forwarded to the handler unchanged.""" + fs = MagicMock() + fs.id = "fs_01" + fs.structure_id = "struct_1" + structure = MagicMock() + structure.structure_type = "schedule" + session = self._patched_session(fact_set=fs, structure=structure) + + expected = _envelope("struct_1") + mock_build = MagicMock(return_value=expected) + patched = _schedule_entry_with_build(mock_build) + + with patch.dict(REGISTRY_PATH, {"schedule": patched}): + result = get_information_block_for_fact_set(session, "fs_01") + + assert result is expected + mock_build.assert_called_once_with(session, "struct_1", "fs_01") diff --git a/tests/operations/roboledger/commands/test_reports_filing.py b/tests/operations/roboledger/commands/test_reports_filing.py new file mode 100644 index 00000000..9a535ad7 --- /dev/null +++ b/tests/operations/roboledger/commands/test_reports_filing.py @@ -0,0 +1,182 @@ +"""Tests for the filing lifecycle commands — file_report + transition_filing_status.""" + +from __future__ import annotations + +from datetime import UTC, date, datetime +from unittest.mock import MagicMock, patch + +import pytest + +from robosystems.operations.roboledger.commands.reports import ( + InvalidFilingTransitionError, + ReportNotFoundError, + file_report, + transition_filing_status, +) + + +def _make_report_def( + *, + filing_status: str = "draft", + filed_at: datetime | None = None, + filed_by: str | None = None, +) -> MagicMock: + """A MagicMock that mimics a Report ORM row well enough for these tests.""" + r = MagicMock() + r.id = "rpt_01" + r.name = "Q1 2026 Statements" + r.taxonomy_id = "tax_usgaap_reporting" + r.generation_status = "complete" + 
r.period_type = "quarterly" + r.period_start = date(2026, 1, 1) + r.period_end = date(2026, 3, 31) + r.comparative = True + r.periods = None + r.mapping_id = "map_01" + r.ai_generated = False + r.created_at = datetime(2026, 4, 1, tzinfo=UTC) + r.last_generated = datetime(2026, 4, 1, tzinfo=UTC) + r.filing_status = filing_status + r.filed_at = filed_at + r.filed_by = filed_by + r.supersedes_id = None + r.superseded_by_id = None + r.source_graph_id = None + r.source_report_id = None + r.shared_at = None + return r + + +def _patch_response_helpers(): + """Patch the helpers report_to_response calls so we don't need real DB data. + + ``file_report`` / ``transition_filing_status`` import these helpers at + call time (lazy import to break a module-level cycle), so the patch + has to land on the source module — patching the importer's namespace + is too late. + """ + return patch.multiple( + "robosystems.operations.roboledger.reads.reports", + load_structures=MagicMock(return_value=[]), + resolve_entity_name=MagicMock(return_value=None), + report_to_response=MagicMock(side_effect=lambda r, _s, _e: r), + ) + + +# ── file_report ──────────────────────────────────────────────────────────── + + +def test_file_report_transitions_draft_to_filed() -> None: + session = MagicMock() + session.get.return_value = _make_report_def(filing_status="draft") + + with _patch_response_helpers(): + result = file_report(session, "rpt_01", filed_by="user_01") + + assert result.filing_status == "filed" + assert result.filed_by == "user_01" + assert result.filed_at is not None + session.flush.assert_called_once() + + +def test_file_report_transitions_under_review_to_filed() -> None: + session = MagicMock() + session.get.return_value = _make_report_def(filing_status="under_review") + + with _patch_response_helpers(): + result = file_report(session, "rpt_01", filed_by="user_01") + + assert result.filing_status == "filed" + + +def test_file_report_rejects_already_filed() -> None: + session = 
MagicMock() + session.get.return_value = _make_report_def( + filing_status="filed", + filed_at=datetime(2026, 4, 10, tzinfo=UTC), + filed_by="user_old", + ) + + with pytest.raises(InvalidFilingTransitionError) as exc: + file_report(session, "rpt_01", filed_by="user_01") + + assert "filed" in str(exc.value) + + +def test_file_report_rejects_archived() -> None: + session = MagicMock() + session.get.return_value = _make_report_def(filing_status="archived") + + with pytest.raises(InvalidFilingTransitionError): + file_report(session, "rpt_01", filed_by="user_01") + + +def test_file_report_raises_when_report_missing() -> None: + session = MagicMock() + session.get.return_value = None + + with pytest.raises(ReportNotFoundError): + file_report(session, "rpt_missing", filed_by="user_01") + + +# ── transition_filing_status ────────────────────────────────────────────── + + +def test_transition_draft_to_under_review() -> None: + session = MagicMock() + session.get.return_value = _make_report_def(filing_status="draft") + + with _patch_response_helpers(): + result = transition_filing_status(session, "rpt_01", "under_review") + + assert result.filing_status == "under_review" + + +def test_transition_under_review_back_to_draft() -> None: + session = MagicMock() + session.get.return_value = _make_report_def(filing_status="under_review") + + with _patch_response_helpers(): + result = transition_filing_status(session, "rpt_01", "draft") + + assert result.filing_status == "draft" + + +def test_transition_filed_to_archived() -> None: + session = MagicMock() + session.get.return_value = _make_report_def( + filing_status="filed", + filed_at=datetime(2026, 4, 10, tzinfo=UTC), + filed_by="user_01", + ) + + with _patch_response_helpers(): + result = transition_filing_status(session, "rpt_01", "archived") + + assert result.filing_status == "archived" + + +def test_transition_rejects_filing_via_generic_path() -> None: + """``filed`` must be reached via :func:`file_report` so audit fields 
land.""" + session = MagicMock() + session.get.return_value = _make_report_def(filing_status="under_review") + + with pytest.raises(InvalidFilingTransitionError): + transition_filing_status(session, "rpt_01", "filed") + + +def test_transition_rejects_archive_from_draft() -> None: + """Archive is only legal from filed — direct draft → archived is illegal.""" + session = MagicMock() + session.get.return_value = _make_report_def(filing_status="draft") + + with pytest.raises(InvalidFilingTransitionError): + transition_filing_status(session, "rpt_01", "archived") + + +def test_transition_raises_when_report_missing() -> None: + session = MagicMock() + session.get.return_value = None + + with pytest.raises(ReportNotFoundError): + transition_filing_status(session, "rpt_missing", "under_review") diff --git a/tests/operations/roboledger/reads/test_reports_package.py b/tests/operations/roboledger/reads/test_reports_package.py new file mode 100644 index 00000000..49732fd3 --- /dev/null +++ b/tests/operations/roboledger/reads/test_reports_package.py @@ -0,0 +1,212 @@ +"""Tests for ``get_report_package`` — Plan-C package-mode read. + +Verifies the read path that drives ``/reports/[id]`` in package mode: +load Report metadata, look up its FactSets via ``fact_sets.report_id``, +rehydrate each as an ``InformationBlockEnvelope`` via +``get_information_block_for_fact_set``, and order items by +``Structure.structure_type`` (BS → IS → CF → Equity → Schedule). 
+""" + +from __future__ import annotations + +from datetime import UTC, date, datetime +from unittest.mock import MagicMock, patch + +from robosystems.models.api.information_block import ( + ArtifactResponse, + InformationBlockEnvelope, + InformationModelResponse, + StatementMechanics, +) +from robosystems.operations.roboledger.reads.reports import get_report_package + + +def _make_report_def() -> MagicMock: + r = MagicMock() + r.id = "rpt_01" + r.name = "Q1 2026 Statements" + r.description = None + r.taxonomy_id = "tax_usgaap_reporting" + r.period_type = "quarterly" + r.period_start = date(2026, 1, 1) + r.period_end = date(2026, 3, 31) + r.generation_status = "complete" + r.last_generated = datetime(2026, 4, 1, tzinfo=UTC) + r.filing_status = "draft" + r.filed_at = None + r.filed_by = None + r.supersedes_id = None + r.superseded_by_id = None + r.source_graph_id = None + r.source_report_id = None + r.shared_at = None + r.entity_name = None + r.ai_generated = False + r.created_at = datetime(2026, 4, 1, tzinfo=UTC) + r.created_by = "user_01" + return r + + +def _make_fact_set(*, fs_id: str, structure_id: str) -> MagicMock: + fs = MagicMock() + fs.id = fs_id + fs.structure_id = structure_id + fs.report_id = "rpt_01" + fs.created_at = datetime(2026, 4, 1, tzinfo=UTC) + return fs + + +def _make_structure(*, structure_id: str, structure_type: str) -> MagicMock: + s = MagicMock() + s.id = structure_id + s.structure_type = structure_type + return s + + +def _envelope(structure_id: str, block_type: str) -> InformationBlockEnvelope: + return InformationBlockEnvelope( + id=structure_id, + block_type=block_type, + name=f"{block_type}-{structure_id}", + display_name=block_type.replace("_", " ").title(), + category="Reporting", + information_model=InformationModelResponse(concept_arrangement="roll_up"), + artifact=ArtifactResponse(mechanics=StatementMechanics(kind="statement_renderer")), + ) + + +def _patch_response_helpers(): + return patch.multiple( + 
"robosystems.operations.roboledger.reads.reports", + resolve_entity_name=MagicMock(return_value=None), + ) + + +# ── Cases ─────────────────────────────────────────────────────────────── + + +def test_returns_none_when_report_missing() -> None: + session = MagicMock() + session.get.return_value = None + + with _patch_response_helpers(): + assert get_report_package(session, "rpt_missing") is None + + +def test_assembles_items_ordered_by_structure_type() -> None: + """IS comes back from the SQL query before BS; the result still puts + BS (display_order=1) ahead of IS (display_order=2).""" + session = MagicMock() + session.get.return_value = _make_report_def() + + fs_is = _make_fact_set(fs_id="fs_is", structure_id="struct_is") + fs_bs = _make_fact_set(fs_id="fs_bs", structure_id="struct_bs") + struct_is = _make_structure( + structure_id="struct_is", structure_type="income_statement" + ) + struct_bs = _make_structure(structure_id="struct_bs", structure_type="balance_sheet") + + # Return order: IS first, BS second — to prove the read sorts deterministically. 
+ session.execute.return_value.all.return_value = [ + (fs_is, struct_is), + (fs_bs, struct_bs), + ] + + envelopes = { + "fs_is": _envelope("struct_is", "income_statement"), + "fs_bs": _envelope("struct_bs", "balance_sheet"), + } + + with ( + _patch_response_helpers(), + patch( + "robosystems.operations.information_block.reads.get_information_block_for_fact_set", + side_effect=lambda _s, fs_id: envelopes[fs_id], + ), + ): + pkg = get_report_package(session, "rpt_01") + + assert pkg is not None + assert [it.fact_set_id for it in pkg.items] == ["fs_bs", "fs_is"] + assert pkg.items[0].display_order == 1 # balance_sheet + assert pkg.items[1].display_order == 2 # income_statement + + +def test_skips_fact_sets_with_unregistered_block_types() -> None: + """A FactSet whose Structure isn't a registered block_type can't render — + the item is skipped silently rather than failing the whole package read.""" + session = MagicMock() + session.get.return_value = _make_report_def() + + fs_known = _make_fact_set(fs_id="fs_bs", structure_id="struct_bs") + fs_unknown = _make_fact_set(fs_id="fs_x", structure_id="struct_x") + struct_bs = _make_structure(structure_id="struct_bs", structure_type="balance_sheet") + struct_x = _make_structure( + structure_id="struct_x", structure_type="chart_of_accounts" + ) + session.execute.return_value.all.return_value = [ + (fs_known, struct_bs), + (fs_unknown, struct_x), + ] + + with ( + _patch_response_helpers(), + patch( + "robosystems.operations.information_block.reads.get_information_block_for_fact_set", + side_effect=lambda _s, fs_id: ( + _envelope("struct_bs", "balance_sheet") if fs_id == "fs_bs" else None + ), + ), + ): + pkg = get_report_package(session, "rpt_01") + + assert pkg is not None + assert [it.fact_set_id for it in pkg.items] == ["fs_bs"] + + +def test_carries_filing_lifecycle_fields_through() -> None: + """The package envelope surfaces filing_status, filed_at, filed_by, + supersedes/superseded chain, etc. 
from the underlying Report row.""" + session = MagicMock() + rd = _make_report_def() + rd.filing_status = "filed" + rd.filed_at = datetime(2026, 4, 15, tzinfo=UTC) + rd.filed_by = "user_filer" + rd.supersedes_id = "rpt_prior" + session.get.return_value = rd + session.execute.return_value.all.return_value = [] + + with _patch_response_helpers(): + pkg = get_report_package(session, "rpt_01") + + assert pkg is not None + assert pkg.filing_status == "filed" + assert pkg.filed_by == "user_filer" + assert pkg.supersedes_id == "rpt_prior" + assert pkg.items == [] + + +def test_unknown_structure_type_falls_back_to_default_order() -> None: + """Custom / future structure_types not in the display-order map land at the + fallback slot (50) — between statements (1-4) and schedules (100).""" + session = MagicMock() + session.get.return_value = _make_report_def() + + fs = _make_fact_set(fs_id="fs_custom", structure_id="struct_custom") + structure = _make_structure(structure_id="struct_custom", structure_type="custom") + session.execute.return_value.all.return_value = [(fs, structure)] + + envelope = _envelope("struct_custom", "custom") + + with ( + _patch_response_helpers(), + patch( + "robosystems.operations.information_block.reads.get_information_block_for_fact_set", + return_value=envelope, + ), + ): + pkg = get_report_package(session, "rpt_01") + + assert pkg is not None + assert len(pkg.items) == 1 + assert pkg.items[0].display_order == 50 From 11d67572675dec886d7286cd0475443c68f6a512 Mon Sep 17 00:00:00 2001 From: "Joseph T. 
French" Date: Sun, 26 Apr 2026 00:07:46 -0500 Subject: [PATCH 5/7] feat: Update dependencies and enhance report package handling with FactSet validation --- .vscode/extensions.json | 2 +- pyproject.toml | 2 +- .../adapters/sec/processors/classify.py | 12 +- robosystems/graphql/types/report_package.py | 33 ++-- .../operations/information_block/envelope.py | 16 +- .../operations/information_block/metric.py | 7 +- .../operations/information_block/schedule.py | 34 ++-- .../operations/roboledger/reads/reports.py | 6 +- tests/graphql/extensions/test_ledger.py | 104 +++++++++++ .../test_schedule_handlers.py | 65 ++++++- .../test_statement_handlers.py | 24 +-- .../extensions/roboledger/test_operations.py | 174 ++++++++++++++++++ uv.lock | 8 +- 13 files changed, 428 insertions(+), 59 deletions(-) diff --git a/.vscode/extensions.json b/.vscode/extensions.json index c0b0d667..d42d3003 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -6,6 +6,7 @@ "christian-kohler.npm-intellisense", "christian-kohler.path-intellisense", "dbaeumer.vscode-eslint", + "detachhead.basedpyright", "editorconfig.editorconfig", "esbenp.prettier-vscode", "fill-labs.dependi", @@ -14,7 +15,6 @@ "kddejong.vscode-cfn-lint", "ms-azuretools.vscode-containers", "ms-python.python", - "ms-python.vscode-pylance", "nefrob.vscode-just-syntax", "rvest.vs-code-prettier-eslint", "vitest.explorer", diff --git a/pyproject.toml b/pyproject.toml index c434765c..b3aeacec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,7 +106,7 @@ dependencies = [ [project.optional-dependencies] dev = [ # RoboSystems client for demo and testing - "robosystems-client==0.3.15", + "robosystems-client==0.3.16", # Testing framework "pytest>=8.4.0,<9.0", diff --git a/robosystems/adapters/sec/processors/classify.py b/robosystems/adapters/sec/processors/classify.py index b260a1ef..f6431c6c 100644 --- a/robosystems/adapters/sec/processors/classify.py +++ b/robosystems/adapters/sec/processors/classify.py @@ -754,11 
+754,19 @@ def _build_structure_factsets( report_factset_rels: list[dict] = [] # SEC filings produce one Report per ingestion; pull its identifier - # so each new FactSet can emit a REPORT_HAS_FACT_SET edge. + # so each new FactSet can emit a REPORT_HAS_FACT_SET edge. Dedupe + # defensively — if a filing somehow loaded multiple Report nodes, + # we still emit one edge per (report, fact_set) pair without + # multiplying duplicates. report_ids: list[str] = [] try: report_rows = ctx.execute("MATCH (r:Report) RETURN r.identifier AS report_id") - report_ids = [row["report_id"] for row in report_rows if row.get("report_id")] + seen: set[str] = set() + for row in report_rows: + rid = row.get("report_id") + if rid and rid not in seen: + seen.add(rid) + report_ids.append(rid) except Exception as e: logger.debug(f"FactSet report lookup failed: {e}") diff --git a/robosystems/graphql/types/report_package.py b/robosystems/graphql/types/report_package.py index 4d2da23c..cc256ff8 100644 --- a/robosystems/graphql/types/report_package.py +++ b/robosystems/graphql/types/report_package.py @@ -6,10 +6,17 @@ are split into this file because they're hand-written rather than auto-derived from Pydantic (the ``block`` field needs the manual ``InformationBlock.from_pydantic`` projection). + +Date / datetime fields use Strawberry's native scalar types so the +GraphQL schema is consistent with the auto-derived ``Report`` type +(which exposes them via Strawberry's pydantic integration). Codegen +on the client side maps these to typed Date / DateTime values. 
""" from __future__ import annotations +import datetime as _dt + import strawberry from robosystems.graphql.types.information_block import InformationBlock @@ -50,14 +57,14 @@ class ReportPackage: description: str | None taxonomy_id: str period_type: str - period_start: str | None - period_end: str | None + period_start: _dt.date | None + period_end: _dt.date | None generation_status: str - last_generated: str | None + last_generated: _dt.datetime | None filing_status: str - filed_at: str | None + filed_at: _dt.datetime | None filed_by: str | None supersedes_id: str | None @@ -65,11 +72,11 @@ class ReportPackage: source_graph_id: str | None source_report_id: str | None - shared_at: str | None + shared_at: _dt.datetime | None entity_name: str | None ai_generated: bool - created_at: str + created_at: _dt.datetime created_by: str items: list[ReportPackageItem] @@ -82,23 +89,21 @@ def from_pydantic(cls, envelope: PydanticReportPackageEnvelope) -> ReportPackage description=envelope.description, taxonomy_id=envelope.taxonomy_id, period_type=envelope.period_type, - period_start=envelope.period_start.isoformat() if envelope.period_start else None, - period_end=envelope.period_end.isoformat() if envelope.period_end else None, + period_start=envelope.period_start, + period_end=envelope.period_end, generation_status=envelope.generation_status, - last_generated=( - envelope.last_generated.isoformat() if envelope.last_generated else None - ), + last_generated=envelope.last_generated, filing_status=envelope.filing_status, - filed_at=envelope.filed_at.isoformat() if envelope.filed_at else None, + filed_at=envelope.filed_at, filed_by=envelope.filed_by, supersedes_id=envelope.supersedes_id, superseded_by_id=envelope.superseded_by_id, source_graph_id=envelope.source_graph_id, source_report_id=envelope.source_report_id, - shared_at=envelope.shared_at.isoformat() if envelope.shared_at else None, + shared_at=envelope.shared_at, entity_name=envelope.entity_name, 
ai_generated=envelope.ai_generated, - created_at=envelope.created_at.isoformat(), + created_at=envelope.created_at, created_by=envelope.created_by, items=[ReportPackageItem.from_pydantic(it) for it in envelope.items], ) diff --git a/robosystems/operations/information_block/envelope.py b/robosystems/operations/information_block/envelope.py index 914c04cf..5d3f9d39 100644 --- a/robosystems/operations/information_block/envelope.py +++ b/robosystems/operations/information_block/envelope.py @@ -354,6 +354,16 @@ def load_base_envelope_atoms( if structure is None or structure.structure_type != expected_block_type: return None + # Validate the FactSet pin (when provided) before doing the heavy + # association / element / rule loads — a mismatched pin is a clean + # miss and shouldn't pay for atoms that won't be returned. + if fact_set_id is not None: + fact_set = load_fact_set_by_id_for_structure(session, structure_id, fact_set_id) + if fact_set is None: + return None + else: + fact_set = load_latest_fact_set_for_structure(session, structure_id) + taxonomy_name = session.execute( select(Taxonomy.name).where(Taxonomy.id == structure.taxonomy_id) ).scalar() @@ -388,12 +398,6 @@ def load_base_envelope_atoms( session, [a.id for a in associations] ) - if fact_set_id is not None: - fact_set = load_fact_set_by_id_for_structure(session, structure_id, fact_set_id) - if fact_set is None: - return None - else: - fact_set = load_latest_fact_set_for_structure(session, structure_id) verification_results = load_verification_results_for_structure(session, structure_id) return BaseEnvelopeAtoms( diff --git a/robosystems/operations/information_block/metric.py b/robosystems/operations/information_block/metric.py index 3b669ccc..6507b139 100644 --- a/robosystems/operations/information_block/metric.py +++ b/robosystems/operations/information_block/metric.py @@ -45,10 +45,15 @@ def build_envelope( metric block as a placeholder. 
``fact_set_id`` is accepted for signature parity with the registry contract; metric blocks have no FactSet today so the pin is a no-op. + + TODO: honor ``fact_set_id`` once metric FactSets land — without the + pin a metric block surfaced inside a *filed* Report Block would + render today's evaluation rather than the snapshot reviewed at file + time. """ from sqlalchemy import select - del fact_set_id # currently unused; metric FactSet wiring lands later + del fact_set_id # see TODO above; metric FactSet wiring lands later structure = session.get(Structure, structure_id) if structure is None or structure.structure_type != METRIC_BLOCK_TYPE: diff --git a/robosystems/operations/information_block/schedule.py b/robosystems/operations/information_block/schedule.py index e3d362b9..7bfda222 100644 --- a/robosystems/operations/information_block/schedule.py +++ b/robosystems/operations/information_block/schedule.py @@ -153,11 +153,15 @@ def build_envelope( Returns ``None`` when the structure doesn't exist or isn't a schedule, so the generic reader can cleanly distinguish misses from errors. Mechanics are read from the typed ``artifact_mechanics`` column with - fallback to legacy ``metadata_`` JSONB. ``fact_set_id`` pins the - envelope to a specific FactSet snapshot (Report Block rehydration); - facts are still read from the Structure's full set since schedule - facts are stamped with ``structure_id`` directly — the pin only - scopes the ``fact_set`` projection. + fallback to legacy ``metadata_`` JSONB. + + ``fact_set_id`` pins the envelope to a specific FactSet snapshot — + the Report-Block rehydration path uses this to surface the frozen + fact slice that was reviewed at file time. When provided, facts are + filtered by ``fact_set_id`` so that viewing a filed Report shows the + exact snapshot rather than today's facts. The default (no pin) + publishes every in-scope fact for the Structure, which is what the + live closing-book mode expects. 
""" atoms = load_base_envelope_atoms( session, @@ -188,16 +192,16 @@ def build_envelope( # Schedules publish only in-scope facts — historical facts were # already reflected in opening balances and shouldn't surface as # envelope data (they'd confuse agents into re-drafting closed work). - facts = ( - session.execute( - select(Fact).where( - Fact.structure_id == structure_id, - Fact.fact_scope == "in_scope", - ) - ) - .scalars() - .all() - ) + # When a FactSet pin is supplied (Report-Block rehydration), facts + # are also scoped to that pinned snapshot so a filed Report renders + # the exact slice reviewed at file time, not today's drafts. + fact_filters = [ + Fact.structure_id == structure_id, + Fact.fact_scope == "in_scope", + ] + if fact_set_id is not None: + fact_filters.append(Fact.fact_set_id == fact_set_id) + facts = session.execute(select(Fact).where(*fact_filters)).scalars().all() return InformationBlockEnvelope( id=structure.id, diff --git a/robosystems/operations/roboledger/reads/reports.py b/robosystems/operations/roboledger/reads/reports.py index be7a4902..eb0e422d 100644 --- a/robosystems/operations/roboledger/reads/reports.py +++ b/robosystems/operations/roboledger/reads/reports.py @@ -314,9 +314,13 @@ def get_report_package( entity_name = resolve_entity_name(session, report_def) # Load FactSets (with their Structures) attached to this Report. + # Inner join is correct: a FactSet without a Structure can't drive a + # registered handler (``get_information_block_for_fact_set`` returns + # None for a null ``structure_id``), so the orphan is dropped here + # rather than dragged through the rehydration step. 
rows = session.execute( select(FactSet, Structure) - .join(Structure, Structure.id == FactSet.structure_id, isouter=True) + .join(Structure, Structure.id == FactSet.structure_id) .where(FactSet.report_id == report_id) .order_by(FactSet.created_at) ).all() diff --git a/tests/graphql/extensions/test_ledger.py b/tests/graphql/extensions/test_ledger.py index 795444db..fbfb101f 100644 --- a/tests/graphql/extensions/test_ledger.py +++ b/tests/graphql/extensions/test_ledger.py @@ -564,3 +564,107 @@ def test_bounds_apply_to_agents_too(self) -> None: ) assert result.errors is not None assert any("limit must be between" in str(e.message) for e in result.errors) + + +class TestReportPackageResolver: + """GraphQL ``reportPackage(reportId)`` field — Plan-C package mode read.""" + + def _make_envelope(self): + """Minimal ReportPackageEnvelope for resolver projection tests.""" + from datetime import UTC + from datetime import date as _date + from datetime import datetime as _dt + + from robosystems.models.api.extensions.report_package import ( + ReportPackageEnvelope, + ) + + return ReportPackageEnvelope( + id="rpt_01", + name="Q1 2026 Statements", + description=None, + taxonomy_id="tax_usgaap_reporting", + period_type="quarterly", + period_start=_date(2026, 1, 1), + period_end=_date(2026, 3, 31), + generation_status="complete", + last_generated=_dt(2026, 4, 1, tzinfo=UTC), + filing_status="filed", + filed_at=_dt(2026, 4, 15, tzinfo=UTC), + filed_by="usr_01", + supersedes_id=None, + superseded_by_id=None, + source_graph_id=None, + source_report_id=None, + shared_at=None, + entity_name="Acme LLC", + ai_generated=False, + created_at=_dt(2026, 4, 1, tzinfo=UTC), + created_by="usr_01", + items=[], + ) + + def test_returns_package_when_found(self) -> None: + envelope = self._make_envelope() + with ( + _patch_session(), + patch( + "robosystems.operations.roboledger.reads.reports.get_report_package", + return_value=envelope, + ), + ): + result = schema.execute_sync( + """ + query Q($id: 
String!) { + reportPackage(reportId: $id) { + id name filingStatus filedBy entityName items { factSetId } + } + } + """, + variable_values={"id": "rpt_01"}, + context_value=_ctx(), + ) + + assert result.errors is None + assert result.data is not None + pkg = result.data["reportPackage"] + assert pkg["id"] == "rpt_01" + assert pkg["filingStatus"] == "filed" + assert pkg["filedBy"] == "usr_01" + assert pkg["entityName"] == "Acme LLC" + assert pkg["items"] == [] + + def test_returns_null_when_report_missing(self) -> None: + with ( + _patch_session(), + patch( + "robosystems.operations.roboledger.reads.reports.get_report_package", + return_value=None, + ), + ): + result = schema.execute_sync( + "query Q($id: String!) { reportPackage(reportId: $id) { id } }", + variable_values={"id": "rpt_missing"}, + context_value=_ctx(), + ) + assert result.errors is None + assert result.data == {"reportPackage": None} + + def test_raises_typed_error_when_schema_not_initialized(self) -> None: + with ( + _patch_session(), + patch( + "robosystems.operations.roboledger.reads.reports.get_report_package", + side_effect=ProgrammingError("stmt", {}, Exception("schema missing")), + ), + ): + result = schema.execute_sync( + "query Q($id: String!) 
{ reportPackage(reportId: $id) { id } }", + variable_values={"id": "rpt_01"}, + context_value=_ctx(), + ) + + assert result.errors is not None + err = result.errors[0] + assert "Ledger not initialized" in err.message + assert err.extensions == {"code": "LEDGER_NOT_INITIALIZED"} diff --git a/tests/operations/information_block/test_schedule_handlers.py b/tests/operations/information_block/test_schedule_handlers.py index a47bc7ac..51e17ae2 100644 --- a/tests/operations/information_block/test_schedule_handlers.py +++ b/tests/operations/information_block/test_schedule_handlers.py @@ -153,10 +153,10 @@ def test_packs_mechanics_from_typed_artifact_mechanics_column(self) -> None: structure.metadata_ = {"should_be_ignored": True} session.get.return_value = structure session.execute.side_effect = [ + _exec_result(scalar=None), # latest fact set → None _exec_result(scalar="US GAAP"), # taxonomy name _exec_result(scalars_all=[]), # associations _exec_result(scalars_all=[]), # rules - _exec_result(scalar=None), # latest fact set → None _exec_result(scalars_all=[]), # verification results _exec_result(scalar=5), # periods_with_entries count _exec_result(scalars_all=[]), # facts @@ -209,10 +209,10 @@ def test_legacy_fallback_raises_when_entry_template_missing(self) -> None: structure.metadata_ = {} # no entry_template key session.get.return_value = structure session.execute.side_effect = [ + _exec_result(scalar=None), # latest fact set → None _exec_result(scalar="US GAAP"), # taxonomy name _exec_result(scalars_all=[]), # associations _exec_result(scalars_all=[]), # rules - _exec_result(scalar=None), # latest fact set → None _exec_result(scalars_all=[]), # verification results _exec_result(scalar=0), # periods_with_entries ] @@ -247,10 +247,10 @@ def test_falls_back_to_metadata_jsonb_when_artifact_mechanics_null(self) -> None } session.get.return_value = structure session.execute.side_effect = [ + _exec_result(scalar=None), # latest fact set → None _exec_result(scalar="US GAAP"), 
# taxonomy name _exec_result(scalars_all=[]), # associations _exec_result(scalars_all=[]), # rules - _exec_result(scalar=None), # latest fact set → None _exec_result(scalars_all=[]), # verification results _exec_result(scalar=0), # periods_with_entries _exec_result(scalars_all=[]), # facts @@ -263,3 +263,62 @@ def test_falls_back_to_metadata_jsonb_when_artifact_mechanics_null(self) -> None assert mechanics.schedule_metadata is None assert mechanics.periods_with_entries == 0 assert envelope.information_model.concept_arrangement == "roll_forward" + + def test_facts_filtered_by_fact_set_pin_for_report_package(self) -> None: + """When a fact_set_id is supplied (Report-Block rehydration), the + facts query must filter by ``Fact.fact_set_id``. Without the pin + a filed Report would render today's drafts instead of the + snapshot the package was reviewed against.""" + from sqlalchemy.sql.elements import BinaryExpression + + session = MagicMock() + structure = MagicMock() + structure.id = "struct_dep" + structure.structure_type = "schedule" + structure.name = "Equipment Depreciation" + structure.description = None + structure.taxonomy_id = "tax_01" + structure.concept_arrangement = "roll_forward" + structure.member_arrangement = None + structure.parenthetical_note = None + structure.artifact_mechanics = { + "kind": "closing_entry_generator", + "entry_template": { + "debit_element_id": "elem_dep", + "credit_element_id": "elem_accum", + }, + } + structure.metadata_ = {} + session.get.return_value = structure + + fact_set = MagicMock() + fact_set.id = "fs_pinned" + fact_set.structure_id = "struct_dep" + fact_set.period_start = date(2026, 1, 1) + fact_set.period_end = date(2026, 1, 31) + fact_set.factset_type = "schedule" + fact_set.entity_id = "ent_demo" + fact_set.report_id = None + + session.execute.side_effect = [ + _exec_result(scalar=fact_set), # fact_set lookup (pinned) + _exec_result(scalar="US GAAP"), # taxonomy name + _exec_result(scalars_all=[]), # associations + 
_exec_result(scalars_all=[]), # rules + _exec_result(scalars_all=[]), # verification results + _exec_result(scalar=0), # periods_with_entries + _exec_result(scalars_all=[]), # facts (filtered by pin) + ] + + schedule_handlers.build_envelope(session, "struct_dep", fact_set_id="fs_pinned") + + # Inspect the facts query: the pin filter must appear in the WHERE. + facts_query = session.execute.call_args_list[6].args[0] + where_clause = str(facts_query.compile(compile_kwargs={"literal_binds": True})) + assert "fact_set_id = 'fs_pinned'" in where_clause + # And the structure / scope filters must still be present. + assert "structure_id = 'struct_dep'" in where_clause + assert "fact_scope = 'in_scope'" in where_clause + # Sanity check: BinaryExpression import is exercised so we don't + # silently lose the type if the SQLAlchemy WHERE shape changes. + del BinaryExpression diff --git a/tests/operations/information_block/test_statement_handlers.py b/tests/operations/information_block/test_statement_handlers.py index a241382f..9df2b40c 100644 --- a/tests/operations/information_block/test_statement_handlers.py +++ b/tests/operations/information_block/test_statement_handlers.py @@ -159,12 +159,13 @@ def test_returns_envelope_with_empty_facts_when_no_reports_exist(self) -> None: ) session.get.return_value = structure # Query order (no associations → no elements or classifications queries): - # taxonomy name → associations → rules → fact_set → verification_results + # fact_set (validated first so a stale pin doesn't pay for atom loads) + # → taxonomy name → associations → rules → verification_results session.execute.side_effect = [ + _exec_result(scalar=None), # latest fact set → None _exec_result(scalar="US GAAP"), # taxonomy name _exec_result(scalars_all=[]), # associations _exec_result(scalars_all=[]), # rules - _exec_result(scalar=None), # latest fact set → None _exec_result(scalars_all=[]), # verification results ] @@ -232,17 +233,16 @@ def 
test_loads_elements_and_associations_from_library_seed(self) -> None: element_sales.period_type = "duration" # Query order (1 association → elements + classifications queries run): - # taxonomy → associations → elements → rules → classifications → - # fact_set → verification_results → latest_report_id + # fact_set → taxonomy → associations → elements → rules → classifications → + # verification_results session.execute.side_effect = [ + _exec_result(scalar=None), # latest fact set → None _exec_result(scalar="US GAAP"), # taxonomy name _exec_result(scalars_all=[association]), # associations _exec_result(scalars_all=[element_revenue, element_sales]), # elements _exec_result(scalars_all=[]), # rules _exec_result(all_rows=[]), # association classifications - _exec_result(scalar=None), # latest fact set → None _exec_result(scalars_all=[]), # verification results - _exec_result(scalar=None), # latest report id → None (no reports) ] build = statement_handlers.make_statement_handlers("income_statement") @@ -314,17 +314,19 @@ def test_facts_populated_from_latest_fact_set(self) -> None: fact.fact_scope = "in_scope" fact.fact_set_id = "fset_balance_sheet_2026q1" - # Query order (Plan B — fact_set_id read path): - # taxonomy → associations → elements → rules → assoc-classifications → - # fact_set → verification_results → facts (filtered by fact_set.id) → + # Query order (Plan B — fact_set_id read path; FactSet validated + # first so the pin path can short-circuit on mismatch): + # fact_set → taxonomy → associations → elements → rules → + # assoc-classifications → verification_results → + # facts (filtered by fact_set.id) → # element-classifications (rendering projection trait lookup) session.execute.side_effect = [ + _exec_result(scalar=fact_set), # latest fact set _exec_result(scalar="US GAAP"), # taxonomy name _exec_result(scalars_all=[assoc]), # associations _exec_result(scalars_all=[element]), # elements _exec_result(scalars_all=[]), # rules _exec_result(all_rows=[]), # 
association classifications - _exec_result(scalar=fact_set), # latest fact set _exec_result(scalars_all=[]), # verification results _exec_result(scalars_all=[fact]), # facts (filtered by fact_set.id) _exec_result(all_rows=[]), # element classifications (Plan B) @@ -357,10 +359,10 @@ def test_display_metadata_is_block_type_specific(self, block_type: str) -> None: session.get.return_value = structure # No associations → no elements or classifications queries. session.execute.side_effect = [ + _exec_result(scalar=None), # latest fact set → None _exec_result(scalar="US GAAP"), _exec_result(scalars_all=[]), # associations _exec_result(scalars_all=[]), # rules - _exec_result(scalar=None), # latest fact set → None _exec_result(scalars_all=[]), # verification results ] diff --git a/tests/routers/extensions/roboledger/test_operations.py b/tests/routers/extensions/roboledger/test_operations.py index 30229ed3..3b2ccbda 100644 --- a/tests/routers/extensions/roboledger/test_operations.py +++ b/tests/routers/extensions/roboledger/test_operations.py @@ -27,10 +27,17 @@ JournalEntryResponse, UpdateJournalEntryRequest, ) +from robosystems.models.api.extensions.report_package import ( + FileReportRequest, + TransitionFilingStatusRequest, +) +from robosystems.models.api.extensions.reports import ReportResponse from robosystems.routers.extensions.roboledger.operations import ( AutoMapElementsOperation, auto_map_elements_op, delete_journal_entry_op, + file_report_op, + transition_filing_status_op, update_entity_op, update_journal_entry_op, ) @@ -601,3 +608,170 @@ async def test_422_when_posted(self) -> None: # `create-event-block(event_type='journal_entry_reversed')`. See # tests/operations/event_block/python_handlers/test_journal_entry_reversed.py # for coverage of the event-driven path. 
+ + +# ── Filing lifecycle ops (Plan C) ────────────────────────────────────────── + + +def _make_filed_report_response() -> ReportResponse: + """Stand-in for the ReportResponse returned by file_report / transition.""" + return ReportResponse( + id="rpt_01", + name="Q1 2026 Statements", + taxonomy_id="tax_usgaap_reporting", + generation_status="complete", + period_type="quarterly", + period_start=date(2026, 1, 1), + period_end=date(2026, 3, 31), + comparative=True, + created_at=datetime(2026, 4, 1, tzinfo=UTC), + filing_status="filed", + filed_at=datetime(2026, 4, 15, tzinfo=UTC), + filed_by="usr_test123", + ) + + +class TestFileReportOp: + @pytest.mark.asyncio + async def test_happy_path_wraps_filed_report_in_envelope(self) -> None: + body = FileReportRequest(report_id="rpt_01") + with ( + patch( + "robosystems.routers.extensions.roboledger.operations.cmd_file_report", + return_value=_make_filed_report_response(), + ), + _mock_session_ctx() as mock_session, + ): + mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_session.return_value.__exit__ = MagicMock(return_value=False) + + envelope = await file_report_op( + body=body, + graph_id=GRAPH_ID, + user=_make_user(), + idempotency_key=None, + cache=_FakeCache(), + ) + + assert isinstance(envelope, OperationEnvelope) + assert envelope.operation == "file-report" + assert envelope.status == "completed" + assert envelope.result is not None + assert envelope.result["filing_status"] == "filed" + assert envelope.result["filed_by"] == "usr_test123" + + @pytest.mark.asyncio + async def test_404_when_report_missing(self) -> None: + from robosystems.operations.roboledger.commands.reports import ReportNotFoundError + + body = FileReportRequest(report_id="rpt_missing") + with ( + patch( + "robosystems.routers.extensions.roboledger.operations.cmd_file_report", + side_effect=ReportNotFoundError("rpt_missing"), + ), + _mock_session_ctx() as mock_session, + ): + mock_session.return_value.__enter__ = 
MagicMock(return_value=MagicMock()) + mock_session.return_value.__exit__ = MagicMock(return_value=False) + + with pytest.raises(HTTPException) as exc: + await file_report_op( + body=body, + graph_id=GRAPH_ID, + user=_make_user(), + idempotency_key=None, + cache=_FakeCache(), + ) + assert exc.value.status_code == 404 + assert "rpt_missing" in exc.value.detail + + @pytest.mark.asyncio + async def test_422_when_transition_illegal(self) -> None: + from robosystems.operations.roboledger.commands.reports import ( + InvalidFilingTransitionError, + ) + + body = FileReportRequest(report_id="rpt_01") + with ( + patch( + "robosystems.routers.extensions.roboledger.operations.cmd_file_report", + side_effect=InvalidFilingTransitionError( + "Report 'rpt_01' is in 'archived'; can only file from 'draft' or 'under_review'." + ), + ), + _mock_session_ctx() as mock_session, + ): + mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_session.return_value.__exit__ = MagicMock(return_value=False) + + with pytest.raises(HTTPException) as exc: + await file_report_op( + body=body, + graph_id=GRAPH_ID, + user=_make_user(), + idempotency_key=None, + cache=_FakeCache(), + ) + assert exc.value.status_code == 422 + assert "archived" in exc.value.detail + + +class TestTransitionFilingStatusOp: + @pytest.mark.asyncio + async def test_happy_path_returns_transitioned_report(self) -> None: + response = _make_filed_report_response() + response.filing_status = "under_review" + body = TransitionFilingStatusRequest( + report_id="rpt_01", target_status="under_review" + ) + with ( + patch( + "robosystems.routers.extensions.roboledger.operations.cmd_transition_filing_status", + return_value=response, + ), + _mock_session_ctx() as mock_session, + ): + mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_session.return_value.__exit__ = MagicMock(return_value=False) + + envelope = await transition_filing_status_op( + body=body, + graph_id=GRAPH_ID, + 
user=_make_user(), + idempotency_key=None, + cache=_FakeCache(), + ) + assert envelope.operation == "transition-filing-status" + assert envelope.result is not None + assert envelope.result["filing_status"] == "under_review" + + @pytest.mark.asyncio + async def test_422_when_transition_illegal(self) -> None: + from robosystems.operations.roboledger.commands.reports import ( + InvalidFilingTransitionError, + ) + + body = TransitionFilingStatusRequest(report_id="rpt_01", target_status="filed") + with ( + patch( + "robosystems.routers.extensions.roboledger.operations.cmd_transition_filing_status", + side_effect=InvalidFilingTransitionError( + "Report 'rpt_01' cannot transition from 'under_review' to 'filed'." + ), + ), + _mock_session_ctx() as mock_session, + ): + mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) + mock_session.return_value.__exit__ = MagicMock(return_value=False) + + with pytest.raises(HTTPException) as exc: + await transition_filing_status_op( + body=body, + graph_id=GRAPH_ID, + user=_make_user(), + idempotency_key=None, + cache=_FakeCache(), + ) + assert exc.value.status_code == 422 + assert "filed" in exc.value.detail diff --git a/uv.lock b/uv.lock index 2150315a..acf17df5 100644 --- a/uv.lock +++ b/uv.lock @@ -3464,7 +3464,7 @@ requires-dist = [ { name = "retrying", specifier = ">=1.4.0,<2.0" }, { name = "rich", marker = "extra == 'dev'", specifier = ">=14.0.0,<15.0" }, { name = "robosystems-client", specifier = ">=0.3.14" }, - { name = "robosystems-client", marker = "extra == 'dev'", specifier = "==0.3.15" }, + { name = "robosystems-client", marker = "extra == 'dev'", specifier = "==0.3.16" }, { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.12.0,<1.0" }, { name = "sqlalchemy", specifier = ">=2.0.0,<3.0" }, { name = "sse-starlette", specifier = ">=3.3.0,<4.0" }, @@ -3487,7 +3487,7 @@ lambda = [ [[package]] name = "robosystems-client" -version = "0.3.15" +version = "0.3.16" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -3496,9 +3496,9 @@ dependencies = [ { name = "python-dateutil" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a3/17/2b0c9de685ad30f6c4fdb06f3e806f33c2dcee7017589834e256729e877a/robosystems_client-0.3.15.tar.gz", hash = "sha256:bbc60e8211dda9bd98ab3cd68545ab310f91667c96ab0973eed51a0302410a48", size = 293072, upload-time = "2026-04-25T23:53:28.51Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/12/e1295852dfe565ee7953d63a1119377340cc58f62697b107ffa11b6d465a/robosystems_client-0.3.16.tar.gz", hash = "sha256:5b477dd1bdc6bd37e31bbca6d4c302da23f5a04a0a88e574c267a2c5fa8d35f6", size = 295865, upload-time = "2026-04-26T04:50:17.721Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/68/69d5f5b11841d15d08d636a7e314e814642a87f99093a0647c4430b815f3/robosystems_client-0.3.15-py3-none-any.whl", hash = "sha256:dacd89b6e17302ec2e97b5fd895af7fbc92a7b2bb942c35f1a6721e414091fa7", size = 677538, upload-time = "2026-04-25T23:53:26.408Z" }, + { url = "https://files.pythonhosted.org/packages/b0/76/0ff7ee4fed99c83e15756b36f56dc3fa08fd391191807e9549798e1dad5d/robosystems_client-0.3.16-py3-none-any.whl", hash = "sha256:a03cee03ea20f140c91d9c6f5d5a0e6af13a24a0185a742c833ff17140399c21", size = 684171, upload-time = "2026-04-26T04:50:15.937Z" }, ] [[package]] From 57eb8f7f61a334cde7c942360bd4d60c6353ff54 Mon Sep 17 00:00:00 2001 From: "Joseph T. French" Date: Sun, 26 Apr 2026 00:46:51 -0500 Subject: [PATCH 6/7] feat(roboledger_demo): Add synthetic data generation and accounting policies - Implemented `oltp_writer.py` to write demo data into a DuckDB file for OLTPLoader. - Created `policies.py` containing accounting policy documents for Cascade Advisory Group LLC. - Added `prompt.md` for AI month-end close workflow instructions. - Developed `validate.py` for Information Block validation, ensuring integrity of schedules and rules. 
- Updated `justfile` to include a new demo command for RoboLedger. - Refactored taxonomy command documentation to reflect changes in demo structure. - Modified test cases to use the new session context for RoboLedger operations. --- .claude/worktrees/elegant-johnson-846da4 | 1 - .gitignore | 1 + README.md | 2 +- examples/README.md | 2 +- .../{close_demo => roboledger_demo}/README.md | 11 +- .../__init__.py | 0 .../{close_demo => roboledger_demo}/_reset.py | 2 +- .../{close_demo => roboledger_demo}/data.py | 0 .../{close_demo => roboledger_demo}/main.py | 107 ++++++++++++++++-- .../mappings.py | 0 .../oltp_writer.py | 6 +- .../policies.py | 0 .../{close_demo => roboledger_demo}/prompt.md | 0 .../validate.py | 6 +- justfile | 8 +- .../roboledger/commands/taxonomies.py | 2 +- .../extensions/roboledger/test_operations.py | 20 +++- 17 files changed, 133 insertions(+), 35 deletions(-) delete mode 160000 .claude/worktrees/elegant-johnson-846da4 rename examples/{close_demo => roboledger_demo}/README.md (90%) rename examples/{close_demo => roboledger_demo}/__init__.py (100%) rename examples/{close_demo => roboledger_demo}/_reset.py (99%) rename examples/{close_demo => roboledger_demo}/data.py (100%) rename examples/{close_demo => roboledger_demo}/main.py (86%) rename examples/{close_demo => roboledger_demo}/mappings.py (100%) rename examples/{close_demo => roboledger_demo}/oltp_writer.py (97%) rename examples/{close_demo => roboledger_demo}/policies.py (100%) rename examples/{close_demo => roboledger_demo}/prompt.md (100%) rename examples/{close_demo => roboledger_demo}/validate.py (98%) diff --git a/.claude/worktrees/elegant-johnson-846da4 b/.claude/worktrees/elegant-johnson-846da4 deleted file mode 160000 index 67eea26c..00000000 --- a/.claude/worktrees/elegant-johnson-846da4 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 67eea26c2c61801272c84cd69e3d2bd3b2f73e98 diff --git a/.gitignore b/.gitignore index 45ab1f03..40576c99 100644 --- a/.gitignore +++ b/.gitignore @@ 
-132,6 +132,7 @@ dmypy.json # AI - Claude Code .claude/settings.local.json .claude/scheduled_tasks.lock +.claude/worktrees/ CLAUDE.local.md .gemini/ diff --git a/README.md b/README.md index 57ab93e3..9ae583ec 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ See RoboSystems in action with runnable demos that create graphs, load data, and ```bash just demo-sec # Loads NVIDIA's SEC XBRL data via Dagster pipeline -just demo-close # Entity accounting month close demo +just demo-roboledger # End-to-end RoboLedger demo: bulk OLTP, schedules, FY 2025 filed report, AI close just demo-custom-graph # Builds custom graph schema with relationship networks ``` diff --git a/examples/README.md b/examples/README.md index 3b990b21..0f532e8d 100644 --- a/examples/README.md +++ b/examples/README.md @@ -12,7 +12,7 @@ just start just demo # Or run individual demos -just demo-close +just demo-roboledger just demo-custom-graph just demo-sec NVDA 2025 ``` diff --git a/examples/close_demo/README.md b/examples/roboledger_demo/README.md similarity index 90% rename from examples/close_demo/README.md rename to examples/roboledger_demo/README.md index 2303a9df..1ec17c07 100644 --- a/examples/close_demo/README.md +++ b/examples/roboledger_demo/README.md @@ -1,6 +1,6 @@ -# AI Month-End Close Demo +# RoboLedger End-to-End Demo -Demonstrates the full AI-assisted month-end close workflow using synthetic data for a boutique consulting firm (Cascade Advisory Group LLC). +Demonstrates the full RoboLedger workflow — bulk OLTP import, taxonomy & schedule blocks, fiscal calendar, a filed FY 2025 annual report, and an AI-driven month-end close — using synthetic data for a boutique consulting firm (Cascade Advisory Group LLC). Data is generated on a **rolling 16-month window ending at the current month**, so the demo always covers "recent history" no matter when it's run. 
The OLTP load path is the same `OLTPLoader` the QuickBooks pipeline uses in production — synthetic data is written to a DuckDB file in the exact shape dbt produces, then handed off to the loader. @@ -13,13 +13,13 @@ The demo also initializes a **fiscal calendar** with `closed_through = month_bef just start # Run the demo setup (creates graph, loads data, creates schedules, uploads policies) -uv run python -m examples.close_demo.main +uv run python -m examples.roboledger_demo.main # Or load into an existing graph -uv run python -m examples.close_demo.main +uv run python -m examples.roboledger_demo.main # Dry run (validate data only) -uv run python -m examples.close_demo.main --dry-run +uv run python -m examples.roboledger_demo.main --dry-run ``` ## What Gets Created @@ -36,6 +36,7 @@ uv run python -m examples.close_demo.main --dry-run | **Schedules** | 6 | 2 depreciation + 4 prepaid amortization schedules (staggered renewals) | | **Schedule Facts** | mixed | Historical (pre-target) vs in_scope (target onward) — close workflow only acts on in_scope | | **Documents** | 4 | Close procedures, depreciation policy, prepaid policy, revenue policy | +| **FY 2025 Report** | 1 | Annual report — generated, packaged, and **filed** as a Plan C capstone (Report Block lifecycle end-to-end). The current period stays queued for the AI close workflow. | ## The Company diff --git a/examples/close_demo/__init__.py b/examples/roboledger_demo/__init__.py similarity index 100% rename from examples/close_demo/__init__.py rename to examples/roboledger_demo/__init__.py diff --git a/examples/close_demo/_reset.py b/examples/roboledger_demo/_reset.py similarity index 99% rename from examples/close_demo/_reset.py rename to examples/roboledger_demo/_reset.py index cd4d41e8..7122599b 100644 --- a/examples/close_demo/_reset.py +++ b/examples/roboledger_demo/_reset.py @@ -1,4 +1,4 @@ -"""Demo-only reset logic for close_demo. +"""Demo-only reset logic for roboledger_demo. 
This is NOT a production operation. It selectively wipes demo-generated state while preserving graph infrastructure (entity, library-seeded diff --git a/examples/close_demo/data.py b/examples/roboledger_demo/data.py similarity index 100% rename from examples/close_demo/data.py rename to examples/roboledger_demo/data.py diff --git a/examples/close_demo/main.py b/examples/roboledger_demo/main.py similarity index 86% rename from examples/close_demo/main.py rename to examples/roboledger_demo/main.py index 25da3fec..6e0056df 100644 --- a/examples/close_demo/main.py +++ b/examples/roboledger_demo/main.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -"""Cascade Advisory Group LLC — AI Month-End Close Demo +"""Cascade Advisory Group LLC — RoboLedger End-to-End Demo Sets up a complete demo environment with synthetic consulting company data, -CoA→GAAP mappings, depreciation/prepaid schedules, and accounting policy -documents. After running, use Claude Desktop or MCP tools to simulate -a month-end close. +CoA→GAAP mappings, depreciation/prepaid schedules, accounting policy +documents, and a filed FY 2025 annual report. After running, use Claude +Desktop or MCP tools to simulate the close workflow on the queued period. Data is generated for a rolling 16-month window ending at the current month, so the demo stays evergreen. OLTP load goes through the same `OLTPLoader` @@ -21,10 +21,10 @@ access in `_reset.py` — this is intentionally NOT a product operation. 
Usage: - uv run python -m examples.close_demo.main # Create new graph + load - uv run python -m examples.close_demo.main # Load into existing graph - uv run python -m examples.close_demo.main --dry-run # Validate data only - uv run python -m examples.close_demo.main --ai # Use MappingAgent instead of hardcoded mappings (requires Bedrock) + uv run python -m examples.roboledger_demo.main # Create new graph + load + uv run python -m examples.roboledger_demo.main # Load into existing graph + uv run python -m examples.roboledger_demo.main --dry-run # Validate data only + uv run python -m examples.roboledger_demo.main --ai # Use MappingAgent instead of hardcoded mappings (requires Bedrock) Requires: Docker stack running (just start) """ @@ -507,7 +507,7 @@ def initialize_fiscal_calendar(graph_id: str) -> str: graph_id, closed_through=closed_through, earliest_data_period=demo_start_period, - note="close_demo initialization", + note="roboledger_demo initialization", ) fc = result.get("fiscal_calendar", {}) @@ -673,6 +673,86 @@ def materialize_graph(graph_id: str) -> None: print(f" WARNING: Materialization failed: {result.error or result.message}") +# --------------------------------------------------------------------------- +# Step 7: Generate + file FY 2025 annual report (Plan C capstone) +# --------------------------------------------------------------------------- + + +def generate_fy2025_report(graph_id: str) -> str | None: + """Create a published, filed FY 2025 annual report. + + Exercises the Report Block lifecycle end-to-end: ``create-report`` → + ``get-report-package`` → ``file-report``. The result is a frozen, + filed snapshot of the prior year visible at ``/reports/{id}`` in the + package viewer, alongside the queued-for-close current period. + + Returns the report_id (or None on failure) so the caller can print + the viewer URL. + """ + # NOTE: ``LedgerClient.create_report`` discards the synchronous result + # (it returns only operation_id + status). 
The server runs create-report + # synchronously and inlines the report row in the envelope's ``result``, + # so we bypass the wrapper and call the generated API directly to get + # the report_id back. Drop this once the SDK helper is fixed to return + # the full envelope. + from robosystems_client.api.extensions_robo_ledger.op_create_report import ( + sync_detailed as api_create_report, + ) + from robosystems_client.models import CreateReportRequest + + client = _get_ledger_client() + + # Find the coa_mapping structure created during the taxonomy seed + structures = client.list_structures(graph_id, structure_type="coa_mapping") + if not structures: + print(" ERROR: No coa_mapping structure — was the CoA created?") + return None + mapping_id = structures[0]["id"] + + # Find the FAC reporting taxonomy (seeded by the platform on graph create) + taxonomies = client.list_taxonomies(graph_id, taxonomy_type="reporting_standard") + fac_tax = next((t for t in taxonomies if t.get("name", "").startswith("fac")), None) + if not fac_tax: + print(" ERROR: No FAC reporting taxonomy seeded on this graph") + return None + taxonomy_id = fac_tax["id"] + + body = CreateReportRequest( + name="FY 2025 Annual Report", + mapping_id=mapping_id, + taxonomy_id=taxonomy_id, + period_start=date(2025, 1, 1), + period_end=date(2025, 12, 31), + period_type="annual", + comparative=False, + ) + response = api_create_report(graph_id=graph_id, body=body, client=client._get_client()) + envelope = client._call_op("Create report", response) + payload = envelope.result if isinstance(envelope.result, dict) else {} + report_id = payload.get("id") or payload.get("report_id") + if not report_id: + print(f" WARNING: No report_id in result: {payload}") + return None + print(f" Generated: {report_id}") + + # Pull the package to confirm it rehydrates + package = client.get_report_package(graph_id, report_id) + if package: + items = package.get("items", []) or [] + print( + f" Package: {len(items)} block(s) — {', 
'.join(i.get('block_type', '?') for i in items)}" + ) + + # File it — flips filing_status draft → filed + try: + client.file_report(graph_id, report_id) + print(" Filed: ✓") + except Exception as e: + print(f" WARNING: file_report failed: {e}") + + return report_id + + # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- @@ -692,7 +772,7 @@ def main() -> None: # Validate data txns = get_all_transactions() - print(f"\n{COMPANY_NAME} — Close Demo Setup") + print(f"\n{COMPANY_NAME} — RoboLedger Demo Setup") print("=" * 60) print(f" Accounts: {len(ACCOUNTS)}") print(f" Transactions: {len(txns)}") @@ -776,6 +856,10 @@ def main() -> None: print("\nMaterializing to graph...") materialize_graph(graph_id) + # Generate + file FY 2025 annual report (Plan C capstone) + print("\nGenerating FY 2025 annual report...") + fy2025_report_id = generate_fy2025_report(graph_id) + # Summary print("\n" + "=" * 60) print(f" Graph ID: {graph_id}") @@ -789,6 +873,9 @@ def main() -> None: print("\n Ready for AI close workflow!") print(f"\n Close target: {close_target} ({close_label})") + if fy2025_report_id: + print(f" FY 2025: filed ({fy2025_report_id})") + print(f" Viewer URL: /reports/{fy2025_report_id}?graph={graph_id}") print("\n Next steps:") print(" 1. Open Claude Desktop (or MCP client)") print(f" 2. 
Switch to workspace: {graph_id}") diff --git a/examples/close_demo/mappings.py b/examples/roboledger_demo/mappings.py similarity index 100% rename from examples/close_demo/mappings.py rename to examples/roboledger_demo/mappings.py diff --git a/examples/close_demo/oltp_writer.py b/examples/roboledger_demo/oltp_writer.py similarity index 97% rename from examples/close_demo/oltp_writer.py rename to examples/roboledger_demo/oltp_writer.py index b83b1fc7..64efa4ca 100644 --- a/examples/close_demo/oltp_writer.py +++ b/examples/roboledger_demo/oltp_writer.py @@ -24,9 +24,9 @@ def write_demo_duckdb( Args: duckdb_path: Destination .duckdb file (overwritten if it exists). accounts: List of (code, name, classification, sub_classification, - balance_type, parent_code) tuples from close_demo.data.ACCOUNTS. + balance_type, parent_code) tuples from roboledger_demo.data.ACCOUNTS. transactions: List of (date, type, description, merchant, lines) tuples - from close_demo.data.get_all_transactions(), where `lines` is + from roboledger_demo.data.get_all_transactions(), where `lines` is [(account_code, debit_cents, credit_cents), ...]. 
Returns: @@ -270,7 +270,7 @@ def _insert_line_items(con, rows: list[tuple]) -> None: def default_duckdb_path() -> Path: """Return the default working path for the demo DuckDB file.""" - return Path("/tmp/close_demo/close_demo.duckdb") + return Path("/tmp/roboledger_demo/roboledger_demo.duckdb") if __name__ == "__main__": diff --git a/examples/close_demo/policies.py b/examples/roboledger_demo/policies.py similarity index 100% rename from examples/close_demo/policies.py rename to examples/roboledger_demo/policies.py diff --git a/examples/close_demo/prompt.md b/examples/roboledger_demo/prompt.md similarity index 100% rename from examples/close_demo/prompt.md rename to examples/roboledger_demo/prompt.md diff --git a/examples/close_demo/validate.py b/examples/roboledger_demo/validate.py similarity index 98% rename from examples/close_demo/validate.py rename to examples/roboledger_demo/validate.py index 1446177e..a71bdf64 100644 --- a/examples/close_demo/validate.py +++ b/examples/roboledger_demo/validate.py @@ -1,4 +1,4 @@ -"""Information Block validation — Step 7 of the close demo. +"""Information Block validation — Step 7 of the roboledger demo. 
Runs after schedules are created to verify: - FactSet rows exist for every schedule @@ -9,7 +9,7 @@ Can also be run standalone against any graph: - uv run python -m examples.close_demo.validate + uv run python -m examples.roboledger_demo.validate """ from __future__ import annotations @@ -481,7 +481,7 @@ def _main() -> None: else: graph_id = creds.get("graphs", {}).get("cascade_demo", "") if not graph_id: - print("Usage: uv run python -m examples.close_demo.validate ") + print("Usage: uv run python -m examples.roboledger_demo.validate ") sys.exit(1) print(f"{_BOLD}Information Block Validation{_RESET} graph={graph_id}") diff --git a/justfile b/justfile index a125490d..ece13de9 100644 --- a/justfile +++ b/justfile @@ -328,7 +328,7 @@ migrate-reset db="platform" env=_local_env: # Run all demos demo: - @just demo-close + @just demo-roboledger @just demo-custom-graph @just demo-sec @@ -351,9 +351,9 @@ demo-sec-subscribe plan="sec-starter": demo-sec-query *args: uv run examples/sec_demo/query_examples.py {{ args }} -# Run AI close demo — sets up synthetic consulting company with schedules, mappings, and policies -demo-close *args="": - EXTENSIONS_ENABLED=true UV_ENV_FILE={{_local_env}} uv run python -m examples.close_demo.main {{args}} +# Run RoboLedger end-to-end demo — synthetic consulting company, schedules, mappings, policies, FY 2025 filed report, and a queued period for AI close +demo-roboledger *args="": + EXTENSIONS_ENABLED=true UV_ENV_FILE={{_local_env}} uv run python -m examples.roboledger_demo.main {{args}} # Run custom graph demo end-to-end (flags: new-user,new-graph,skip-queries) demo-custom-graph flags="new-graph" real_s3="false" base_url="http://localhost:8000": diff --git a/robosystems/operations/roboledger/commands/taxonomies.py b/robosystems/operations/roboledger/commands/taxonomies.py index 739e09fb..adb59a02 100644 --- a/robosystems/operations/roboledger/commands/taxonomies.py +++ b/robosystems/operations/roboledger/commands/taxonomies.py @@ -6,7 +6,7 
@@ transport errors. They are the single source of truth for taxonomy-layer writes. The -REST operation surface, MCP tools, agents, and seeders (close_demo) +REST operation surface, MCP tools, agents, and seeders (roboledger_demo) all delegate here. """ diff --git a/tests/routers/extensions/roboledger/test_operations.py b/tests/routers/extensions/roboledger/test_operations.py index 3b2ccbda..b36ee2ff 100644 --- a/tests/routers/extensions/roboledger/test_operations.py +++ b/tests/routers/extensions/roboledger/test_operations.py @@ -640,7 +640,9 @@ async def test_happy_path_wraps_filed_report_in_envelope(self) -> None: "robosystems.routers.extensions.roboledger.operations.cmd_file_report", return_value=_make_filed_report_response(), ), - _mock_session_ctx() as mock_session, + patch( + "robosystems.routers.extensions.roboledger.operations.extensions_session" + ) as mock_session, ): mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) mock_session.return_value.__exit__ = MagicMock(return_value=False) @@ -670,7 +672,9 @@ async def test_404_when_report_missing(self) -> None: "robosystems.routers.extensions.roboledger.operations.cmd_file_report", side_effect=ReportNotFoundError("rpt_missing"), ), - _mock_session_ctx() as mock_session, + patch( + "robosystems.routers.extensions.roboledger.operations.extensions_session" + ) as mock_session, ): mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) mock_session.return_value.__exit__ = MagicMock(return_value=False) @@ -700,7 +704,9 @@ async def test_422_when_transition_illegal(self) -> None: "Report 'rpt_01' is in 'archived'; can only file from 'draft' or 'under_review'." 
), ), - _mock_session_ctx() as mock_session, + patch( + "robosystems.routers.extensions.roboledger.operations.extensions_session" + ) as mock_session, ): mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) mock_session.return_value.__exit__ = MagicMock(return_value=False) @@ -730,7 +736,9 @@ async def test_happy_path_returns_transitioned_report(self) -> None: "robosystems.routers.extensions.roboledger.operations.cmd_transition_filing_status", return_value=response, ), - _mock_session_ctx() as mock_session, + patch( + "robosystems.routers.extensions.roboledger.operations.extensions_session" + ) as mock_session, ): mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) mock_session.return_value.__exit__ = MagicMock(return_value=False) @@ -760,7 +768,9 @@ async def test_422_when_transition_illegal(self) -> None: "Report 'rpt_01' cannot transition from 'under_review' to 'filed'." ), ), - _mock_session_ctx() as mock_session, + patch( + "robosystems.routers.extensions.roboledger.operations.extensions_session" + ) as mock_session, ): mock_session.return_value.__enter__ = MagicMock(return_value=MagicMock()) mock_session.return_value.__exit__ = MagicMock(return_value=False) From a0450fb2bb92cf7761b5a5e8abfb8fd2f5e801f1 Mon Sep 17 00:00:00 2001 From: "Joseph T. 
French" Date: Sun, 26 Apr 2026 01:03:20 -0500 Subject: [PATCH 7/7] feat: Enhance report filing lifecycle with status checks and database cleanup --- examples/roboledger_demo/_reset.py | 39 ++++++++-- examples/roboledger_demo/main.py | 19 +++-- .../versions/0006_report_package_lifecycle.py | 6 +- .../adapters/sec/processors/classify.py | 8 +- .../models/extensions/roboledger/report.py | 2 +- .../operations/roboledger/commands/reports.py | 36 +++++++-- .../extensions/roboledger/operations.py | 3 + robosystems/taxonomy/loaders/jsonld_loader.py | 41 +++++++--- .../commands/test_reports_filing.py | 76 +++++++++++++++++++ 9 files changed, 192 insertions(+), 38 deletions(-) diff --git a/examples/roboledger_demo/_reset.py b/examples/roboledger_demo/_reset.py index 7122599b..58c679f5 100644 --- a/examples/roboledger_demo/_reset.py +++ b/examples/roboledger_demo/_reset.py @@ -50,19 +50,35 @@ def reset_demo_state(graph_id: str) -> None: with extensions_session(graph_id) as session: # 1-3. All entries + line items + transactions (tenant-generated; - # the library itself never authors these). + # the library itself never authors these). Dimension tables + # FK back to their parent rows so wipe them first. + session.execute(text("DELETE FROM line_item_dimensions")) session.execute(text("DELETE FROM line_items")) + session.execute(text("DELETE FROM entry_dimensions")) session.execute(text("DELETE FROM entries")) + session.execute(text("DELETE FROM transaction_dimensions")) session.execute(text("DELETE FROM transactions")) + # 3b. Events + their dimensions. Events power journal_entry_recorded + # / asset_disposed / schedule_entry_due — none library-seeded. Must + # delete event_dimensions first (FK), then events (which referenced + # entries above via triggered_by_event_id), then agents (events.agent_id). 
+ session.execute(text("DELETE FROM event_dimensions")) + session.execute(text("DELETE FROM events")) + session.execute(text("DELETE FROM agents")) + session.execute(text("DELETE FROM dimensions")) + # 4. Tenant-origin associations. The library's immutability triggers # raise on any UPDATE/DELETE of rows with created_by='library-seeder', # so we filter those out explicitly. CoA mapping arcs created by the # demo (created_by='coa-classifier' or the demo user) are fair game. + # association_classifications FK associations → wipe first. + session.execute(text("DELETE FROM association_classifications")) session.execute( text("DELETE FROM associations WHERE created_by != :seeder"), {"seeder": _LIBRARY_SEEDER}, ) + session.execute(text("DELETE FROM classifications")) # 5. Facts session.execute(text("DELETE FROM facts")) @@ -107,13 +123,18 @@ def reset_demo_state(graph_id: str) -> None: ), {"seeder": _LIBRARY_SEEDER}, ) - session.execute( - text( - "DELETE FROM fact_sets WHERE structure_id IN " - "(SELECT id FROM structures WHERE created_by != :seeder)" - ), - {"seeder": _LIBRARY_SEEDER}, - ) + # 6c.iii. Reports + dependents. Reports are not library-seeded, so + # wipe everything. report_shares.report_id and publish_list_members + # FK to their parents — delete children first. fact_sets.report_id + # FKs reports, so all fact_sets must go before reports. + session.execute(text("DELETE FROM report_shares")) + session.execute(text("DELETE FROM publish_list_members")) + session.execute(text("DELETE FROM publish_lists")) + # All fact_sets — the structure-id filter above only catches tenant- + # owned structures, but report fact_sets attach to library structures + # (e.g. fac default). Safe to wipe all: no fact_sets are library-seeded. + session.execute(text("DELETE FROM fact_sets")) + session.execute(text("DELETE FROM reports")) # 7. Tenant-origin structures (any the demo created — anchor # structure, schedules, report layouts). 
@@ -121,6 +142,8 @@ def reset_demo_state(graph_id: str) -> None: text("DELETE FROM structures WHERE created_by != :seeder"), {"seeder": _LIBRARY_SEEDER}, ) + session.execute(text("DELETE FROM structure_templates")) + session.execute(text("DELETE FROM event_handlers")) # 8. Entity taxonomy linkages (will be re-created by demo setup). session.query(EntityTaxonomy).delete(synchronize_session=False) diff --git a/examples/roboledger_demo/main.py b/examples/roboledger_demo/main.py index 6e0056df..7107ba87 100644 --- a/examples/roboledger_demo/main.py +++ b/examples/roboledger_demo/main.py @@ -709,13 +709,20 @@ def generate_fy2025_report(graph_id: str) -> str | None: return None mapping_id = structures[0]["id"] - # Find the FAC reporting taxonomy (seeded by the platform on graph create) - taxonomies = client.list_taxonomies(graph_id, taxonomy_type="reporting_standard") - fac_tax = next((t for t in taxonomies if t.get("name", "").startswith("fac")), None) - if not fac_tax: - print(" ERROR: No FAC reporting taxonomy seeded on this graph") + # Find the FAC presentation taxonomy. This is where the proper + # income_statement / cash_flow_statement structures live (with + # associations to FAC elements). The bare ``fac v1`` reporting_standard + # only has a default placeholder structure with no associations, which + # is why the report would otherwise have zero rendered statements. 
+ taxonomies = client.list_taxonomies(graph_id, taxonomy_type="mapping") + fac_pres = next( + (t for t in taxonomies if t.get("name", "").startswith("fac-presentation")), + None, + ) + if not fac_pres: + print(" ERROR: No fac-presentation taxonomy seeded on this graph") return None - taxonomy_id = fac_tax["id"] + taxonomy_id = fac_pres["id"] body = CreateReportRequest( name="FY 2025 Annual Report", diff --git a/migrations/extensions/versions/0006_report_package_lifecycle.py b/migrations/extensions/versions/0006_report_package_lifecycle.py index 31d36489..2091a973 100644 --- a/migrations/extensions/versions/0006_report_package_lifecycle.py +++ b/migrations/extensions/versions/0006_report_package_lifecycle.py @@ -46,7 +46,7 @@ def _add_lifecycle_columns_in_tenant(conn, schema: str) -> None: t = TenantOps(conn, schema) t.add_column("reports", "filing_status", "VARCHAR", nullable=False, default="'draft'") - t.add_column("reports", "filed_at", "TIMESTAMP") + t.add_column("reports", "filed_at", "TIMESTAMP WITH TIME ZONE") t.add_column("reports", "filed_by", "VARCHAR") t.add_column("reports", "supersedes_id", "VARCHAR") t.add_column("reports", "superseded_by_id", "VARCHAR") @@ -111,7 +111,9 @@ def upgrade() -> None: server_default="draft", ), ) - op.add_column("reports", sa.Column("filed_at", sa.DateTime(), nullable=True)) + op.add_column( + "reports", sa.Column("filed_at", sa.DateTime(timezone=True), nullable=True) + ) op.add_column("reports", sa.Column("filed_by", sa.String(), nullable=True)) op.add_column("reports", sa.Column("supersedes_id", sa.String(), nullable=True)) op.add_column("reports", sa.Column("superseded_by_id", sa.String(), nullable=True)) diff --git a/robosystems/adapters/sec/processors/classify.py b/robosystems/adapters/sec/processors/classify.py index f6431c6c..3384614f 100644 --- a/robosystems/adapters/sec/processors/classify.py +++ b/robosystems/adapters/sec/processors/classify.py @@ -768,7 +768,13 @@ def _build_structure_factsets( seen.add(rid) 
report_ids.append(rid) except Exception as e: - logger.debug(f"FactSet report lookup failed: {e}") + # A miss here means REPORT_HAS_FACT_SET edges never land for this + # filing — the package query won't be able to traverse from Report + # to its FactSets. Log loud enough to be noticed in CloudWatch. + logger.warning( + f"FactSet report lookup failed; REPORT_HAS_FACT_SET edges will " + f"be missing for this filing: {e}" + ) # Get all structures and their elements (both FROM and TO) try: diff --git a/robosystems/models/extensions/roboledger/report.py b/robosystems/models/extensions/roboledger/report.py index ee0d6893..f06994a9 100644 --- a/robosystems/models/extensions/roboledger/report.py +++ b/robosystems/models/extensions/roboledger/report.py @@ -83,7 +83,7 @@ class Report(ExtensionsBase): # Filing lifecycle — orthogonal to generation_status. ``filed`` is # the immutable locked state; ``archived`` is for superseded versions. filing_status = Column(String, nullable=False, default="draft") - filed_at = Column(DateTime, nullable=True) + filed_at = Column(DateTime(timezone=True), nullable=True) filed_by = Column(String, nullable=True) # Restatement chain — restating a filed Report creates a new row with diff --git a/robosystems/operations/roboledger/commands/reports.py b/robosystems/operations/roboledger/commands/reports.py index cf0f6e4a..dd0c182e 100644 --- a/robosystems/operations/roboledger/commands/reports.py +++ b/robosystems/operations/roboledger/commands/reports.py @@ -461,14 +461,15 @@ class ReportNotFiledError(Exception): def file_report(session: Session, report_id: str, filed_by: str) -> ReportResponse: """Transition a Report to ``filed`` — locks the package. - Allowed from ``draft`` or ``under_review``. Stamps ``filed_at`` and - ``filed_by`` for audit. Raises :class:`ReportNotFoundError` when the - Report doesn't exist and :class:`InvalidFilingTransitionError` when - the current status isn't a legal source for filing. 
- - ``filing_status`` is orthogonal to ``generation_status`` — filing a - Report doesn't require ``generation_status='complete'``, but a UI - built on top of this normally gates the action on completion. + Allowed from ``draft`` or ``under_review`` and only when generation + has reached ``published``. Stamps ``filed_at`` and ``filed_by`` for + audit. Raises :class:`ReportNotFoundError` when the Report doesn't + exist and :class:`InvalidFilingTransitionError` when the current + filing or generation status isn't a legal source for filing. + + ``filing_status`` and ``generation_status`` are orthogonal axes, but + filing an in-progress or failed report would lock an empty / partial + snapshot — so the server gates on ``generation_status='published'``. """ from datetime import UTC, datetime @@ -487,6 +488,16 @@ def file_report(session: Session, report_id: str, filed_by: str) -> ReportRespon f"Report '{report_id}' is in '{report_def.filing_status}'; " f"can only file from 'draft' or 'under_review'." ) + # ``complete`` and ``published`` both mean "generation finished + # successfully" in this codebase (see closing_book.py:63 which treats + # them interchangeably). Filing a ``pending`` / ``generating`` / + # ``failed`` report would lock an empty or partial snapshot. + if report_def.generation_status not in {"complete", "published"}: + raise InvalidFilingTransitionError( + f"Report '{report_id}' has generation_status=" + f"'{report_def.generation_status}'; can only file once generation " + f"has reached 'complete' or 'published'." + ) report_def.filing_status = "filed" report_def.filed_at = datetime.now(UTC) @@ -537,6 +548,9 @@ def delete_report(session: Session, report_id: str, acting_user_id: str) -> bool """Delete a report and its generated facts. Raises `NotAuthorizedError` if the caller doesn't own the report. 
+ Raises `ReportNotFiledError` if the report is in a locked filing + state (``filed`` or ``archived``) — the Report Block lifecycle treats + filed/archived as immutable so the audit trail can't be erased. Returns True if a row was deleted, False if the report did not exist. """ report_def = session.get(Report, report_id) @@ -544,6 +558,12 @@ def delete_report(session: Session, report_id: str, acting_user_id: str) -> bool return False if report_def.created_by != acting_user_id: raise NotAuthorizedError("Not authorized to delete this report.") + if report_def.filing_status in {"filed", "archived"}: + raise ReportNotFiledError( + f"Report '{report_id}' is '{report_def.filing_status}' and cannot " + f"be deleted. Reach 'archived' via transition-filing-status if " + f"retiring; deletion is only available for 'draft' or 'under_review'." + ) session.execute( text("DELETE FROM facts WHERE report_id = :report_id"), diff --git a/robosystems/routers/extensions/roboledger/operations.py b/robosystems/routers/extensions/roboledger/operations.py index d85506fe..be9f4514 100644 --- a/robosystems/routers/extensions/roboledger/operations.py +++ b/robosystems/routers/extensions/roboledger/operations.py @@ -276,6 +276,7 @@ NoEntityError, NotAuthorizedError, PublishListEmptyError, + ReportNotFiledError, ReportNotFoundError, ReportNotPublishedError, TaxonomyNotFoundError, @@ -1530,6 +1531,8 @@ def _runner(): raise HTTPException( status_code=403, detail="Not authorized to delete this report." 
) + except ReportNotFiledError as e: + raise HTTPException(status_code=422, detail=str(e)) except (ValueError, ProgrammingError): raise _ledger_404() if not deleted: diff --git a/robosystems/taxonomy/loaders/jsonld_loader.py b/robosystems/taxonomy/loaders/jsonld_loader.py index 1b18c338..67d3b209 100644 --- a/robosystems/taxonomy/loaders/jsonld_loader.py +++ b/robosystems/taxonomy/loaders/jsonld_loader.py @@ -547,25 +547,42 @@ def _extract_rules(graph: Graph) -> list[RuleSpec]: def _extract_structures(graph: Graph) -> list[StructureSpec]: - """Extract extended link roles as structures.""" + """Extract extended link roles as structures. + + The seed JSON-LD may carry an explicit ``structureType`` for each + extended-link role (the authoritative classification). When present, + it wins. Only roles missing that property fall through to the + name-based heuristic on the ``roleUri`` — and the heuristic must stay + permissive (it sees abbreviations like ``BS-classified`` / + ``IS-multistep`` in real-world taxonomies, not full English). 
+ """ structures: list[StructureSpec] = [] role_pred = URIRef(f"{RS_NS}roleUri") name_pred = URIRef(f"{RS_NS}structureName") + type_pred = URIRef(f"{RS_NS}structureType") for subject, role_uri in graph.subject_objects(role_pred): names = list(graph.objects(subject, name_pred)) name = str(names[0]) if names else str(role_uri).rsplit("/", 1)[-1] - # Heuristic structure_type from role URI - role_str = str(role_uri).lower() - if "balancesheet" in role_str: - stype = "balance_sheet" - elif "income" in role_str or "operations" in role_str: - stype = "income_statement" - elif "cashflow" in role_str: - stype = "cash_flow_statement" - elif "equity" in role_str or "changesin" in role_str: - stype = "equity_statement" + explicit_types = list(graph.objects(subject, type_pred)) + if explicit_types: + stype = str(explicit_types[0]) else: - stype = "custom" + role_str = str(role_uri).lower() + if "balancesheet" in role_str or "/bs-" in role_str or "/bs/" in role_str: + stype = "balance_sheet" + elif ( + "income" in role_str + or "operations" in role_str + or "/is-" in role_str + or "/is/" in role_str + ): + stype = "income_statement" + elif "cashflow" in role_str or "/cf-" in role_str or "/cf/" in role_str: + stype = "cash_flow_statement" + elif "equity" in role_str or "changesin" in role_str: + stype = "equity_statement" + else: + stype = "custom" structures.append( StructureSpec(name=name, role_uri=str(role_uri), structure_type=stype) ) diff --git a/tests/operations/roboledger/commands/test_reports_filing.py b/tests/operations/roboledger/commands/test_reports_filing.py index 9a535ad7..7dc4f360 100644 --- a/tests/operations/roboledger/commands/test_reports_filing.py +++ b/tests/operations/roboledger/commands/test_reports_filing.py @@ -9,7 +9,10 @@ from robosystems.operations.roboledger.commands.reports import ( InvalidFilingTransitionError, + NotAuthorizedError, + ReportNotFiledError, ReportNotFoundError, + delete_report, file_report, transition_filing_status, ) @@ -111,6 
+114,37 @@ def test_file_report_rejects_archived() -> None: file_report(session, "rpt_01", filed_by="user_01") +def test_file_report_blocks_when_generation_status_pending() -> None: + """Filing must reject reports whose generation hasn't completed. + + An in-progress / pending / failed generation_status would lock an + empty or partial snapshot in the audit trail. The server gates on + generation_status reaching ``complete`` or ``published`` regardless + of what the UI thinks. + """ + session = MagicMock() + report = _make_report_def(filing_status="draft") + report.generation_status = "generating" + session.get.return_value = report + + with pytest.raises(InvalidFilingTransitionError) as exc: + file_report(session, "rpt_01", filed_by="user_01") + + assert "generating" in str(exc.value) + + +def test_file_report_blocks_when_generation_status_failed() -> None: + session = MagicMock() + report = _make_report_def(filing_status="draft") + report.generation_status = "failed" + session.get.return_value = report + + with pytest.raises(InvalidFilingTransitionError) as exc: + file_report(session, "rpt_01", filed_by="user_01") + + assert "failed" in str(exc.value) + + def test_file_report_raises_when_report_missing() -> None: session = MagicMock() session.get.return_value = None @@ -180,3 +214,45 @@ def test_transition_raises_when_report_missing() -> None: with pytest.raises(ReportNotFoundError): transition_filing_status(session, "rpt_missing", "under_review") + + +# ─── delete_report — filing-status immutability guard ─────────────────────── + + +def test_delete_report_blocks_filed_status() -> None: + """Filed reports cannot be deleted — the audit trail is immutable. + + Reaching ``archived`` via ``transition_filing_status`` is the only + retirement path once a report is filed. 
+ """ + session = MagicMock() + report = _make_report_def(filing_status="filed", filed_by="user_01") + report.created_by = "user_01" + session.get.return_value = report + + with pytest.raises(ReportNotFiledError) as exc: + delete_report(session, "rpt_01", acting_user_id="user_01") + + assert "filed" in str(exc.value) + + +def test_delete_report_blocks_archived_status() -> None: + session = MagicMock() + report = _make_report_def(filing_status="archived", filed_by="user_01") + report.created_by = "user_01" + session.get.return_value = report + + with pytest.raises(ReportNotFiledError): + delete_report(session, "rpt_01", acting_user_id="user_01") + + +def test_delete_report_authorization_check_runs_before_status_check() -> None: + """Owner check should fire before the filing-status check so a hostile + caller can't probe filing status by attempting deletion.""" + session = MagicMock() + report = _make_report_def(filing_status="filed", filed_by="user_01") + report.created_by = "user_01" + session.get.return_value = report + + with pytest.raises(NotAuthorizedError): + delete_report(session, "rpt_01", acting_user_id="user_other")