From e64b38fd2d300e6545414561b60b6eadc7b0da0b Mon Sep 17 00:00:00 2001
From: orin <zaqwerss@outlook.com>
Date: Fri, 22 May 2026 14:42:42 +0800
Subject: [PATCH] implement phase 2 fact validation surfaces

---
 src/agent/nodes.py                |  14 +-
 src/agent/state.py                |   2 +
 src/api/routes.py                 |  16 ++
 src/cli.py                        |   6 +
 src/finance/facts.py              |  33 ++--
 src/finance/validators.py         | 257 +++++++++++++++++++++++++++++-
 src/schemas/models.py             |  28 ++++
 tests/test_finance_facts.py       |  35 +++-
 tests/test_routes_web.py          |  38 +++++
 tests/test_validators_extended.py | 102 +++++++++++-
 web/src/api/docs.ts               |   4 +
 web/src/api/types.ts              |  24 +++
 12 files changed, 542 insertions(+), 17 deletions(-)

diff --git a/src/agent/nodes.py b/src/agent/nodes.py
index 5d83c15..314f9f4 100644
--- a/src/agent/nodes.py
+++ b/src/agent/nodes.py
@@ -15,7 +15,7 @@
 from src.finance.normalizer import normalize_account_name
 from src.finance.signals import generate_signals
 from src.finance.utils import table_rows
-from src.finance.validators import validate_statements
+from src.finance.validators import validate_facts, validate_statements
 from src.llm.base import StructuredPromptRequest, get_default_llm_client, get_llm_client, get_llm_model_config, langsmith_metadata
 from src.pdf.extractor import PDFExtractor
 from src.pdf.tables import extract_tables as extract_tables_from_pdf
@@ -259,11 +259,15 @@ def extract_financial_statements(state: AgentState) -> AgentState:
 
 def validate_and_reconcile(state: AgentState) -> AgentState:
     start_ms = monotonic_ms()
+    if not state.facts:
+        state.facts = facts_from_statements(state.doc_meta, state.statements)
     state.validation_results = validate_statements(state.statements)
+    state.fact_validation_results = validate_facts(state.doc_meta, state.facts, state.statements)
     severe = any(
         issue in {"balance_equation_failed", "balance_missing_totals"} or issue.startswith("unit_mismatch")
         for issue in state.validation_results.get("issues", [])
     )
+    severe = severe or any(issue.severity == "high" for issue in (state.fact_validation_results.issues if state.fact_validation_results else []))
     # Remove any previous validation_failed entries before deciding, ensuring idempotency
     state.errors = [err for err in state.errors if err != "validation_failed"]
     if severe:
@@ -559,8 +563,14 @@ def finalize(state: AgentState) -> AgentState:
     store.save_json(state.doc_meta.doc_id, "extracted/tables.json", [t.model_dump() for t in state.tables])
     store.save_json(state.doc_meta.doc_id, "extracted/statements.json", {k: v.model_dump() for k, v in state.statements.items()})
     if not state.facts:
-        state.facts = facts_from_statements(state.doc_meta.doc_id, state.statements)
+        state.facts = facts_from_statements(state.doc_meta, state.statements)
     store.save_json(state.doc_meta.doc_id, "extracted/facts.json", [fact.model_dump(mode="json") for fact in state.facts])
+    if state.fact_validation_results:
+        store.save_json(
+            state.doc_meta.doc_id,
+            "extracted/fact_validation.json",
+            state.fact_validation_results.model_dump(mode="json"),
+        )
     if state.corrections:
         store.save_json(
             state.doc_meta.doc_id,
diff --git a/src/agent/state.py b/src/agent/state.py
index a0de973..93c9edf 100644
--- a/src/agent/state.py
+++ b/src/agent/state.py
@@ -13,6 +13,7 @@
     DocumentMeta,
     EventStudyResult,
     ExtractionTrace,
+    FactValidationResult,
     FinancialFact,
     FinancialStatement,
     KeyNote,
@@ -33,6 +34,7 @@ class AgentState(BaseModel):
     statements: dict[str, FinancialStatement] = Field(default_factory=dict)
     notes: list[KeyNote] = Field(default_factory=list)
     validation_results: dict[str, Any] = Field(default_factory=dict)
+    fact_validation_results: FactValidationResult | None = None
     risk_signals: list[RiskSignal] = Field(default_factory=list)
     facts: list[FinancialFact] = Field(default_factory=list)
     corrections: list[Correction] = Field(default_factory=list)
diff --git a/src/api/routes.py b/src/api/routes.py
index a3afaf4..72d85d4 100644
--- a/src/api/routes.py
+++ b/src/api/routes.py
@@ -140,6 +140,9 @@ async def create_document(
     background_tasks: BackgroundTasks,
     file: UploadFile = File(...),
     company: str | None = Form(default=None),
+    ticker: str | None = Form(default=None),
+    cik: str | None = Form(default=None),
+    filing_type: str | None = Form(default=None),
     period_end: str | None = Form(default=None),
     report_type: str | None = Form(default=None),
     language: str | None = Form(default=None),
@@ -174,6 +177,9 @@ async def create_document(
         doc_id=doc_id,
         filename=safe_filename,
         company=company,
+        ticker=ticker,
+        cik=cik,
+        filing_type=filing_type,
         period_end=parsed_period,
         report_type=report_type,
         language=language,
@@ -274,6 +280,14 @@ async def get_facts(_auth: _AuthDep, doc_id: str):
     return _ok(data)
 
 
+@router.get("/documents/{doc_id}/fact-validation")
+async def get_fact_validation(_auth: _AuthDep, doc_id: str):
+    # Serve the persisted fact-validation artifact for doc_id.
+    payload = store.load_json(doc_id, "extracted/fact_validation.json")
+    # A None payload is reported to the client as a "not_found" error.
+    return _ok(payload) if payload is not None else _err("not_found", "Fact validation not found")
+
+
 @router.get("/documents/{doc_id}/notes")
 async def get_notes(_auth: _AuthDep, doc_id: str):
     data = store.load_json(doc_id, "extracted/notes.json")
@@ -532,6 +546,8 @@ def _save_partial_results(doc_id: str) -> None:
             s.save_json(doc_id, "extracted/statements.json", {k: v.model_dump() for k, v in partial.statements.items()})
         if partial.facts:
             s.save_json(doc_id, "extracted/facts.json", [fact.model_dump(mode="json") for fact in partial.facts])
+        if partial.fact_validation_results:
+            s.save_json(doc_id, "extracted/fact_validation.json", partial.fact_validation_results.model_dump(mode="json"))
         if partial.notes:
             s.save_json(doc_id, "extracted/notes.json", [n.model_dump() for n in partial.notes])
         if partial.risk_signals:
diff --git a/src/cli.py b/src/cli.py
index 0f94459..a9bf62d 100644
--- a/src/cli.py
+++ b/src/cli.py
@@ -22,6 +22,9 @@ def analyze(
     pdf: str = typer.Option(..., help="Path to PDF file"),
     out: str = typer.Option("data", help="Output directory"),
     company: str | None = typer.Option(None, help="Company name"),
+    ticker: str | None = typer.Option(None, help="Ticker symbol"),
+    cik: str | None = typer.Option(None, help="CIK identifier"),
+    filing_type: str | None = typer.Option(None, help="Filing type (for example 10-K or 10-Q)"),
     period_end: str | None = typer.Option(None, help="Report period end (YYYY-MM-DD)"),
     report_type: str | None = typer.Option(None, help="Report type"),
     language: str | None = typer.Option(None, help="Language"),
@@ -38,6 +41,9 @@ def analyze(
         doc_id=doc_id,
         filename=pdf_path.name,
         company=company,
+        ticker=ticker,
+        cik=cik,
+        filing_type=filing_type,
         period_end=parsed_period,
         report_type=report_type,
         language=language,
diff --git a/src/finance/facts.py b/src/finance/facts.py
index 994a7c2..e9ba7e3 100644
--- a/src/finance/facts.py
+++ b/src/finance/facts.py
@@ -4,10 +4,11 @@
 from collections.abc import Iterable
 from typing import Any
 
-from src.schemas.models import Correction, FinancialFact, FinancialStatement, SourceRef, StatementLineItem
+from src.schemas.models import Correction, DocumentMeta, FinancialFact, FinancialStatement, SourceRef, StatementLineItem
 
 
-def facts_from_statements(doc_id: str, statements: dict[str, FinancialStatement]) -> list[FinancialFact]:
+def facts_from_statements(doc_meta: str | DocumentMeta, statements: dict[str, FinancialStatement]) -> list[FinancialFact]:
+    meta = _coerce_doc_meta(doc_meta)
     facts: list[FinancialFact] = []
     seen: set[tuple[str, str]] = set()
 
@@ -17,7 +18,7 @@ def facts_from_statements(doc_id: str, statements: dict[str, FinancialStatement]
         for item in statement.line_items:
             if item.value_current is None and not item.source_refs:
                 continue
-            fact = _fact_from_line_item(doc_id, statement, item)
+            fact = _fact_from_line_item(meta, statement, item)
             facts.append(fact)
             seen.add((statement.statement_type, item.name_norm))
 
@@ -27,10 +28,11 @@ def facts_from_statements(doc_id: str, statements: dict[str, FinancialStatement]
                 continue
             facts.append(
                 _build_fact(
-                    doc_id=doc_id,
+                    doc_meta=meta,
                     statement=statement,
                     concept=concept,
                     label=concept,
+                    raw_label=concept,
                     value=value,
                     unit=None,
                     currency=None,
@@ -55,16 +57,17 @@ def apply_corrections(facts: Iterable[FinancialFact], corrections: Iterable[Corr
 
 
 def _fact_from_line_item(
-    doc_id: str,
+    doc_meta: DocumentMeta,
     statement: FinancialStatement,
     item: StatementLineItem,
 ) -> FinancialFact:
     confidence = _evidence_confidence(item.source_refs, statement.extraction_confidence)
     return _build_fact(
-        doc_id=doc_id,
+        doc_meta=doc_meta,
         statement=statement,
         concept=item.name_norm,
         label=item.name_raw,
+        raw_label=item.name_raw,
         value=item.value_current,
         unit=item.unit,
         currency=item.currency,
@@ -76,10 +79,11 @@ def _fact_from_line_item(
 
 def _build_fact(
     *,
-    doc_id: str,
+    doc_meta: DocumentMeta,
     statement: FinancialStatement,
     concept: str,
     label: str,
+    raw_label: str,
     value: float | None,
     unit: str | None,
     currency: str | None,
@@ -88,11 +92,16 @@ def _build_fact(
     metadata: dict[str, Any],
 ) -> FinancialFact:
     return FinancialFact(
-        fact_id=_fact_id(doc_id, statement.statement_type, concept, statement.period_end, label),
-        doc_id=doc_id,
+        fact_id=_fact_id(doc_meta.doc_id, statement.statement_type, concept, statement.period_end, raw_label),
+        doc_id=doc_meta.doc_id,
+        company=doc_meta.company,
+        ticker=doc_meta.ticker,
+        cik=doc_meta.cik,
+        filing_type=doc_meta.filing_type or doc_meta.report_type,
         statement_type=statement.statement_type,
         concept=concept,
         label=label,
+        raw_label=raw_label,
         value=value,
         unit=unit,
         scale=_infer_scale(unit),
@@ -107,6 +116,12 @@ def _build_fact(
     )
 
 
+def _coerce_doc_meta(doc_meta: str | DocumentMeta) -> DocumentMeta:
+    """Return ``doc_meta`` unchanged if it is a DocumentMeta; otherwise wrap the doc_id string in one."""
+    # A bare id gets a placeholder "<doc_id>.pdf" filename because DocumentMeta requires that field.
+    return doc_meta if isinstance(doc_meta, DocumentMeta) else DocumentMeta(doc_id=doc_meta, filename=f"{doc_meta}.pdf")
+
+
 def _fact_id(doc_id: str, statement_type: str, concept: str, period_end: object, label: str) -> str:
     raw = "|".join([doc_id, statement_type, concept, str(period_end or ""), label])
     return "fact_" + hashlib.sha1(raw.encode("utf-8")).hexdigest()[:16]
diff --git a/src/finance/validators.py b/src/finance/validators.py
index 8794835..3e7a7ec 100644
--- a/src/finance/validators.py
+++ b/src/finance/validators.py
@@ -1,9 +1,38 @@
 from __future__ import annotations
 
+from collections import defaultdict
 from typing import Any
 
 from src.finance.utils import find_line_item, find_total
-from src.schemas.models import FinancialStatement
+from src.schemas.models import DocumentMeta, FactValidationIssue, FactValidationResult, FinancialFact, FinancialStatement
+
+
+_CRITICAL_FACTS: dict[str, tuple[str, ...]] = {  # statement type -> concepts that validate_facts requires for it
+    "income": ("revenue", "gross_profit", "net_income"),
+    "balance": ("total_assets", "total_liabilities", "total_equity"),
+    "cashflow": ("operating_cash_flow", "capex"),
+}
+
+_CONCEPT_CANONICAL = {  # concept alias -> canonical concept name, looked up by _canonical_concept
+    "operating_cf": "operating_cash_flow",
+    "operating_cash_flow": "operating_cash_flow",
+}
+
+
+def _canonical_concept(concept: str) -> str:  # Resolve a concept alias to its canonical name.
+    return _CONCEPT_CANONICAL.get(concept, concept)  # concepts without an alias entry are returned unchanged
+
+
+def _fact_value(facts: list[FinancialFact], statement_type: str, concept: str) -> tuple[float | None, list[FinancialFact]]:
+    """Return (first non-null value or None, all facts) matching the statement type and canonical concept."""
+    target = _canonical_concept(concept)
+    candidates = [
+        candidate
+        for candidate in facts
+        if candidate.statement_type == statement_type and _canonical_concept(candidate.concept) == target
+    ]
+    first_value = next((candidate.value for candidate in candidates if candidate.value is not None), None)
+    return first_value, candidates
 
 
 def _unit_consistency(statement: FinancialStatement) -> list[str]:
@@ -86,3 +115,229 @@ def validate_statements(statements: dict[str, FinancialStatement], tolerance: fl
                 rev_growth = (rev_item.value_current - rev_item.value_prior) / max(abs(rev_item.value_prior), 1.0)
                 results["metrics"]["ar_growth_vs_rev_growth"] = ar_growth - rev_growth
     return results
+
+
+def validate_facts(  # Run fact-level consistency checks; returns issues, per-check flags, and metrics.
+    doc_meta: DocumentMeta,  # only doc_id is read, for the "facts_missing" message
+    facts: list[FinancialFact],
+    statements: dict[str, FinancialStatement] | None = None,  # optional; its keys also count as available statement types
+    tolerance: float = 0.02,  # relative gap at or above which the balance equation check fails
+) -> FactValidationResult:
+    result = FactValidationResult()
+    statements = statements or {}
+    result.metrics["fact_count"] = len(facts)
+    result.metrics["facts_with_source_refs"] = sum(1 for fact in facts if fact.source_refs)
+    result.checks["facts_present"] = bool(facts)
+
+    if not facts:  # nothing to validate: report a single high-severity issue and stop
+        result.issues.append(
+            FactValidationIssue(
+                code="facts_missing",
+                severity="high",
+                message=f"No canonical facts were produced for document {doc_meta.doc_id}.",
+            )
+        )
+        return result
+
+    available_statement_types = {  # statement types seen in statements or facts that have critical-fact definitions
+        statement_type
+        for statement_type in set(statements) | {fact.statement_type for fact in facts}
+        if statement_type in _CRITICAL_FACTS
+    }
+
+    missing_critical = 0  # check: every critical concept appears at least once per available statement type
+    for statement_type in sorted(available_statement_types):
+        expected = _CRITICAL_FACTS[statement_type]
+        missing = [
+            concept
+            for concept in expected
+            if not any(
+                _canonical_concept(fact.concept) == _canonical_concept(concept)
+                for fact in facts
+                if fact.statement_type == statement_type
+            )
+        ]
+        if missing:
+            missing_critical += len(missing)
+            result.issues.append(
+                FactValidationIssue(
+                    code="missing_critical_facts",
+                    severity="high",
+                    message=f"Missing critical facts for {statement_type}: {', '.join(missing)}.",
+                    concepts=missing,
+                    statement_type=statement_type,
+                )
+            )
+    result.metrics["missing_critical_fact_count"] = missing_critical
+    result.checks["missing_critical_facts"] = missing_critical == 0
+
+    duplicates: dict[tuple[str, str, object, object], list[FinancialFact]] = defaultdict(list)  # check: duplicate facts
+    for fact in facts:
+        duplicates[(fact.statement_type, _canonical_concept(fact.concept), fact.period_start, fact.period_end)].append(fact)
+    duplicate_groups = [group for group in duplicates.values() if len(group) > 1]
+    result.metrics["duplicate_fact_groups"] = len(duplicate_groups)
+    result.checks["duplicate_concepts"] = len(duplicate_groups) == 0
+    for group in duplicate_groups:
+        result.issues.append(
+            FactValidationIssue(
+                code="duplicate_concepts",
+                severity="medium",
+                message=(
+                    f"Duplicate canonical facts detected for {group[0].statement_type}:{_canonical_concept(group[0].concept)}."
+                ),
+                fact_ids=[fact.fact_id for fact in group],
+                concepts=[_canonical_concept(group[0].concept)],  # canonical name, matching the grouping key and the message
+                statement_type=group[0].statement_type,
+            )
+        )
+
+    expected_period_type = {"balance": "instant", "income": "duration", "cashflow": "duration"}  # check: period consistency
+    period_issue_count = 0
+    for statement_type in sorted(available_statement_types):
+        facts_for_type = [fact for fact in facts if fact.statement_type == statement_type]
+        non_null_period_ends = {fact.period_end for fact in facts_for_type if fact.period_end is not None}
+        if len(non_null_period_ends) > 1:
+            period_issue_count += 1
+            result.issues.append(
+                FactValidationIssue(
+                    code="period_inconsistency",
+                    severity="high",
+                    message=f"Facts for {statement_type} use multiple period_end values.",
+                    fact_ids=[fact.fact_id for fact in facts_for_type],
+                    statement_type=statement_type,
+                )
+            )
+
+        wrong_period_type = [
+            fact
+            for fact in facts_for_type
+            if fact.period_type != "unknown" and fact.period_type != expected_period_type.get(statement_type)  # "unknown" is never flagged
+        ]
+        if wrong_period_type:
+            period_issue_count += 1
+            result.issues.append(
+                FactValidationIssue(
+                    code="period_type_inconsistency",
+                    severity="high",
+                    message=f"Facts for {statement_type} use an unexpected period_type.",
+                    fact_ids=[fact.fact_id for fact in wrong_period_type],
+                    statement_type=statement_type,
+                )
+            )
+    result.metrics["period_issue_count"] = period_issue_count
+    result.checks["period_consistency"] = period_issue_count == 0
+
+    currency_scale_issue_count = 0  # check: one currency and one scale per statement type
+    for statement_type in sorted(available_statement_types):
+        facts_for_type = [fact for fact in facts if fact.statement_type == statement_type]
+        currencies = {fact.currency for fact in facts_for_type if fact.currency}
+        scales = {fact.scale for fact in facts_for_type if fact.scale is not None}
+        if len(currencies) > 1:
+            currency_scale_issue_count += 1
+            result.issues.append(
+                FactValidationIssue(
+                    code="currency_inconsistency",
+                    severity="medium",
+                    message=f"Facts for {statement_type} use multiple currencies: {', '.join(sorted(currencies))}.",
+                    fact_ids=[fact.fact_id for fact in facts_for_type if fact.currency],
+                    statement_type=statement_type,
+                )
+            )
+        if len(scales) > 1:
+            currency_scale_issue_count += 1
+            result.issues.append(
+                FactValidationIssue(
+                    code="scale_inconsistency",
+                    severity="medium",
+                    message=f"Facts for {statement_type} use multiple scales.",
+                    fact_ids=[fact.fact_id for fact in facts_for_type if fact.scale is not None],
+                    statement_type=statement_type,
+                )
+            )
+    result.metrics["currency_scale_issue_count"] = currency_scale_issue_count
+    result.checks["scale_currency_consistency"] = currency_scale_issue_count == 0
+
+    missing_ref_count = 0  # check: each present critical concept has source_refs on at least one matching fact
+    for statement_type, concepts in _CRITICAL_FACTS.items():
+        for concept in concepts:
+            _, matched = _fact_value(facts, statement_type, concept)
+            if matched and not any(fact.source_refs for fact in matched):
+                missing_ref_count += 1
+                result.issues.append(
+                    FactValidationIssue(
+                        code="missing_source_refs",
+                        severity="high",
+                        message=f"Critical fact {statement_type}:{concept} is missing source references.",
+                        fact_ids=[fact.fact_id for fact in matched],
+                        concepts=[concept],
+                        statement_type=statement_type,
+                    )
+                )
+    result.metrics["critical_facts_missing_source_refs"] = missing_ref_count
+    result.checks["critical_facts_have_source_refs"] = missing_ref_count == 0
+
+    if "balance" in available_statement_types:  # check: assets = liabilities + equity within tolerance
+        total_assets, asset_facts = _fact_value(facts, "balance", "total_assets")
+        total_liabilities, liability_facts = _fact_value(facts, "balance", "total_liabilities")
+        total_equity, equity_facts = _fact_value(facts, "balance", "total_equity")
+        if total_assets is None or total_liabilities is None or total_equity is None:
+            result.checks["balance_equation"] = False
+            result.issues.append(
+                FactValidationIssue(
+                    code="balance_equation_missing_facts",
+                    severity="high",
+                    message="Balance equation validation is missing one or more required facts.",
+                    fact_ids=[fact.fact_id for fact in asset_facts + liability_facts + equity_facts],
+                    statement_type="balance",
+                )
+            )
+        else:
+            diff_ratio = abs(total_assets - (total_liabilities + total_equity)) / max(abs(total_assets), 1.0)  # denominator floored at 1.0
+            result.metrics["balance_equation_diff_ratio"] = diff_ratio
+            result.checks["balance_equation"] = diff_ratio < tolerance
+            if diff_ratio >= tolerance:
+                result.issues.append(
+                    FactValidationIssue(
+                        code="balance_equation_failed",
+                        severity="high",
+                        message="Balance facts do not satisfy assets = liabilities + equity.",
+                        fact_ids=[fact.fact_id for fact in asset_facts + liability_facts + equity_facts],
+                        concepts=["total_assets", "total_liabilities", "total_equity"],
+                        statement_type="balance",
+                        metadata={"diff_ratio": diff_ratio},
+                    )
+                )
+
+    if "cashflow" in available_statement_types:  # check: operating cash flow and capex exist, and capex is not positive
+        operating_cash_flow, operating_facts = _fact_value(facts, "cashflow", "operating_cash_flow")
+        capex, capex_facts = _fact_value(facts, "cashflow", "capex")
+        if operating_cash_flow is None or capex is None:
+            result.checks["cashflow_reconciliation"] = False
+            result.issues.append(
+                FactValidationIssue(
+                    code="cashflow_reconciliation_missing_facts",
+                    severity="medium",
+                    message="Cashflow reconciliation is missing operating cash flow or capex.",
+                    fact_ids=[fact.fact_id for fact in operating_facts + capex_facts],
+                    concepts=["operating_cash_flow", "capex"],
+                    statement_type="cashflow",
+                )
+            )
+        else:
+            free_cash_flow = operating_cash_flow + capex  # NOTE(review): a positive capex inflates this sum; that case is flagged below
+            result.metrics["free_cash_flow"] = free_cash_flow
+            result.checks["cashflow_reconciliation"] = capex <= 0
+            if capex > 0:
+                result.issues.append(
+                    FactValidationIssue(
+                        code="cashflow_reconciliation_failed",
+                        severity="medium",
+                        message="Capex is positive; expected a cash outflow value for reconciliation.",
+                        fact_ids=[fact.fact_id for fact in operating_facts + capex_facts],
+                        concepts=["operating_cash_flow", "capex"],
+                        statement_type="cashflow",
+                        metadata={"free_cash_flow": free_cash_flow},
+                    )
+                )
+
+    return result
diff --git a/src/schemas/models.py b/src/schemas/models.py
index db06576..77b6a58 100644
--- a/src/schemas/models.py
+++ b/src/schemas/models.py
@@ -38,6 +38,9 @@ class DocumentMeta(BaseModel):
     doc_id: str
     filename: str
     company: str | None = None
+    ticker: str | None = None
+    cik: str | None = None
+    filing_type: str | None = None
     period_end: date | None = None
     report_type: str | None = None
     language: str | None = None
@@ -120,9 +123,14 @@ class FinancialFact(BaseModel):
 
     fact_id: str
     doc_id: str
+    company: str | None = None
+    ticker: str | None = None
+    cik: str | None = None
+    filing_type: str | None = None
     statement_type: Literal["income", "balance", "cashflow", "note", "other"]
     concept: str
     label: str
+    raw_label: str | None = None
     value: float | None = None
     unit: str | None = None
     scale: float | None = None
@@ -167,6 +175,26 @@ class Correction(BaseModel):
     created_at: datetime = Field(default_factory=_utc_now)
 
 
+class FactValidationIssue(BaseModel):  # One problem reported by fact validation.
+    model_config = ConfigDict(extra="forbid")  # unknown fields are rejected
+
+    code: str  # machine-readable issue identifier, e.g. "missing_critical_facts"
+    severity: Literal["low", "medium", "high"] = "medium"
+    message: str  # human-readable description of the problem
+    fact_ids: list[str] = Field(default_factory=list)  # fact_id values of the facts involved
+    concepts: list[str] = Field(default_factory=list)  # concept names involved
+    statement_type: str | None = None  # statement the issue relates to, when there is one
+    metadata: dict[str, Any] = Field(default_factory=dict)  # extra details, e.g. {"diff_ratio": ...}
+
+
+class FactValidationResult(BaseModel):  # Aggregate outcome of a fact-validation run.
+    model_config = ConfigDict(extra="forbid")  # unknown fields are rejected
+
+    issues: list[FactValidationIssue] = Field(default_factory=list)  # every problem detected
+    checks: dict[str, bool | float | int | str] = Field(default_factory=dict)  # check name -> outcome
+    metrics: dict[str, float | int | str] = Field(default_factory=dict)  # metric name -> value (counts, ratios)
+
+
 class KeyNote(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
diff --git a/tests/test_finance_facts.py b/tests/test_finance_facts.py
index 3d914c1..041d289 100644
--- a/tests/test_finance_facts.py
+++ b/tests/test_finance_facts.py
@@ -3,7 +3,7 @@
 from datetime import date
 
 from src.finance.facts import apply_corrections, facts_from_statements
-from src.schemas.models import Correction, FinancialStatement, SourceRef, StatementLineItem
+from src.schemas.models import Correction, DocumentMeta, FinancialStatement, SourceRef, StatementLineItem
 
 
 def test_facts_from_statements_preserves_line_item_evidence() -> None:
@@ -35,13 +35,28 @@ def test_facts_from_statements_preserves_line_item_evidence() -> None:
         extraction_confidence=0.7,
     )
 
-    facts = facts_from_statements("doc-1", {"income": statement})
+    facts = facts_from_statements(
+        DocumentMeta(
+            doc_id="doc-1",
+            filename="demo.pdf",
+            company="ACME Corp",
+            ticker="ACME",
+            cik="00001234",
+            filing_type="10-K",
+        ),
+        {"income": statement},
+    )
 
     assert len(facts) == 1
     fact = facts[0]
     assert fact.doc_id == "doc-1"
+    assert fact.company == "ACME Corp"
+    assert fact.ticker == "ACME"
+    assert fact.cik == "00001234"
+    assert fact.filing_type == "10-K"
     assert fact.statement_type == "income"
     assert fact.concept == "revenue"
+    assert fact.raw_label == "Revenue"
     assert fact.value == 100.0
     assert fact.period_type == "duration"
     assert fact.scale == 1_000_000.0
@@ -85,4 +100,18 @@ def test_apply_corrections_updates_allowed_fact_field() -> None:
     corrected = apply_corrections([fact], [correction])
 
     assert corrected[0].value == 125.0
-    assert fact.value == 100.0
\ No newline at end of file
+    assert fact.value == 100.0
+
+
+def test_facts_from_statements_uses_report_type_as_filing_type_fallback() -> None:  # filing_type falls back to report_type
+    statement = FinancialStatement(
+        statement_type="balance",
+        line_items=[StatementLineItem(name_raw="Total assets", name_norm="total_assets", value_current=500.0)],
+    )
+
+    facts = facts_from_statements(
+        DocumentMeta(doc_id="doc-2", filename="demo.pdf", report_type="annual-report"),  # filing_type deliberately left unset
+        {"balance": statement},
+    )
+
+    assert facts[0].filing_type == "annual-report"
\ No newline at end of file
diff --git a/tests/test_routes_web.py b/tests/test_routes_web.py
index 083e5d8..3d1bdb5 100644
--- a/tests/test_routes_web.py
+++ b/tests/test_routes_web.py
@@ -255,12 +255,50 @@ def test_facts_endpoint(client: TestClient, tmp_path: Path) -> None:
     assert body["data"][0]["concept"] == "revenue"
 
 
+def test_fact_validation_endpoint(client: TestClient, tmp_path: Path) -> None:  # stored artifact is served back by the API
+    _make_doc(tmp_path / "data", "abc123")
+    validation_path = tmp_path / "data" / "abc123" / "extracted" / "fact_validation.json"
+    validation_path.write_text(  # write the artifact directly to disk rather than running validation
+        json.dumps(
+            {
+                "issues": [
+                    {
+                        "code": "missing_critical_facts",
+                        "severity": "high",
+                        "message": "Missing critical facts for income: gross_profit.",
+                        "fact_ids": [],
+                        "concepts": ["gross_profit"],
+                        "statement_type": "income",
+                        "metadata": {},
+                    }
+                ],
+                "checks": {"missing_critical_facts": False},
+                "metrics": {"missing_critical_fact_count": 1},
+            }
+        ),
+        encoding="utf-8",
+    )
+
+    r = client.get("/v1/documents/abc123/fact-validation")
+
+    assert r.status_code == 200
+    body = r.json()
+    assert body["ok"] is True
+    assert body["data"]["issues"][0]["code"] == "missing_critical_facts"  # payload is returned under "data"
+
+
 def test_facts_missing_returns_404(client: TestClient, tmp_path: Path) -> None:
     _make_doc(tmp_path / "data", "abc123")
     r = client.get("/v1/documents/abc123/facts")
     assert r.status_code == 404
 
 
+def test_fact_validation_missing_returns_404(client: TestClient, tmp_path: Path) -> None:  # no artifact -> 404
+    _make_doc(tmp_path / "data", "abc123")  # assumes _make_doc does not create fact_validation.json — TODO confirm
+    r = client.get("/v1/documents/abc123/fact-validation")
+    assert r.status_code == 404
+
+
 def test_tables_missing_returns_404(client: TestClient, tmp_path: Path) -> None:
     _make_doc(tmp_path / "data", "abc123", with_tables=False)
     r = client.get("/v1/documents/abc123/tables")
diff --git a/tests/test_validators_extended.py b/tests/test_validators_extended.py
index 33b3a57..192cfa8 100644
--- a/tests/test_validators_extended.py
+++ b/tests/test_validators_extended.py
@@ -1,8 +1,8 @@
 """Tests for validators — balance equation failure, unit mismatch, gross profit check."""
 from __future__ import annotations
 
-from src.finance.validators import validate_statements
-from src.schemas.models import FinancialStatement, StatementLineItem
+from src.finance.validators import validate_facts, validate_statements
+from src.schemas.models import DocumentMeta, FinancialFact, FinancialStatement, SourceRef, StatementLineItem
 
 
 def test_balance_equation_failure():
@@ -80,3 +80,101 @@ def test_profit_to_cfo_ratio_negative_income():
     ratio = results["metrics"].get("profit_to_cfo_ratio")
     assert ratio is not None
     assert abs(ratio - 10.0 / 20.0) < 1e-9
+
+
+def test_validate_facts_flags_missing_critical_fact_and_missing_source_ref():
+    facts = [
+        FinancialFact(
+            fact_id="fact-1",
+            doc_id="doc-1",
+            statement_type="income",
+            concept="revenue",
+            label="Revenue",
+            raw_label="Revenue",
+            value=100.0,
+            period_type="duration",
+            confidence=0.8,
+            source_refs=[SourceRef(ref_type="page_text", page=1, confidence=0.8)],
+        ),
+        FinancialFact(
+            fact_id="fact-2",
+            doc_id="doc-1",
+            statement_type="income",
+            concept="net_income",
+            label="Net income",
+            raw_label="Net income",
+            value=20.0,
+            period_type="duration",
+            confidence=0.8,
+            source_refs=[],
+        ),
+    ]
+
+    result = validate_facts(DocumentMeta(doc_id="doc-1", filename="demo.pdf"), facts, {"income": FinancialStatement(statement_type="income")})
+
+    assert result.checks["missing_critical_facts"] is False
+    assert result.checks["critical_facts_have_source_refs"] is False
+    assert any(issue.code == "missing_critical_facts" for issue in result.issues)
+    assert any(issue.code == "missing_source_refs" for issue in result.issues)
+
+
+def test_validate_facts_flags_duplicates_and_balance_equation_failure():
+    facts = [
+        FinancialFact(
+            fact_id="fact-assets-a",
+            doc_id="doc-1",
+            statement_type="balance",
+            concept="total_assets",
+            label="Total assets",
+            raw_label="Total assets",
+            value=100.0,
+            period_type="instant",
+            period_end=None,
+            confidence=0.8,
+            source_refs=[SourceRef(ref_type="page_text", page=1, confidence=0.8)],
+        ),
+        FinancialFact(
+            fact_id="fact-assets-b",
+            doc_id="doc-1",
+            statement_type="balance",
+            concept="total_assets",
+            label="Assets",
+            raw_label="Assets",
+            value=100.0,
+            period_type="instant",
+            period_end=None,
+            confidence=0.8,
+            source_refs=[SourceRef(ref_type="page_text", page=1, confidence=0.8)],
+        ),
+        FinancialFact(
+            fact_id="fact-liabilities",
+            doc_id="doc-1",
+            statement_type="balance",
+            concept="total_liabilities",
+            label="Total liabilities",
+            raw_label="Total liabilities",
+            value=70.0,
+            period_type="instant",
+            confidence=0.8,
+            source_refs=[SourceRef(ref_type="page_text", page=1, confidence=0.8)],
+        ),
+        FinancialFact(
+            fact_id="fact-equity",
+            doc_id="doc-1",
+            statement_type="balance",
+            concept="total_equity",
+            label="Total equity",
+            raw_label="Total equity",
+            value=20.0,
+            period_type="instant",
+            confidence=0.8,
+            source_refs=[SourceRef(ref_type="page_text", page=1, confidence=0.8)],
+        ),
+    ]
+
+    result = validate_facts(DocumentMeta(doc_id="doc-1", filename="demo.pdf"), facts, {"balance": FinancialStatement(statement_type="balance")})
+
+    assert result.checks["duplicate_concepts"] is False
+    assert result.checks["balance_equation"] is False
+    assert any(issue.code == "duplicate_concepts" for issue in result.issues)
+    assert any(issue.code == "balance_equation_failed" for issue in result.issues)
diff --git a/web/src/api/docs.ts b/web/src/api/docs.ts
index c158b59..e4ce40a 100644
--- a/web/src/api/docs.ts
+++ b/web/src/api/docs.ts
@@ -9,6 +9,7 @@ import type {
   ExtractedTable,
   FinancialFact,
   FinancialStatements,
+  FactValidationResult,
   KeyNote,
   MetricItem,
   PdfOperationResult,
@@ -185,6 +186,9 @@ export const docsApi = {
   facts(docId: string) {
     return unwrap<FinancialFact[]>(http.get(`/v1/documents/${docId}/facts`))
   },
+  factValidation(docId: string) {
+    return unwrap<FactValidationResult>(http.get(`/v1/documents/${docId}/fact-validation`))
+  },
   riskSignals(docId: string) {
     return unwrap<any>(http.get(`/v1/documents/${docId}/risk-signals`)).then(normalizeSignals)
   },
diff --git a/web/src/api/types.ts b/web/src/api/types.ts
index 02235a4..d1f30ca 100644
--- a/web/src/api/types.ts
+++ b/web/src/api/types.ts
@@ -33,9 +33,14 @@ export interface FinancialStatements {
 export interface FinancialFact {
   fact_id: string
   doc_id: string
+  company?: string | null
+  ticker?: string | null
+  cik?: string | null
+  filing_type?: string | null
   statement_type: 'income' | 'balance' | 'cashflow' | 'note' | 'other' | string
   concept: string
   label: string
+  raw_label?: string | null
   value?: number | null
   unit?: string | null
   scale?: number | null
@@ -49,6 +54,22 @@ export interface FinancialFact {
   metadata?: Record<string, unknown>
 }
 
+export interface FactValidationIssue {
+  code: string
+  severity: 'low' | 'medium' | 'high' | string
+  message: string
+  fact_ids?: string[]
+  concepts?: string[]
+  statement_type?: string | null
+  metadata?: Record<string, unknown>
+}
+
+export interface FactValidationResult {
+  issues: FactValidationIssue[]
+  checks: Record<string, boolean | number | string>
+  metrics: Record<string, number | string>
+}
+
 export interface RiskSignal {
   id: string
   category: string
@@ -69,6 +90,9 @@ export interface DocumentMeta {
   doc_id: string
   filename: string
   company?: string | null
+  ticker?: string | null
+  cik?: string | null
+  filing_type?: string | null
   period_end?: string | null
   report_type?: string | null
   language?: string | null