diff --git a/changelogs/v0.5.2/FiifiB_2026-06-25.log b/changelogs/v0.5.2/FiifiB_2026-06-25.log
new file mode 100644
index 00000000..8a23f7f6
--- /dev/null
+++ b/changelogs/v0.5.2/FiifiB_2026-06-25.log
@@ -0,0 +1,58 @@
+# 2026-06-25 — feat(ontology): PGE Evaluator stage for owl-generator
+
+## Context
+
+The owl-generator agent had a single-shot generation + a pitfall-tool fix loop,
+but no deterministic Evaluator stage — so structural defects (orphan classes,
+dangling domain/range, naming violations, duplicate classes) could survive into
+the delivered ontology. This change turns owl-generation into a real
+Planner→Generator→Evaluator (PGE) loop: after the pitfall loop settles, a
+deterministic Stage-1 evaluator scores the ontology against the source metadata
+and feeds concrete retry-hints back to the generator, bounded by a hard cap.
+
+The Evaluator reuses a small, usecase-agnostic ontology-metrics module
+(`agents.pge_eval.ontology_metrics`) — gold-free, computed purely from the
+generated ontology + source schema. Only the ontology slice of the metrics
+package is introduced here; the full scorecard/CLI lands separately.
+
+## Changes
+
+1. `src/agents/agent_owl_generator/engine.py`
+   - Add `MAX_OWL_EVAL_ROUNDS` (bounded Evaluator retry cap) and
+     `_evaluate_ontology_stage()` — parses the Turtle, runs the deterministic
+     Tier-1 ontology checks, and returns a retry-hint string on hard defects
+     (orphan / dangling domain-range / naming / duplicate). Fails open: any
+     parse/dep error returns `None` so a check failure never blocks delivery.
+   - Wire the Evaluator into the agent loop after the pitfall loop; only retry
+     when an iteration remains, so a usable ontology is never discarded by
+     exhausting `MAX_ITERATIONS`.
+   - Raise `max_tokens` to `MAX_OUTPUT_TOKENS = 16000` so exhaustive attribute
+     coverage isn't silently truncated past the old 4096 ceiling.
+   - Strengthen the system prompt: `# ATTRIBUTE COVERAGE` section + a
+     `get_table_detail`-per-table workflow step driving exhaustive (not curated)
+     datatype-property coverage.
+2. `src/agents/pge_eval/__init__.py` — new package (minimal root; importers
+   depend on the concrete submodule to avoid coupling to later modules).
+3. `src/agents/pge_eval/normalize.py` — shared name/metadata/ontology
+   normalization primitives (stdlib-only).
+4. `src/agents/pge_eval/ontology_metrics.py` — `evaluate_ontology()`:
+   deterministic Stage-1 checks + footprint coverage, no stored reference.
+5. Tests: `tests/units/pge_eval/{__init__,_fixtures}.py`,
+   `test_ontology_metrics.py`, `test_owl_evaluator_stage.py`.
+
+## Modified / added files
+
+- M src/agents/agent_owl_generator/engine.py
+- A src/agents/pge_eval/__init__.py
+- A src/agents/pge_eval/normalize.py
+- A src/agents/pge_eval/ontology_metrics.py
+- A tests/units/pge_eval/__init__.py
+- A tests/units/pge_eval/_fixtures.py
+- A tests/units/pge_eval/test_ontology_metrics.py
+- A tests/units/pge_eval/test_owl_evaluator_stage.py
+
+## Tests
+
+`uv run pytest tests/units/pge_eval/test_ontology_metrics.py
+tests/units/pge_eval/test_owl_evaluator_stage.py
+tests/units/ontology/test_owl_generator.py -q` → **39 passed**.
diff --git a/src/agents/agent_owl_generator/engine.py b/src/agents/agent_owl_generator/engine.py
index 9d755d48..c16f4ad1 100644
--- a/src/agents/agent_owl_generator/engine.py
+++ b/src/agents/agent_owl_generator/engine.py
@@ -37,6 +37,18 @@
 
 MAX_ITERATIONS = 10
 LLM_TIMEOUT = 180
+# Exhaustive per-class datatype-property coverage (see # ATTRIBUTE COVERAGE in
+# the system prompt) makes the Turtle output large — a large domain ontology
+# with dozens of classes and 50+ datatype properties runs well past the old 4096
+# ceiling, which silently truncated the final statement and broke parsing.
+# Claude Opus supports large completions; 16k tokens fits an exhaustive
+# domain ontology with headroom.
+MAX_OUTPUT_TOKENS = 16000
+
+# Bounded PGE retry cap for the Evaluator stage (§3.5): how many times the
+# deterministic Stage-1 ontology checks may feed retry_hints back into
+# generation before owl delivery proceeds regardless.
+MAX_OWL_EVAL_ROUNDS = 2
 
 _TRACE_NAME = "owl_generator"
 
@@ -95,9 +107,12 @@ def _load_pitfall_rules() -> str:
 
 # WORKFLOW
 1. Call get_metadata to understand the database schema.
-2. Call list_documents to discover available documents.
-3. Read relevant documents with read_document.
-4. Output ONLY the final Turtle ontology as plain text (starting with @prefix).
+2. Call get_table_detail on EVERY table you intend to map a class to — get_metadata
+   truncates wide tables at 80 columns, and you must see the FULL column list to give
+   each class exhaustive attribute coverage (see # ATTRIBUTE COVERAGE).
+3. Call list_documents to discover available documents.
+4. Read relevant documents with read_document.
+5. Output ONLY the final Turtle ontology as plain text (starting with @prefix).
 
 # NAMING RULES (CRITICAL – NO EXCEPTIONS)
 • Classes: PascalCase (Customer, SalesOrder)
@@ -127,6 +142,34 @@ def _load_pitfall_rules() -> str:
 • For EVERY DatatypeProperty you MUST declare rdfs:domain on the property itself
   (do not rely on owl:Restriction alone — the platform reads attributes from rdfs:domain)
 
+# ATTRIBUTE COVERAGE (CRITICAL — exhaustive, NOT curated)
+The downstream mapping pipeline can only bind a SQL column to a class when that
+column has a matching owl:DatatypeProperty with rdfs:domain on the class. A class
+with few datatype properties produces an ID+Label-only entity that is USELESS for
+analytics. So model attributes EXHAUSTIVELY, not minimally:
+• For EVERY class, emit a DatatypeProperty for EVERY meaningful source column that
+  describes an instance of that class — across ALL tables that realise the class.
+  A single class is often realised by several source tables (e.g. one per source
+  system, region, or tenant) that each hold the same real-world entity in a local
+  schema; UNION their columns mentally and cover the full set. Use get_table_detail
+  on each covering table to see every column.
+• "Meaningful" = a genuine attribute of the entity: dates, measurements, codes,
+  scores, names, statuses, flags, free-text notes. EXCLUDE ONLY: surrogate/auto-
+  increment row keys with no analytical value, audit columns (created_at, updated_by,
+  etl_*, _ingest_*), and the foreign-key columns that ObjectProperty relationships
+  already carry.
+• When two sources expose the SAME attribute under different column names
+  (e.g. total_amount vs TOTAL_AMT; status vs STATUS_CODE), emit ONE datatype
+  property — do NOT emit a per-source duplicate. The mapping layer reconciles the
+  source columns.
+• Name datatype properties in lowerCamelCase derived from business meaning
+  (order_date → orderDate, TOTAL_AMT → totalAmount).
+  Use ONLY [a-z][A-Za-z0-9]* — never underscores, hyphens, or backslash escapes.
+• The "at least 2 datatype properties" floor in the guidelines is a MINIMUM, not a
+  target. Rich, real-world entities (a transaction, an encounter, an event, a core
+  business object) typically warrant 6–11 datatype properties. Aim for full column
+  coverage, not a tidy subset.
+
 # RELATIONSHIP RULES
 • NEVER create bidirectional relationships.
 • Between any two classes A and B create at most ONE ObjectProperty.
@@ -160,10 +203,12 @@ def _load_pitfall_rules() -> str:
 
 ## 2. Class and property design rules
 For each **class** you create:[1][2][3][4]
-1. Provide:  
-   - A short, clear natural-language definition (1–2 sentences).  
-   - At least 1 object property (unless the class is explicitly abstract).  
-   - At least 2 datatype properties, when meaningful in the domain.  
+1. Provide:
+   - A short, clear natural-language definition (1–2 sentences).
+   - At least 1 object property (unless the class is explicitly abstract).
+   - Datatype properties covering EVERY meaningful source column for the class
+     (see "# ATTRIBUTE COVERAGE" in the system prompt — exhaustive, not curated;
+     2 is a floor, full column coverage is the goal).
 2. Naming conventions:  
    - Classes: UpperCamelCase (e.g., `CustomerOrder`).  
    - Object properties: lowerCamelCase verbs or verb-like phrases (e.g., `placesOrder`).  
@@ -241,6 +286,80 @@ def _parse_pitfall_tool_result(tool_result_json: str) -> Optional[Dict]:
         return None
 
 
+# Stage-1 absolute (Tier-1) ontology defects that the Evaluator forces a
+# retry on.  Coverage ratios are computed and logged but are advisory at the
+# generation stage (they are Tier-2 in the scorecard), so they do not by
+# themselves trigger a regeneration — only hard structural defects do.
+_EVAL_ABSOLUTE_CHECKS = (
+    "orphan_class_count",
+    "dangling_domain_range_count",
+    "naming_violation_count",
+    "duplicate_class_count",
+)
+
+
+def _evaluate_ontology_stage(
+    turtle_text: str, metadata: dict, iteration: int
+) -> Optional[str]:
+    """Run the Stage-1 deterministic ontology checks (§3.2) on *turtle_text*.
+
+    Parses the Turtle into the registry shape, runs the shared intrinsic
+    checks, and returns a concrete ``retry_hint`` feedback string when any
+    Tier-1 absolute defect (orphan class, dangling domain/range, naming
+    violation, duplicate class) is present — turning owl-gen into a real
+    PGE loop.  Returns ``None`` when the ontology is structurally clean.
+
+    Fails open: any parse/dep error returns ``None`` so a check failure
+    never blocks OWL delivery (mirrors the pitfall-tool check).
+    """
+    try:
+        from back.core.w3c.owl.OntologyParser import OntologyParser
+        from back.objects.ontology.Ontology import Ontology
+        from agents.pge_eval.ontology_metrics import evaluate_ontology
+
+        # The model sometimes prepends a prose sentence or wraps the Turtle in
+        # a markdown fence; strip that the same way the downstream registry
+        # does, so the Evaluator parses real output instead of skipping.
+        turtle_text = Ontology.clean_owl_output(turtle_text)
+        parser = OntologyParser(turtle_text)
+        ontology = {
+            "classes": parser.get_classes(),
+            "properties": parser.get_properties(),
+        }
+        metrics, issues, _footprint = evaluate_ontology(ontology, metadata or {})
+        logger.info(
+            "Iteration %d: ontology evaluator — metrics=%s",
+            iteration,
+            metrics,
+        )
+
+        absolute_issues = [
+            i for i in issues if i.get("check") in _EVAL_ABSOLUTE_CHECKS
+        ]
+        if not absolute_issues:
+            logger.info(
+                "Iteration %d: ontology evaluator — no Tier-1 defects", iteration
+            )
+            return None
+
+        lines = [
+            "The ontology you produced has structural defects. Fix ALL of them "
+            "and output ONLY the corrected Turtle (no markdown, no comments, "
+            "starting with @prefix declarations):\n"
+        ]
+        # Cap feedback to keep the prompt bounded.
+        for issue in absolute_issues[:12]:
+            lines.append(f"  • {issue['hint']}")
+        return "\n".join(lines)
+    except Exception as exc:  # noqa: BLE001
+        logger.warning(
+            "Iteration %d: ontology evaluator skipped due to error: %s",
+            iteration,
+            exc,
+        )
+        return None
+
+
 def _build_user_prompt(
     guidelines: str,
     options: dict,
@@ -443,6 +562,7 @@ def notify(msg: str):
     # ------------------------------------------------------------------
     tools_supported = True
     _owl_fix_rounds = 0   # pitfall-fix rounds consumed so far
+    _owl_eval_rounds = 0  # Evaluator (Stage-1 PGE) retry rounds consumed
 
     for iteration in range(MAX_ITERATIONS):
         logger.info(
@@ -477,7 +597,7 @@ def notify(msg: str):
                 endpoint_name,
                 messages,
                 tools=send_tools,
-                max_tokens=4096,
+                max_tokens=MAX_OUTPUT_TOKENS,
                 temperature=0.1,
                 timeout=LLM_TIMEOUT,
                 trace_name=_TRACE_NAME,
@@ -509,7 +629,7 @@ def notify(msg: str):
                         endpoint_name,
                         messages,
                         tools=None,
-                        max_tokens=4096,
+                        max_tokens=MAX_OUTPUT_TOKENS,
                         temperature=0.1,
                         timeout=LLM_TIMEOUT,
                         trace_name=_TRACE_NAME,
@@ -749,6 +869,41 @@ def notify(msg: str):
                         _owl_fix_rounds, max_fix_rounds,
                     )
 
+            # --------------------------------------------------------------
+            # Evaluator stage (PGE loop) — after the pitfall-tool loop is
+            # clean/maxed, run the Stage-1 deterministic ontology checks (§3.2).
+            # On a Tier-1 structural defect, feed concrete retry_hints back to
+            # the generator, bounded by MAX_OWL_EVAL_ROUNDS. Only retry when
+            # there's another iteration left, so a usable ontology is never
+            # discarded by exhausting MAX_ITERATIONS.
+            # --------------------------------------------------------------
+            eval_feedback = _evaluate_ontology_stage(content, ctx.metadata, iteration + 1)
+            if (
+                eval_feedback
+                and _owl_eval_rounds < MAX_OWL_EVAL_ROUNDS
+                and iteration < MAX_ITERATIONS - 1
+            ):
+                _owl_eval_rounds += 1
+                notify(
+                    f"Ontology defects found — eval round "
+                    f"{_owl_eval_rounds}/{MAX_OWL_EVAL_ROUNDS}…"
+                )
+                result.steps.append(
+                    AgentStep(
+                        step_type="evaluator",
+                        content=eval_feedback[:200],
+                        duration_ms=0,
+                    )
+                )
+                messages.append({"role": "assistant", "content": content})
+                messages.append({"role": "user", "content": eval_feedback})
+                logger.info(
+                    "Iteration %d: ontology evaluator found defects — eval round %d",
+                    iteration + 1,
+                    _owl_eval_rounds,
+                )
+                continue   # next iteration will produce corrected OWL
+
             # ── Accept this text as the final OWL ────────────────────────────
             result.success = True
             result.owl_content = content
diff --git a/src/agents/pge_eval/__init__.py b/src/agents/pge_eval/__init__.py
new file mode 100644
index 00000000..5a2089d9
--- /dev/null
+++ b/src/agents/pge_eval/__init__.py
@@ -0,0 +1,16 @@
+"""OntoBricks PGE intrinsic-evaluation primitives.
+
+This package holds usecase-agnostic, gold-free structural metrics for the PGE
+pipeline.  This PR introduces only the **ontology** slice consumed by the
+owl-generator Evaluator stage:
+
+* :func:`agents.pge_eval.ontology_metrics.evaluate_ontology` — Stage-1
+  deterministic ontology checks (orphan classes, dangling domain/range,
+  naming, duplicates, footprint coverage), computed purely from the generated
+  ontology + source metadata (no stored reference answer).
+
+The full scorecard (mapping metrics, gate tiers, baseline regression, LLM
+judge, CLI) lands in a separate change.  Importers should depend on the
+concrete submodule (``agents.pge_eval.ontology_metrics``) rather than this
+package root to avoid coupling to modules introduced later.
+"""
diff --git a/src/agents/pge_eval/normalize.py b/src/agents/pge_eval/normalize.py
new file mode 100644
index 00000000..3e20baab
--- /dev/null
+++ b/src/agents/pge_eval/normalize.py
@@ -0,0 +1,262 @@
+"""Shape normalisation + footprint helpers for the PGE intrinsic evaluator.
+
+Everything in this module is pure Python — no LLM, no DB, no domain
+knowledge.  It exists so the rest of the scorer can reason over one stable
+in-memory shape regardless of whether the caller handed it the *agent*
+ontology shape (``{entities, relationships}``), the *registry* ontology
+shape (``{classes, properties}``), or raw source metadata.
+
+Design constraints (see docs/plans/2026-06-10-goal-loop-and-pge-eval-design.md):
+
+* **Usecase-agnostic.**  No table name, identifier, or count from any
+  particular domain is encoded here.  The only constants are generic
+  audit/surrogate column heuristics that hold for any relational source.
+* **Deterministic.**  Pure functions of their inputs; no randomness, no
+  wall-clock, no network.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any, Dict, List, Optional, Set
+
+
+# =====================================================
+# Name normalisation
+# =====================================================
+
+
+def normalize_name(name: Optional[str]) -> str:
+    """Collapse a column / property / class name to a comparison key.
+
+    Lower-cases and strips every non-alphanumeric character so that
+    ``first_name``, ``firstName`` and ``FirstName`` all collapse to
+    ``firstname``.  This is the footprint-matching key used to decide
+    whether a source column "became" a data property without consulting
+    the mapping (Stage-1 is mapping-independent — see D2/D3).
+    """
+    if not name:
+        return ""
+    return re.sub(r"[^a-z0-9]", "", str(name).lower())
+
+
+def local_name(uri_or_name: Optional[str]) -> str:
+    """Return the local name of a URI/CURIE, or the value unchanged.
+
+    ``http://x/Customer`` -> ``Customer``; ``ex:Customer`` -> ``Customer``;
+    ``Customer`` -> ``Customer``.
+    """
+    if not uri_or_name:
+        return ""
+    s = str(uri_or_name)
+    for sep in ("#", "/"):
+        if sep in s:
+            s = s.rsplit(sep, 1)[-1]
+    if ":" in s and not s.startswith("http"):
+        s = s.rsplit(":", 1)[-1]
+    return s
+
+
+# =====================================================
+# Audit / surrogate column heuristics (generic, not domain-specific)
+# =====================================================
+
+# Audit tokens that mark a column as non-analytical bookkeeping.  These are
+# generic ETL/CDC conventions, not tied to any domain.
+_AUDIT_TOKENS = (
+    "createdat",
+    "updatedat",
+    "createdon",
+    "updatedon",
+    "createdby",
+    "updatedby",
+    "modifiedat",
+    "modifiedby",
+    "deletedat",
+    "ingestedat",
+    "loadedat",
+    "loadts",
+    "etltimestamp",
+    "dwcreated",
+    "dwupdated",
+)
+_AUDIT_PREFIXES = ("etl", "ingest", "_ingest", "dw")
+# Exact surrogate row-key names + suffixes for warehouse surrogate keys.
+_SURROGATE_EXACT = ("id", "rowid", "rownum", "rownumber")
+_SURROGATE_SUFFIXES = ("sk", "surrogatekey")
+
+
+def is_surrogate_or_audit(column_name: str) -> bool:
+    """Heuristic: True when *column_name* is a surrogate row key or audit
+    column with no analytical value.
+
+    The OWL generator is instructed to drop exactly these, so they are
+    excluded from coverage denominators (D3).  Intentionally conservative:
+    it does NOT drop every ``*_id`` column (foreign keys can be meaningful),
+    only obvious surrogate keys and audit bookkeeping.
+    """
+    norm = normalize_name(column_name)
+    if not norm:
+        return True
+    if norm in _SURROGATE_EXACT:
+        return True
+    if any(norm.endswith(sfx) for sfx in _SURROGATE_SUFFIXES):
+        return True
+    if any(tok in norm for tok in _AUDIT_TOKENS):
+        return True
+    raw = re.sub(r"[^a-z0-9_]", "", str(column_name).lower())
+    if any(raw.startswith(p) for p in _AUDIT_PREFIXES):
+        return True
+    return False
+
+
+# =====================================================
+# Ontology normalisation
+# =====================================================
+
+
+def _attr_names(raw_attrs: Any) -> List[str]:
+    """Normalise an attribute container to a flat list of name strings.
+
+    Accepts the agent shape (list of str or ``{name|uri|label}`` dicts) and
+    the registry shape (list of ``{name|localName}`` dicts).
+    """
+    out: List[str] = []
+    for a in raw_attrs or []:
+        if isinstance(a, str):
+            out.append(a)
+        elif isinstance(a, dict):
+            name = a.get("name") or a.get("localName") or a.get("uri") or a.get("label")
+            if name:
+                out.append(local_name(name))
+    return out
+
+
+class NormalizedOntology:
+    """A flat, shape-agnostic view of a generated ontology.
+
+    Attributes:
+        classes: list of ``{"name", "uri", "data_properties": [str]}``.
+        object_properties: list of ``{"name", "uri", "domain", "range"}``
+            where domain/range are the raw refs as authored (URI or local).
+    """
+
+    def __init__(self, classes: List[dict], object_properties: List[dict]):
+        self.classes = classes
+        self.object_properties = object_properties
+
+    # --- derived sets, computed lazily but cheaply ------------------
+
+    @property
+    def class_resolution_set(self) -> Set[str]:
+        """Every token a domain/range ref could legitimately resolve to."""
+        out: Set[str] = set()
+        for c in self.classes:
+            if c.get("uri"):
+                out.add(c["uri"])
+                out.add(local_name(c["uri"]))
+            if c.get("name"):
+                out.add(c["name"])
+                out.add(local_name(c["name"]))
+        return out
+
+    @property
+    def all_data_property_keys(self) -> Set[str]:
+        """Normalised keys of every data property across every class."""
+        keys: Set[str] = set()
+        for c in self.classes:
+            for dp in c.get("data_properties", []):
+                k = normalize_name(local_name(dp))
+                if k:
+                    keys.add(k)
+        return keys
+
+    @property
+    def class_name_keys(self) -> Set[str]:
+        keys: Set[str] = set()
+        for c in self.classes:
+            k = normalize_name(local_name(c.get("name") or c.get("uri")))
+            if k:
+                keys.add(k)
+        return keys
+
+
+def normalize_ontology(ontology: dict) -> NormalizedOntology:
+    """Normalise either the agent shape or the registry shape.
+
+    * Agent shape:    ``{"entities": [...], "relationships": [...]}``
+    * Registry shape: ``{"classes": [...], "properties": [...]}``
+    """
+    ontology = ontology or {}
+    classes: List[dict] = []
+    object_props: List[dict] = []
+
+    if "entities" in ontology or "relationships" in ontology:
+        for e in ontology.get("entities", []) or []:
+            classes.append(
+                {
+                    "name": e.get("name") or local_name(e.get("uri")),
+                    "uri": e.get("uri", ""),
+                    "data_properties": _attr_names(e.get("attributes")),
+                }
+            )
+        for r in ontology.get("relationships", []) or []:
+            object_props.append(
+                {
+                    "name": r.get("name") or local_name(r.get("uri")),
+                    "uri": r.get("uri", ""),
+                    "domain": r.get("domain", ""),
+                    "range": r.get("range", ""),
+                }
+            )
+    else:
+        for c in ontology.get("classes", []) or []:
+            classes.append(
+                {
+                    "name": c.get("name") or local_name(c.get("uri")),
+                    "uri": c.get("uri", ""),
+                    "data_properties": _attr_names(c.get("dataProperties")),
+                }
+            )
+        for p in ontology.get("properties", []) or []:
+            if p.get("type") and p.get("type") != "ObjectProperty":
+                continue
+            object_props.append(
+                {
+                    "name": p.get("name") or local_name(p.get("uri")),
+                    "uri": p.get("uri", ""),
+                    "domain": p.get("domain", ""),
+                    "range": p.get("range", ""),
+                }
+            )
+
+    return NormalizedOntology(classes=classes, object_properties=object_props)
+
+
+# =====================================================
+# Source-metadata normalisation
+# =====================================================
+
+
+def normalize_metadata(metadata: dict) -> List[dict]:
+    """Return ``[{"name", "columns": [str]}]`` from domain metadata.
+
+    Accepts the ``{"tables": [{"name"|"full_name", "columns": [...]}]}``
+    shape produced by the metadata tools.  Column entries may be plain
+    strings or ``{"name": ...}`` dicts.
+    """
+    out: List[dict] = []
+    for t in (metadata or {}).get("tables", []) or []:
+        cols: List[str] = []
+        for c in t.get("columns", []) or []:
+            if isinstance(c, str):
+                cols.append(c)
+            elif isinstance(c, dict) and c.get("name"):
+                cols.append(c["name"])
+        out.append(
+            {
+                "name": t.get("full_name") or t.get("name") or "",
+                "columns": cols,
+            }
+        )
+    return out
diff --git a/src/agents/pge_eval/ontology_metrics.py b/src/agents/pge_eval/ontology_metrics.py
new file mode 100644
index 00000000..21ba2c16
--- /dev/null
+++ b/src/agents/pge_eval/ontology_metrics.py
@@ -0,0 +1,270 @@
+"""Stage-1 — ontology-generation quality (deterministic, no LLM).
+
+Computed purely from the generated ontology + source metadata.  No mapping
+dependency (D2) and no LLM for the deterministic part (§3.2).  The same
+checks back the new owl-generator Evaluator stage (§3.5): each issue carries
+a concrete ``hint`` that becomes a generator retry_hint.
+
+All metrics are usecase-agnostic: nothing about any particular domain is
+hard-coded here.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any, Dict, List, Set, Tuple
+
+from agents.pge_eval.normalize import (
+    NormalizedOntology,
+    is_surrogate_or_audit,
+    local_name,
+    normalize_metadata,
+    normalize_name,
+    normalize_ontology,
+)
+
+# Naming conventions (mirror the OWL generator's NAMING RULES, domain-free).
+_CLASS_RE = re.compile(r"^[A-Z][A-Za-z0-9]*$")
+_PROPERTY_RE = re.compile(r"^[a-z][A-Za-z0-9]*$")
+
+
+def _issue(check: str, expected: str, observed: str, hint: str) -> Dict[str, str]:
+    return {"check": check, "expected": expected, "observed": observed, "hint": hint}
+
+
+# =====================================================
+# Footprint computation (shared with pipeline.coverage_loss)
+# =====================================================
+
+
+def _column_key(table_name: str, column_name: str) -> str:
+    return f"{normalize_name(table_name)}::{normalize_name(column_name)}"
+
+
+def compute_footprint(
+    ontology: NormalizedOntology, tables: List[dict]
+) -> Dict[str, Any]:
+    """Return the ontology footprint over the source metadata.
+
+    A *column* is covered when its normalised name matches some data
+    property's normalised name.  A *table* is covered when its name matches
+    a class name OR ≥1 of its non-surrogate columns is covered (D3).
+
+    Surrogate/audit columns are excluded from the denominators.
+    """
+    dp_keys = ontology.all_data_property_keys
+    class_keys = ontology.class_name_keys
+
+    total_columns = 0
+    covered_columns: Set[str] = set()
+    total_tables = len(tables)
+    covered_tables: Set[str] = set()
+
+    for t in tables:
+        tname = t["name"]
+        tkey = normalize_name(local_name(tname))
+        table_is_covered = tkey in class_keys
+        for col in t["columns"]:
+            if is_surrogate_or_audit(col):
+                continue
+            total_columns += 1
+            ckey = _column_key(tname, col)
+            if normalize_name(col) in dp_keys:
+                covered_columns.add(ckey)
+                table_is_covered = True
+        if table_is_covered:
+            covered_tables.add(tname)
+
+    return {
+        "total_tables": total_tables,
+        "covered_tables": covered_tables,
+        "total_columns": total_columns,
+        "covered_columns": covered_columns,
+    }
+
+
+# =====================================================
+# Stage-1 metrics + issues
+# =====================================================
+
+
+def evaluate_ontology(
+    ontology: dict,
+    metadata: dict,
+) -> Tuple[Dict[str, Any], List[Dict[str, str]], Dict[str, Any]]:
+    """Run the deterministic Stage-1 checks.
+
+    Returns ``(metrics, issues, footprint)``:
+
+    * ``metrics`` — the §3.2 metric block (ratios + absolute counts).
+    * ``issues`` — actionable failures (``check/expected/observed/hint``)
+      for the owl-gen Evaluator's retry_hints.
+    * ``footprint`` — covered tables/columns sets reused by
+      ``pipeline.coverage_loss``.
+    """
+    norm = normalize_ontology(ontology)
+    tables = normalize_metadata(metadata)
+    footprint = compute_footprint(norm, tables)
+
+    issues: List[Dict[str, str]] = []
+
+    # ---- coverage ratios (Tier-2 warn) -----------------------------
+    table_cov = (
+        len(footprint["covered_tables"]) / footprint["total_tables"]
+        if footprint["total_tables"]
+        else 1.0
+    )
+    column_cov = (
+        len(footprint["covered_columns"]) / footprint["total_columns"]
+        if footprint["total_columns"]
+        else 1.0
+    )
+
+    uncovered_tables = [
+        t["name"]
+        for t in tables
+        if t["name"] not in footprint["covered_tables"]
+    ]
+    for tname in uncovered_tables:
+        issues.append(
+            _issue(
+                "table_footprint_coverage",
+                "table maps to a class or contributes >=1 data property",
+                "no footprint",
+                f"source table '{tname}' has no class and contributes no data "
+                "property — model it as a class, attach its columns as data "
+                "properties on an existing class, or justify the omission.",
+            )
+        )
+
+    # ---- orphan classes (Tier-1 absolute = 0) ----------------------
+    related: Set[str] = set()
+    for op in norm.object_properties:
+        for ref in (op.get("domain"), op.get("range")):
+            if ref:
+                related.add(local_name(ref))
+                related.add(str(ref))
+    orphan_classes: List[str] = []
+    for c in norm.classes:
+        has_props = bool(c.get("data_properties"))
+        name = c.get("name") or local_name(c.get("uri"))
+        in_rel = name in related or local_name(c.get("uri")) in related
+        if not has_props and not in_rel:
+            orphan_classes.append(name)
+            issues.append(
+                _issue(
+                    "orphan_class_count",
+                    "0 orphan classes",
+                    name,
+                    f"class '{name}' is an orphan (no data properties and no "
+                    "object-property domain/range) — attach properties, relate "
+                    "it to another class, or remove it.",
+                )
+            )
+
+    # ---- dangling domain/range (Tier-1 absolute = 0) ---------------
+    resolvable = norm.class_resolution_set
+    dangling_dr: List[str] = []
+    for op in norm.object_properties:
+        opname = op.get("name") or local_name(op.get("uri"))
+        for role in ("domain", "range"):
+            ref = op.get(role)
+            if not ref:
+                dangling_dr.append(f"{opname}.{role}")
+                issues.append(
+                    _issue(
+                        "dangling_domain_range_count",
+                        f"ObjectProperty {role} resolves to a class",
+                        f"{opname}.{role}=<missing>",
+                        f"ObjectProperty '{opname}' has no {role} — declare an "
+                        f"rdfs:{role} pointing at an existing class.",
+                    )
+                )
+                continue
+            if ref not in resolvable and local_name(ref) not in resolvable:
+                dangling_dr.append(f"{opname}.{role}")
+                issues.append(
+                    _issue(
+                        "dangling_domain_range_count",
+                        f"ObjectProperty {role} resolves to a class",
+                        f"{opname}.{role}={local_name(ref)}",
+                        f"ObjectProperty '{opname}' has {role} "
+                        f"'{local_name(ref)}' which resolves to no class — fix "
+                        "the reference or add the missing class.",
+                    )
+                )
+
+    # ---- naming violations (Tier-1 absolute = 0) -------------------
+    naming_violations: List[str] = []
+    for c in norm.classes:
+        nm = local_name(c.get("name") or c.get("uri"))
+        if nm and not _CLASS_RE.match(nm):
+            naming_violations.append(f"class:{nm}")
+            issues.append(
+                _issue(
+                    "naming_violation_count",
+                    "class name is PascalCase [A-Z][A-Za-z0-9]*",
+                    nm,
+                    f"class '{nm}' violates PascalCase — remove spaces / "
+                    "underscores / hyphens and capitalise (e.g. sales_order -> "
+                    "SalesOrder).",
+                )
+            )
+    for op in norm.object_properties:
+        nm = local_name(op.get("name") or op.get("uri"))
+        if nm and not _PROPERTY_RE.match(nm):
+            naming_violations.append(f"property:{nm}")
+            issues.append(
+                _issue(
+                    "naming_violation_count",
+                    "property name is lowerCamelCase [a-z][A-Za-z0-9]*",
+                    nm,
+                    f"property '{nm}' violates lowerCamelCase — use "
+                    "[a-z][A-Za-z0-9]* with no underscores/hyphens/escapes.",
+                )
+            )
+    # data properties too
+    for c in norm.classes:
+        for dp in c.get("data_properties", []):
+            nm = local_name(dp)
+            if nm and not _PROPERTY_RE.match(nm):
+                naming_violations.append(f"dataproperty:{nm}")
+                issues.append(
+                    _issue(
+                        "naming_violation_count",
+                        "data property name is lowerCamelCase",
+                        nm,
+                        f"data property '{nm}' violates lowerCamelCase — use "
+                        "[a-z][A-Za-z0-9]* with no underscores/hyphens/escapes.",
+                    )
+                )
+
+    # ---- duplicate classes (Tier-1 absolute = 0) -------------------
+    seen: Dict[str, int] = {}
+    for c in norm.classes:
+        key = normalize_name(local_name(c.get("name") or c.get("uri")))
+        if not key:
+            continue
+        seen[key] = seen.get(key, 0) + 1
+    duplicate_class_count = sum(n - 1 for n in seen.values() if n > 1)
+    for key, n in seen.items():
+        if n > 1:
+            issues.append(
+                _issue(
+                    "duplicate_class_count",
+                    "0 duplicate class local names",
+                    f"{key} x{n}",
+                    f"{n} classes collapse to the local name '{key}' — merge "
+                    "them or differentiate their names/definitions.",
+                )
+            )
+
+    metrics: Dict[str, Any] = {
+        "table_footprint_coverage": round(table_cov, 6),
+        "column_footprint_coverage": round(column_cov, 6),
+        "orphan_class_count": len(orphan_classes),
+        "dangling_domain_range_count": len(dangling_dr),
+        "naming_violation_count": len(naming_violations),
+        "duplicate_class_count": duplicate_class_count,
+    }
+    return metrics, issues, footprint
diff --git a/tests/units/pge_eval/__init__.py b/tests/units/pge_eval/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/units/pge_eval/_fixtures.py b/tests/units/pge_eval/_fixtures.py
new file mode 100644
index 00000000..71ee60d6
--- /dev/null
+++ b/tests/units/pge_eval/_fixtures.py
@@ -0,0 +1,178 @@
+"""Synthetic, usecase-agnostic fixtures for the PGE evaluator unit tests.
+
+Deliberately uses a generic e-commerce-ish toy domain (Customer / Order /
+Product) so the tests prove the scorer is domain-free — none of these names
+appear in the scorer code.
+"""
+
+from copy import deepcopy
+
+
+def clean_ontology() -> dict:
+    """Agent-shape ontology that is fully structurally clean."""
+    return {
+        "entities": [
+            {
+                "uri": "ex:Customer",
+                "name": "Customer",
+                "attributes": ["firstName", "lastName", "email"],
+            },
+            {
+                "uri": "ex:Order",
+                "name": "Order",
+                "attributes": ["orderDate", "totalAmount"],
+            },
+            {
+                "uri": "ex:Product",
+                "name": "Product",
+                "attributes": ["sku", "unitPrice"],
+            },
+        ],
+        "relationships": [
+            {
+                "uri": "ex:placesOrder",
+                "name": "placesOrder",
+                "domain": "ex:Customer",
+                "range": "ex:Order",
+            },
+            {
+                "uri": "ex:containsProduct",
+                "name": "containsProduct",
+                "domain": "ex:Order",
+                "range": "ex:Product",
+            },
+        ],
+    }
+
+
+def clean_metadata() -> dict:
+    return {
+        "tables": [
+            {
+                "name": "customers",
+                "columns": [
+                    {"name": "id"},
+                    {"name": "first_name"},
+                    {"name": "last_name"},
+                    {"name": "email"},
+                    {"name": "created_at"},
+                ],
+            },
+            {
+                "name": "orders",
+                "columns": [
+                    {"name": "id"},
+                    {"name": "order_date"},
+                    {"name": "total_amount"},
+                ],
+            },
+            {
+                "name": "products",
+                "columns": [
+                    {"name": "id"},
+                    {"name": "sku"},
+                    {"name": "unit_price"},
+                ],
+            },
+        ]
+    }
+
+
+def clean_artifact() -> dict:
+    onto = clean_ontology()
+    meta = clean_metadata()
+    return {
+        "success": True,
+        "iterations": 3,
+        "usage": {"prompt_tokens": 1000, "completion_tokens": 400},
+        "stats": {"planner_reinvocations": 0},
+        "mapping_run_log": [
+            {"item": "ex:Customer", "kind": "entity", "attempts": [{}], "final_status": "PASS"},
+            {"item": "ex:Order", "kind": "entity", "attempts": [{}], "final_status": "PASS"},
+            {"item": "ex:Product", "kind": "entity", "attempts": [{}], "final_status": "PASS"},
+            {"item": "ex:placesOrder", "kind": "relationship", "attempts": [{}], "final_status": "PASS"},
+            {"item": "ex:containsProduct", "kind": "relationship", "attempts": [{}], "final_status": "PASS"},
+        ],
+        "mapping_evaluations": {
+            "ex:Customer": {"metrics": {"row_count": 100, "distinct_id_count": 100, "null_id_count": 0}, "failures": []},
+            "ex:Order": {"metrics": {"row_count": 500, "distinct_id_count": 500, "null_id_count": 0}, "failures": []},
+            "ex:Product": {"metrics": {"row_count": 50, "distinct_id_count": 50, "null_id_count": 0}, "failures": []},
+            "ex:placesOrder": {"metrics": {"total_edges": 500, "dangling_source_pct": 0.0, "dangling_target_pct": 0.0}, "failures": []},
+            "ex:containsProduct": {"metrics": {"total_edges": 800, "dangling_source_pct": 0.0, "dangling_target_pct": 0.0}, "failures": []},
+        },
+        "entity_mappings": [
+            {"ontology_class": "ex:Customer", "attribute_mappings": {"firstName": "first_name", "lastName": "last_name", "email": "email"}},
+            {"ontology_class": "ex:Order", "attribute_mappings": {"orderDate": "order_date", "totalAmount": "total_amount"}},
+            {"ontology_class": "ex:Product", "attribute_mappings": {"sku": "sku", "unitPrice": "unit_price"}},
+        ],
+        "relationship_mappings": [],
+        "steps": [{"step_type": "planner", "tool_name": "", "duration_ms": 1200}],
+        "ontology": onto,
+        "metadata": meta,
+        "elapsed_s": 42.5,
+    }
+
+
+def artifact_with_dangling_fk() -> dict:
+    """Clean except one relationship has a dangling target FK > 5%."""
+    art = clean_artifact()
+    art["mapping_evaluations"]["ex:placesOrder"]["metrics"]["dangling_target_pct"] = 0.47
+    return art
+
+
+def artifact_with_sql_failure() -> dict:
+    """Clean except one entity's SQL failed to execute."""
+    art = clean_artifact()
+    art["mapping_evaluations"]["ex:Order"] = {
+        "metrics": {"sql_error": "UNION type mismatch"},
+        "failures": [
+            {
+                "check": "sql_execution",
+                "expected": "SQL executes without error",
+                "observed": "execution error",
+                "hint": "fix the SQL",
+            }
+        ],
+    }
+    # The entity drops out of PASS in the run log too (in-scope but failed).
+    for entry in art["mapping_run_log"]:
+        if entry["item"] == "ex:Order":
+            entry["final_status"] = "FAIL"
+    return art
+
+
+def ontology_with_orphan() -> dict:
+    """Add a class with no data properties and no relationships."""
+    onto = clean_ontology()
+    onto["entities"].append({"uri": "ex:Ghost", "name": "Ghost", "attributes": []})
+    return onto
+
+
+def artifact_with_orphan_class() -> dict:
+    art = clean_artifact()
+    art["ontology"] = ontology_with_orphan()
+    return art
+
+
+def ontology_with_dangling_range() -> dict:
+    onto = clean_ontology()
+    onto["relationships"].append(
+        {"uri": "ex:refersTo", "name": "refersTo", "domain": "ex:Order", "range": "ex:Nonexistent"}
+    )
+    return onto
+
+
+def ontology_with_naming_violation() -> dict:
+    onto = clean_ontology()
+    onto["entities"].append(
+        {"uri": "ex:bad_class", "name": "bad_class", "attributes": ["someAttr"]}
+    )
+    return onto
+
+
+def ontology_with_duplicate_class() -> dict:
+    onto = clean_ontology()
+    onto["entities"].append(
+        {"uri": "ex:Customer2", "name": "Customer", "attributes": ["nickname"]}
+    )
+    return onto
diff --git a/tests/units/pge_eval/test_ontology_metrics.py b/tests/units/pge_eval/test_ontology_metrics.py
new file mode 100644
index 00000000..640a61ec
--- /dev/null
+++ b/tests/units/pge_eval/test_ontology_metrics.py
@@ -0,0 +1,83 @@
+"""Stage-1 ontology metric tests (deterministic, no LLM)."""
+
+import pytest
+
+from agents.pge_eval.ontology_metrics import evaluate_ontology
+from agents.pge_eval.normalize import is_surrogate_or_audit, normalize_name
+
+from tests.units.pge_eval import _fixtures as fx
+
+
+def test_clean_ontology_all_absolute_zero():
+    metrics, issues, _ = evaluate_ontology(fx.clean_ontology(), fx.clean_metadata())
+    assert metrics["orphan_class_count"] == 0
+    assert metrics["dangling_domain_range_count"] == 0
+    assert metrics["naming_violation_count"] == 0
+    assert metrics["duplicate_class_count"] == 0
+    assert metrics["table_footprint_coverage"] == 1.0
+    assert metrics["column_footprint_coverage"] >= 0.9
+
+
+def test_orphan_class_detected():
+    metrics, issues, _ = evaluate_ontology(fx.ontology_with_orphan(), fx.clean_metadata())
+    assert metrics["orphan_class_count"] == 1
+    assert any(i["check"] == "orphan_class_count" for i in issues)
+
+
+def test_dangling_range_detected():
+    metrics, issues, _ = evaluate_ontology(
+        fx.ontology_with_dangling_range(), fx.clean_metadata()
+    )
+    assert metrics["dangling_domain_range_count"] == 1
+    assert any(i["check"] == "dangling_domain_range_count" for i in issues)
+
+
+def test_naming_violation_detected():
+    metrics, _, _ = evaluate_ontology(
+        fx.ontology_with_naming_violation(), fx.clean_metadata()
+    )
+    assert metrics["naming_violation_count"] >= 1
+
+
+def test_duplicate_class_detected():
+    metrics, _, _ = evaluate_ontology(
+        fx.ontology_with_duplicate_class(), fx.clean_metadata()
+    )
+    assert metrics["duplicate_class_count"] == 1
+
+
+def test_table_coverage_drops_with_unmodelled_table():
+    meta = fx.clean_metadata()
+    meta["tables"].append({"name": "shipments", "columns": [{"name": "carrier"}]})
+    metrics, issues, _ = evaluate_ontology(fx.clean_ontology(), meta)
+    assert metrics["table_footprint_coverage"] < 1.0
+    assert any(
+        i["check"] == "table_footprint_coverage" for i in issues
+    )
+
+
+def test_surrogate_and_audit_columns_excluded():
+    assert is_surrogate_or_audit("id")
+    assert is_surrogate_or_audit("created_at")
+    assert is_surrogate_or_audit("customer_sk")
+    assert is_surrogate_or_audit("etl_load_ts")
+    assert not is_surrogate_or_audit("first_name")
+    assert not is_surrogate_or_audit("customer_id")  # FK can be meaningful
+
+
+def test_name_normalization():
+    assert normalize_name("first_name") == normalize_name("firstName") == "firstname"
+    assert normalize_name("Order Date") == "orderdate"
+
+
+def test_registry_shape_accepted():
+    # Same ontology in registry (classes/properties) shape must score identically.
+    registry = {
+        "classes": [
+            {"uri": "ex:A", "name": "A", "dataProperties": [{"name": "x"}]},
+        ],
+        "properties": [],
+    }
+    metrics, _, _ = evaluate_ontology(registry, {"tables": []})
+    # A has a data property -> not an orphan.
+    assert metrics["orphan_class_count"] == 0
diff --git a/tests/units/pge_eval/test_owl_evaluator_stage.py b/tests/units/pge_eval/test_owl_evaluator_stage.py
new file mode 100644
index 00000000..ec23bf47
--- /dev/null
+++ b/tests/units/pge_eval/test_owl_evaluator_stage.py
@@ -0,0 +1,69 @@
+"""The owl-generator Evaluator stage (§3.5) — deterministic Stage-1 checks
+feeding retry_hints, with a bounded retry cap."""
+
+from agents.agent_owl_generator import engine as owl_engine
+
+_CLEAN_TTL = """@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix : <http://test.org/o#> .
+
+<http://test.org/o> a owl:Ontology .
+
+:Customer a owl:Class ; rdfs:label "Customer" .
+:Order a owl:Class ; rdfs:label "Order" .
+
+:placesOrder a owl:ObjectProperty ; rdfs:domain :Customer ; rdfs:range :Order .
+:firstName a owl:DatatypeProperty ; rdfs:domain :Customer ; rdfs:range xsd:string .
+:orderDate a owl:DatatypeProperty ; rdfs:domain :Order ; rdfs:range xsd:string .
+"""
+
+_ORPHAN_TTL = _CLEAN_TTL + """
+:Ghost a owl:Class ; rdfs:label "Ghost" .
+"""
+
+
+def test_clean_ontology_returns_no_retry_hint():
+    assert owl_engine._evaluate_ontology_stage(_CLEAN_TTL, {}, 1) is None
+
+
+def test_orphan_class_yields_retry_hint():
+    hint = owl_engine._evaluate_ontology_stage(_ORPHAN_TTL, {}, 1)
+    assert hint is not None
+    assert "Ghost" in hint
+    assert "orphan" in hint.lower()
+
+
+_PROSE_PREFIXED = (
+    "No database tables are available. I have what I need from the guidelines.\n\n"
+    + _ORPHAN_TTL
+)
+
+_FENCED = "```turtle\n" + _ORPHAN_TTL + "```"
+
+
+def test_prose_preamble_is_stripped_before_parsing():
+    # Regression: the model sometimes prepends a sentence before @prefix. The
+    # evaluator must clean it (like the downstream registry) and still run,
+    # not skip. Found via a live Chrome DevTools generation run.
+    hint = owl_engine._evaluate_ontology_stage(_PROSE_PREFIXED, {}, 1)
+    assert hint is not None
+    assert "Ghost" in hint
+
+
+def test_markdown_fenced_turtle_is_parsed():
+    hint = owl_engine._evaluate_ontology_stage(_FENCED, {}, 1)
+    assert hint is not None
+    assert "Ghost" in hint
+
+
+def test_parse_error_fails_open():
+    # Garbage in -> None (never blocks OWL delivery).
+    assert owl_engine._evaluate_ontology_stage("not turtle at all {{{", {}, 1) is None
+
+
+def test_evaluator_loop_is_bounded():
+    # The Evaluator retry cap exists and is finite (real PGE discipline).
+    assert owl_engine.MAX_OWL_EVAL_ROUNDS >= 1
+    assert owl_engine.MAX_OWL_EVAL_ROUNDS < 10