boringdata · hachej · May 4, 2026 · May 3, 2026
diff --git a/src/boring_semantic_layer/measure_scope.py b/src/boring_semantic_layer/measure_scope.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import difflib
 from collections.abc import Iterable
 from typing import Any
 
@@ -8,6 +9,17 @@
 from toolz import curry
 
 
+class UnknownMeasureRefError(AttributeError):
+    """Raised when a calc-measure lambda references an unknown name.
+
+    Subclasses :class:`AttributeError` so existing code that ``except``\\ s
+    on attribute errors continues to work, but ``_classify_measure``
+    re-raises this specific subclass instead of swallowing it. Surfaces
+    typos at construction time with a "did you mean?" suggestion built
+    from the surrounding measure / column names.
+    """
+
+
 def _has_prefixed_columns(tbl, name: str) -> bool:
     """Check if table has columns with the given prefix (e.g., 'flights.' prefix)."""
     if not hasattr(tbl, "columns"):
@@ -361,7 +373,39 @@ def __getattr__(self, name: str):
         if _has_prefixed_columns(self.tbl, name):
             return _ColumnPrefixProxy(self.tbl, name)
 
-        return _resolve_column_short_name(self.tbl, name)
+        # Fall through to ibis (covers Table methods like ``count``, ``filter``).
+        # If ibis rejects too, surface a typo suggestion rather than the opaque
+        # ibis AttributeError so the user can see a "did you mean?" hint.
+        try:
+            return _resolve_column_short_name(self.tbl, name)
+        except AttributeError:
+            suggestion = self._typo_suggestion(name)
+            if suggestion:
+                raise UnknownMeasureRefError(
+                    f"{name!r} is not a known measure or column. {suggestion}"
+                ) from None
+            raise
+
+    def _typo_suggestion(self, name: str) -> str | None:
+        # 0.80 catches single-character typos and case mistakes
+        # (``flight_konut`` vs ``flight_count`` ≈ 0.83) without flagging
+        # legitimate substring overlaps (``net_revenue`` vs
+        # ``total_net_revenue`` ≈ 0.79). Calibrated against real-world
+        # confusable measure names.
+        cutoff = 0.80
+        candidates: list[tuple[str, str]] = []
+        if self.known:
+            for match in difflib.get_close_matches(name, self.known, n=3, cutoff=cutoff):
+                candidates.append(("measure", match))
+        if hasattr(self.tbl, "columns"):
+            for match in difflib.get_close_matches(
+                name, list(self.tbl.columns), n=3, cutoff=cutoff
+            ):
+                candidates.append(("column", match))
+        if not candidates:
+            return None
+        formatted = ", ".join(f"{kind} {match!r}" for kind, match in candidates)
+        return f"Did you mean: {formatted}?"
 
     def __getitem__(self, name: str):
         if self.post_agg:

diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py
@@ -75,6 +75,21 @@ class _RenamedResolver:
 
     Used during join predicate resolution to avoid ibis "Ambiguous field
     reference" errors when left and right tables share column names.
+
+    This works around two upstream ibis behaviors that have no public
+    issue tracker entry yet but are pinned by
+    ``test_upstream_ibis_pins.py``:
+
+    1. ``DerefMap`` raises ``IbisInputError: Ambiguous field reference``
+       when a predicate column-name appears in more than one relation
+       reachable from the join's LHS.
+    2. The default ``rname='{name}_right'`` collides on the third
+       table when 3+ joined relations share a column name, raising
+       ``IntegrityError: Name collisions``.
+
+    When the pinning tests start failing — i.e. ibis no longer raises
+    these errors — the rename dance in ``SemanticJoinOp.to_untagged``
+    can be removed.
     """
 
     __slots__ = ("_table", "_name_map")
@@ -663,13 +678,31 @@ def _extract_measure_metadata(
         return (fn_or_expr, None, (), {})
 
 
-_AGG_METHODS = frozenset({"sum", "mean", "avg", "count", "min", "max"})
+_AGG_METHODS = frozenset({
+    # Standard reductions
+    "sum", "mean", "avg", "count", "min", "max",
+    # Statistical reductions
+    "var", "std", "median", "quantile",
+    # Approximate reductions
+    "approx_count_distinct", "approx_nunique", "approx_median",
+    # Distinct count
+    "nunique",
+    # Categorical reductions
+    "mode", "first", "last", "arbitrary",
+    # Boolean reductions
+    "any", "all",
+    # Collection reductions
+    "group_concat", "collect",
+})
 
 
 def _is_calculated_measure(val: Any) -> bool:
     # A MethodCall with an aggregation method on a MeasureRef is a base measure:
     # the column name matched a known measure name in MeasureScope, but the user
-    # is really defining a column aggregation (e.g. lambda t: t.flight_count.sum()).
+    # is really defining a column aggregation (e.g. lambda t: t.flight_count.sum()
+    # or t.distance.var()).  ``_AGG_METHODS`` covers the ibis ``Reduction`` ops a
+    # user might reasonably reach for; methods outside the set fall through to
+    # the calc path.
     if (
         isinstance(val, MethodCall)
         and val.method in _AGG_METHODS
@@ -843,18 +876,26 @@ def _classify_measure(
     fn_or_expr: Any, scope: Any, measure_name: str | None = None
 ) -> tuple[str, Any]:
     """Classify measure as 'calc' or 'base' with appropriate handling."""
-    from .measure_scope import validate_calc_ast
+    from .measure_scope import UnknownMeasureRefError, validate_calc_ast
 
     expr, description, requires_unnest, metadata = _extract_measure_metadata(fn_or_expr)
 
-    resolved = safe(lambda: _resolve_expr(expr, scope))().map(
-        lambda val: ("calc", val) if _is_calculated_measure(val) else None
-    )
+    # ``_resolve_expr`` may raise for legitimate base-measure shapes
+    # (e.g. lambdas that touch ibis methods MeasureScope can't reflect),
+    # so most exceptions are caught and the lambda falls through to
+    # base classification. ``UnknownMeasureRefError`` is the typo case
+    # though — surface it loudly instead of letting the lambda fail with
+    # an opaque error at execute time.
+    try:
+        resolved_value = _resolve_expr(expr, scope)
+    except UnknownMeasureRefError:
+        raise
+    except Exception:
+        resolved_value = None
 
-    if isinstance(resolved, Success) and resolved.unwrap() is not None:
-        kind, value = resolved.unwrap()
-        validate_calc_ast(value, measure_name)
-        return (kind, value)
+    if resolved_value is not None and _is_calculated_measure(resolved_value):
+        validate_calc_ast(resolved_value, measure_name)
+        return ("calc", resolved_value)
 
     if not requires_unnest and callable(expr):
         # All scopes (MeasureScope, ColumnScope) have tbl attribute
@@ -4014,7 +4055,10 @@ def to_untagged(self, parent_requirements: dict[str, set[str]] | None = None):
             return left_tbl.join(right_tbl, how=self.how, rname=rname)
 
         # Detect column name conflicts that cause ibis/xorq to raise
-        # "Ambiguous field reference" during predicate resolution.
+        # ``Ambiguous field reference`` during predicate resolution. The
+        # rename dance + ``_RenamedResolver`` below is a workaround for
+        # upstream ibis behaviors pinned by ``test_upstream_ibis_pins``;
+        # remove this branch when those tests fail.
         conflicting = frozenset(left_tbl.columns) & frozenset(right_tbl.columns)
 
         if not conflicting:

diff --git a/src/boring_semantic_layer/tests/test_measure_reference_styles.py b/src/boring_semantic_layer/tests/test_measure_reference_styles.py
@@ -540,3 +540,112 @@ def test_calc_dtype_inference_with_inline_aggregation():
     # Before infer_calc_dtype handled the rewrite path, this was silently
     # dropped from values and the dtype was unknown.
     assert "pct_of_total" in st.schema.names
+
+
+# ---------------------------------------------------------------------------
+# Broader base-measure recognition (#1)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "method, expected_kind",
+    [
+        ("var", "base"),
+        ("std", "base"),
+        ("median", "base"),
+        ("nunique", "base"),
+        ("approx_nunique", "base"),
+        ("first", "base"),
+        ("last", "base"),
+    ],
+)
+def test_extra_reductions_classified_as_base(method, expected_kind):
+    """Measures named the same as a column then aggregated with a non-sum/mean
+    reduction should still be classified as base, not silently routed through
+    the calc compiler.
+    """
+    con = ibis.duckdb.connect(":memory:")
+    data = pd.DataFrame({"carrier": ["AA", "AA", "UA"], "distance": [100, 200, 300]})
+    tbl = con.create_table(f"flights_{method}", data)
+
+    # Define a measure named ``distance`` (matching the column name) so that
+    # ``t.distance.<method>()`` resolves through the MeasureRef path that
+    # ``_AGG_METHODS`` guards.
+    st = to_semantic_table(tbl, f"flights_{method}").with_measures(
+        distance=lambda t: t.distance.sum(),
+    )
+    st = st.with_measures(
+        derived=lambda t, _m=method: getattr(t.distance, _m)(),
+    )
+
+    op = st.op()
+    base_names = set(op.get_measures().keys())
+    calc_names = set(op.get_calculated_measures().keys())
+    assert "derived" in base_names, (
+        f"{method!r} measure should classify as base, got calc"
+    )
+    assert "derived" not in calc_names
+
+
+# ---------------------------------------------------------------------------
+# Typo'd measure references (#2)
+# ---------------------------------------------------------------------------
+
+
+def test_typo_measure_ref_raises_with_suggestion():
+    """A calc-measure lambda referencing a misspelled measure name should
+    fail loudly at construction with a "did you mean?" suggestion."""
+    from boring_semantic_layer.measure_scope import UnknownMeasureRefError
+
+    con = ibis.duckdb.connect(":memory:")
+    data = pd.DataFrame({"carrier": ["AA"], "distance": [100]})
+    tbl = con.create_table("flights_typo", data)
+
+    st = to_semantic_table(tbl, "flights_typo").with_measures(
+        flight_count=lambda t: t.count(),
+        total_distance=lambda t: t.distance.sum(),
+    )
+
+    with pytest.raises(UnknownMeasureRefError, match="flight_count"):
+        st.with_measures(
+            # ``flight_konut`` typo of ``flight_count``
+            ratio=lambda t: t.flight_konut / t.total_distance,
+        )
+
+
+def test_typo_in_t_all_raises():
+    """Same loud-fail behavior when the typo is the argument to ``t.all``."""
+    from boring_semantic_layer.measure_scope import UnknownMeasureRefError
+
+    con = ibis.duckdb.connect(":memory:")
+    data = pd.DataFrame({"carrier": ["AA"], "distance": [100]})
+    tbl = con.create_table("flights_typo_all", data)
+
+    st = to_semantic_table(tbl, "flights_typo_all").with_measures(
+        flight_count=lambda t: t.count(),
+    )
+
+    with pytest.raises(UnknownMeasureRefError, match="flight_count"):
+        st.with_measures(
+            ratio=lambda t: t.flight_count / t.all(t.flight_konut),
+        )
+
+
+def test_substring_measure_name_does_not_trigger_typo():
+    """Names that are substrings of other measures should not trip the typo
+    detector. ``net_revenue`` referenced from a measure that also defines
+    ``total_net_revenue`` is legitimate, not a typo (similarity ≈ 0.79).
+    """
+    from boring_semantic_layer.measure_scope import MeasureScope
+
+    # Probe MeasureScope directly: with measures named [net_revenue,
+    # total_net_revenue], looking up an unrelated name 'net_revenue'
+    # via getattr should NOT fire the typo path.
+    import ibis as i
+
+    tbl = i.table({"col": "int64"}, name="t")
+    scope = MeasureScope(_tbl=tbl, _known=("net_revenue", "total_net_revenue"))
+    # Asking for 'net_revenue' is fine — it's a known measure.
+    assert scope.net_revenue is not None
+    # Asking for 'col' is fine — it's a column.
+    assert scope.col is not None
diff --git a/src/boring_semantic_layer/tests/test_upstream_ibis_pins.py b/src/boring_semantic_layer/tests/test_upstream_ibis_pins.py
@@ -0,0 +1,116 @@
+"""Pin upstream ibis behaviors that BSL works around.
+
+Each test reproduces a specific ibis (or xorq-vendored ibis) behavior that
+forced a workaround somewhere in BSL. The tests pass while ibis still
+exhibits the behavior and fail when ibis is fixed — at which point the
+corresponding BSL workaround can be deleted.
+
+When a test in this file starts failing, do not "fix" it by adding the
+workaround in: instead, locate the BSL workaround it pins (each test
+docstring names the call site) and delete that workaround.
+
+Each pin is parametrized over plain ``ibis`` and ``xorq.vendor.ibis``
+because BSL's internal tables are xorq-vendored after construction-time
+conversion, so the workaround actually fires against the xorq flavor —
+but plain ibis is the upstream, and divergence between them is itself a
+signal worth pinning.
+"""
+
+from __future__ import annotations
+
+import pandas as pd
+import pytest
+
+import ibis as plain_ibis
+from xorq.common.utils.ibis_utils import from_ibis
+
+
+def _make_table(flavor: str, name: str, df: pd.DataFrame):
+    """Create a table in the requested ibis flavor.
+
+    For ``"ibis"`` we create directly via the duckdb backend.
+    For ``"xorq"`` we create via plain ibis, then wrap with
+    ``from_ibis`` so the resulting table is in ``xorq.vendor.ibis``.
+    """
+    con = plain_ibis.duckdb.connect()
+    tbl = con.create_table(name, df)
+    if flavor == "xorq":
+        return from_ibis(tbl)
+    return tbl
+
+
+@pytest.mark.parametrize("flavor", ["ibis", "xorq"])
+def test_pin_three_way_join_default_rname_collides(flavor):
+    """3+ joined relations sharing a column name collide on ``_right``.
+
+    Pinned BSL workaround: depth-based ``rname`` in
+    ``SemanticJoinOp._rname_for_depth`` (``_right``, ``_right2``, …)
+    plus the ``conflicting`` rename block in ``SemanticJoinOp.to_untagged``.
+
+    When ibis auto-disambiguates collisions across nested joins, this
+    test will succeed without raising — at which point the depth
+    hack and rename block can be removed.
+    """
+    flights = _make_table(
+        flavor,
+        f"fl_pin_{flavor}",
+        pd.DataFrame(
+            {"carrier": ["AA"], "origin": ["JFK"], "destination": ["LAX"]}
+        ),
+    )
+    carriers = _make_table(
+        flavor,
+        f"ca_pin_{flavor}",
+        pd.DataFrame({"code": ["AA"], "name": ["American"]}),
+    )
+    airports1 = _make_table(
+        flavor,
+        f"ap1_pin_{flavor}",
+        pd.DataFrame({"code": ["JFK"], "city": ["NY"]}),
+    )
+    airports2 = _make_table(
+        flavor,
+        f"ap2_pin_{flavor}",
+        pd.DataFrame({"code": ["LAX"], "city": ["LA"]}),
+    )
+
+    fc = flights.join(carriers, flights.carrier == carriers.code)
+    fca = fc.join(airports1, fc.origin == airports1.code)
+    # Both ``fca`` and ``airports2`` carry a column called ``code``; ibis
+    # tries to suffix the right one as ``code_right``, which already exists
+    # from the previous join → IntegrityError. Plain ibis raises on schema
+    # materialization (``.columns``); xorq's vendored ibis raises at
+    # execute time. Force materialization to cover both.
+    with pytest.raises(Exception, match=r"(?i)collision|integrity"):
+        fca.join(airports2, fca.destination == airports2.code).execute()
+
+
+@pytest.mark.parametrize("flavor", ["ibis", "xorq"])
+def test_pin_self_reference_join_is_ambiguous_without_view(flavor):
+    """Joining the same table twice without ``.view()`` is ambiguous.
+
+    Pinned BSL workaround: BSL relies on the user calling ``.view()`` (or
+    ``to_semantic_table`` on a viewed table) when joining the same source
+    twice. The reconstruction path in ``serialization.reconstruct``
+    explicitly preserves ``SelfReference`` nodes for the same reason.
+
+    When ibis disambiguates self-references automatically, the
+    ``SelfReference`` preservation in reconstruct can be relaxed.
+    """
+    ports = _make_table(
+        flavor,
+        f"ports_pin_{flavor}",
+        pd.DataFrame({"code": ["JFK", "LAX"], "city": ["NY", "LA"]}),
+    )
+    flights = _make_table(
+        flavor,
+        f"fl_self_pin_{flavor}",
+        pd.DataFrame({"origin": ["JFK"], "dest": ["LAX"]}),
+    )
+
+    j1 = flights.join(ports, flights.origin == ports.code)
+    # Reusing ``ports`` (same identity) without ``.view()`` makes the
+    # second predicate's ``ports.code`` ambiguous. Error fires at
+    # predicate-resolution time during the second ``join`` call.
+    with pytest.raises(Exception, match=r"(?i)ambiguous"):
+        j1.join(ports, j1.dest == ports.code)