From f62b4cac03d0084700450ab9d11115d709777fca Mon Sep 17 00:00:00 2001 From: Hussain Sultan Date: Sun, 3 May 2026 08:45:40 -0400 Subject: [PATCH 1/4] =?UTF-8?q?refactor:=20convert=20ibis=E2=86=92xorq=20o?= =?UTF-8?q?nce=20at=20SemanticModel=20construction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The boundary between user-supplied ibis tables and BSL's internal xorq representation was scattered. ``_ensure_xorq_table`` was called lazily inside ``SemanticTableOp.to_untagged()`` on every traversal, and ops.py wrapped the xorq imports in a ``try/except ImportError`` that could never trigger because ``xorq.api`` is already imported unconditionally above it. Move the conversion to ``SemanticModel.__init__`` so it runs once at construction. Internal code paths can then assume a xorq table when the backend is supported, and a plain ibis fallback otherwise (e.g. Databricks). The conversion is idempotent, so re-construction via ``with_dimensions`` / ``with_measures`` is cheap. Surfaces two latent mixing bugs that the lazy conversion was hiding: - ``SemanticTableOp.values`` built a dummy table for calc-measure type inference using plain ``ibis.table()`` even when the base table was xorq, then silently swallowed the resulting compile error. Switch to the matching ibis module via ``_get_ibis_module``. - The HTTP search-dimension-values endpoint called ``ibis.desc(...)`` on what is now a xorq aggregate, triggering infinite recursion in xorq's ``bind``. Same fix. Rename ``_unify_backends`` → ``_rebind_to_canonical_backend`` and split out the rebind primitive as ``_rebind_to_backend``, deduplicating with the copy in ``serialization/reconstruct.py``. The dead ``except ImportError`` branch in ops.py is removed; xorq is required. A follow-up PR can remove the runtime canonical-rebind once ``from_ibis()`` results are cached per source backend (currently each call returns a distinct Backend, so joins between separately-converted models still need the rebind pass). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/boring_semantic_layer/expr.py | 19 ++- src/boring_semantic_layer/ops.py | 125 ++++++++++-------- .../serialization/reconstruct.py | 16 +-- src/boring_semantic_layer/server/api.py | 7 +- .../tests/test_plain_ibis.py | 26 +++- 5 files changed, 118 insertions(+), 75 deletions(-) diff --git a/src/boring_semantic_layer/expr.py b/src/boring_semantic_layer/expr.py index 5775e93..0a49a7b 100644 --- a/src/boring_semantic_layer/expr.py +++ b/src/boring_semantic_layer/expr.py @@ -256,19 +256,19 @@ def to_tagged(self, aggregate_cache_storage=None): def execute(self, **kwargs): # Accept kwargs for ibis compatibility (params, limit, etc) - from .ops import _unify_backends + from .ops import _rebind_to_canonical_backend - return _unify_backends(to_untagged(self)).execute(**kwargs) + return _rebind_to_canonical_backend(to_untagged(self)).execute(**kwargs) def compile(self, **kwargs): - from .ops import _unify_backends + from .ops import _rebind_to_canonical_backend - return _unify_backends(to_untagged(self)).compile(**kwargs) + return _rebind_to_canonical_backend(to_untagged(self)).compile(**kwargs) def sql(self, **kwargs): - from .ops import _unify_backends + from .ops import _rebind_to_canonical_backend - return ibis.to_sql(_unify_backends(to_untagged(self)), **kwargs) + return ibis.to_sql(_rebind_to_canonical_backend(to_untagged(self)), **kwargs) def to_pandas(self, **kwargs): return self.to_untagged().to_pandas(**kwargs) @@ -489,7 +489,12 @@ def __init__( description: str | None = None, _source_join: Any | None = None, ) -> None: - # Keep tables in regular ibis - only convert to xorq at execution time if needed + # Convert ibis → xorq once at the boundary; internal code paths can + # then assume xorq-vendored tables when the backend is supported. + # Falls back to the plain ibis table on backends xorq can't wrap. + from .ops import _ensure_xorq_table + + table = _ensure_xorq_table(table) dims = _expand_derived_dimensions(dimensions) diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py index 3ac5542..d2a0dfe 100644 --- a/src/boring_semantic_layer/ops.py +++ b/src/boring_semantic_layer/ops.py @@ -17,29 +17,22 @@ from ibis.expr.operations.relations import Field, Relation from ibis.expr.schema import Schema -try: - from xorq.vendor.ibis.common.collections import FrozenDict, FrozenOrderedDict - from xorq.vendor.ibis.expr import operations as xorq_ops - from xorq.vendor.ibis.expr.schema import Schema as XorqSchema - - _SchemaClass = XorqSchema - _FrozenOrderedDict = FrozenOrderedDict - _MeanTypes = (ibis_ops.reductions.Mean, xorq_ops.reductions.Mean) - _MinTypes = (ibis_ops.reductions.Min, xorq_ops.reductions.Min) - _MaxTypes = (ibis_ops.reductions.Max, xorq_ops.reductions.Max) - _CountDistinctTypes = ( - ibis_ops.reductions.CountDistinct, - xorq_ops.reductions.CountDistinct, - ) -except ImportError: - from ibis.common.collections import FrozenDict, FrozenOrderedDict - - _SchemaClass = Schema - _FrozenOrderedDict = FrozenOrderedDict - _MeanTypes = (ibis_ops.reductions.Mean,) - _MinTypes = (ibis_ops.reductions.Min,) - _MaxTypes = (ibis_ops.reductions.Max,) - _CountDistinctTypes = (ibis_ops.reductions.CountDistinct,) +from xorq.vendor.ibis.common.collections import FrozenDict, FrozenOrderedDict +from xorq.vendor.ibis.expr import operations as xorq_ops +from xorq.vendor.ibis.expr.schema import Schema as XorqSchema + +_SchemaClass = XorqSchema +_FrozenOrderedDict = FrozenOrderedDict +# User-supplied callables may produce expressions against either plain ibis +# tables or xorq-vendored tables, so isinstance checks against reduction ops +# accept both forms. +_MeanTypes = (ibis_ops.reductions.Mean, xorq_ops.reductions.Mean) +_MinTypes = (ibis_ops.reductions.Min, xorq_ops.reductions.Min) +_MaxTypes = (ibis_ops.reductions.Max, xorq_ops.reductions.Max) +_CountDistinctTypes = ( + ibis_ops.reductions.CountDistinct, + xorq_ops.reductions.CountDistinct, +) from returns.maybe import Maybe, Nothing, Some from returns.result import Success, safe @@ -228,8 +221,14 @@ def _map_sort_key(val, kwargs=None): def _ensure_xorq_table(table): """Convert plain ibis Table to xorq-vendored ibis if possible. + This is the single boundary between user-supplied ibis tables and + BSL's internal xorq representation. ``SemanticModel`` calls it once + at construction so internal code paths can assume xorq tables when + the backend is supported, and a plain ibis fallback otherwise. + Falls back to returning the plain ibis table when the backend is not - supported by xorq (e.g. Databricks). + supported by xorq (e.g. Databricks). Idempotent: calling it on a + xorq-vendored table is a cheap no-op. """ _patch_xorq_sortkey_compat() if "xorq.vendor.ibis" not in type(table).__module__: @@ -242,34 +241,18 @@ def _ensure_xorq_table(table): return table -def _unify_backends(expr): - """Ensure all DatabaseTable nodes in *expr* share a single xorq backend. - - ``from_ibis()`` creates a distinct Backend per call, so expressions - built by composing separately-converted tables contain multiple - backends. This function rewrites the tree so every DatabaseTable - points at the same canonical backend, eliminating "Multiple backends - found" errors at execution time. +def _rebind_to_backend(expr, target_backend): + """Rebind every ``DatabaseTable`` op in *expr* to *target_backend*. - For plain ibis expressions (not xorq-vendored), this is a no-op. + Low-level primitive shared with ``serialization.reconstruct``. + No-op on plain ibis expressions or when xorq is unavailable for any + reason; callers must pass a xorq-vendored ``target_backend``. """ try: - from xorq.common.utils.node_utils import walk_nodes from xorq.vendor.ibis.expr.operations import relations as xorq_rel except Exception: return expr - try: - db_tables = list(walk_nodes((xorq_rel.DatabaseTable,), expr)) - except Exception: - return expr - - canonical = db_tables[0].source if db_tables else None - - if canonical is None: - return expr - - # 2. Replace divergent backends. def _recreate(op, _kwargs, **overrides): kwargs = dict(zip(op.__argnames__, op.__args__, strict=False)) if _kwargs: @@ -278,8 +261,8 @@ def _recreate(op, _kwargs, **overrides): return op.__recreate__(kwargs) def replacer(op, _kwargs): - if isinstance(op, xorq_rel.DatabaseTable) and op.source is not canonical: - return _recreate(op, _kwargs, source=canonical) + if isinstance(op, xorq_rel.DatabaseTable) and op.source is not target_backend: + return _recreate(op, _kwargs, source=target_backend) if _kwargs: return _recreate(op, _kwargs) return op @@ -287,6 +270,35 @@ def replacer(op, _kwargs): return expr.op().replace(replacer).to_expr() +def _rebind_to_canonical_backend(expr): + """Rebind divergent ``DatabaseTable`` backends in *expr* to share one. + + ``from_ibis()`` creates a distinct ``Backend`` per call, so expressions + built by composing separately-converted tables contain multiple + backends. Picking the first ``DatabaseTable``'s source as canonical + and rebinding the rest eliminates "Multiple backends found" errors + at execution time. + + No-op on plain ibis expressions (not xorq-vendored). + """ + try: + from xorq.common.utils.node_utils import walk_nodes + from xorq.vendor.ibis.expr.operations import relations as xorq_rel + except Exception: + return expr + + try: + db_tables = list(walk_nodes((xorq_rel.DatabaseTable,), expr)) + except Exception: + return expr + + canonical = db_tables[0].source if db_tables else None + if canonical is None: + return expr + + return _rebind_to_backend(expr, canonical) + + def _to_untagged(source: Any) -> ir.Table: return source.to_untagged() if hasattr(source, "to_untagged") else source.to_expr() @@ -1108,13 +1120,16 @@ def values(self) -> FrozenOrderedDict[str, Any]: } # Resolve calculated measure types via a dummy table with base measure dtypes if calc_measures: - from .compile_all import _compile_formula + from .compile_all import _compile_formula, _get_ibis_module measure_schema = { name: base_values[name].dtype for name in measures if name in base_values } + # Match the ibis module of the enriched table so calc-measure + # compilation doesn't mix plain ibis and xorq-vendored types. + ibis_module = _get_ibis_module(enriched) try: - dummy = ibis.table(measure_schema, name="__type_inference__") + dummy = ibis_module.table(measure_schema, name="__type_inference__") except Exception: # ibis.table() rejects schemas with dotted names (joined models); # skip calc-measure type inference in that case. @@ -1200,7 +1215,9 @@ def __getattribute__(self, name: str): return object.__getattribute__(self, name) def to_untagged(self): - return _ensure_xorq_table(self.table) + # Conversion happens at SemanticModel construction; self.table is + # already xorq when supported, plain ibis when not. + return self.table class SemanticFilterOp(Relation): @@ -4068,13 +4085,13 @@ def replacer(op, _kwargs): return new_left, new_right def execute(self): - return _unify_backends(self.to_untagged()).execute() + return _rebind_to_canonical_backend(self.to_untagged()).execute() def compile(self, **kwargs): - return _unify_backends(self.to_untagged()).compile(**kwargs) + return _rebind_to_canonical_backend(self.to_untagged()).compile(**kwargs) def sql(self, **kwargs): - return ibis.to_sql(_unify_backends(self.to_untagged()), **kwargs) + return ibis.to_sql(_rebind_to_canonical_backend(self.to_untagged()), **kwargs) def __getitem__(self, key): dims_dict = self.get_dimensions() @@ -4471,7 +4488,7 @@ def limit(self, n: int, offset: int = 0) -> SemanticLimit: return SemanticLimit(source=self, n=n, offset=offset) def execute(self): - return _unify_backends(self.to_untagged()).execute() + return _rebind_to_canonical_backend(self.to_untagged()).execute() def as_expr(self): """Return self as expression.""" diff --git a/src/boring_semantic_layer/serialization/reconstruct.py b/src/boring_semantic_layer/serialization/reconstruct.py index 782e500..61cd846 100644 --- a/src/boring_semantic_layer/serialization/reconstruct.py +++ b/src/boring_semantic_layer/serialization/reconstruct.py @@ -326,18 +326,14 @@ def _unwrap_join_ref(expr): def _rebind_to_backend(expr, target_backend): - """Rebind all DatabaseTable ops in *expr* to use *target_backend*.""" - from xorq.common.utils.graph_utils import replace_nodes - from xorq.vendor.ibis.expr.operations import relations as xorq_rel + """Rebind every ``DatabaseTable`` op in *expr* to *target_backend*. - def replacer(op, _kwargs): - if isinstance(op, xorq_rel.DatabaseTable) and op.source is not target_backend: - kwargs = dict(zip(op.__argnames__, op.__args__, strict=False)) - kwargs["source"] = target_backend - return op.__recreate__(kwargs) - return op + Thin re-export of the primitive defined in ``ops`` so callers in this + module don't have to reach across the package layer. + """ + from ..ops import _rebind_to_backend as _impl - return replace_nodes(replacer, expr).to_expr() + return _impl(expr, target_backend) def _split_join_expr(xorq_expr): diff --git a/src/boring_semantic_layer/server/api.py b/src/boring_semantic_layer/server/api.py index 86dca39..820f623 100644 --- a/src/boring_semantic_layer/server/api.py +++ b/src/boring_semantic_layer/server/api.py @@ -162,8 +162,13 @@ def _search_dimension_values_response( ), ) + from boring_semantic_layer.compile_all import _get_ibis_module + dim = dims[dimension_name] tbl = model.table + # Match the ibis module of the underlying table so sort keys etc. don't + # mix plain ibis with xorq-vendored types. + ibis_module = _get_ibis_module(tbl) col_expr = dim(tbl) agg = ( tbl.select(col_expr.name("_value")) @@ -180,7 +185,7 @@ def to_value_list(df) -> list[dict[str, Any]]: ] def fetch(base_agg, n: int) -> tuple[list[dict[str, Any]], bool]: - df = base_agg.order_by(ibis.desc("frequency")).limit(n + 1).execute() + df = base_agg.order_by(ibis_module.desc("frequency")).limit(n + 1).execute() return to_value_list(df.head(n)), len(df) <= n separator_pattern = r"[\s\-_.,]+" diff --git a/src/boring_semantic_layer/tests/test_plain_ibis.py b/src/boring_semantic_layer/tests/test_plain_ibis.py index 243d140..898aab8 100644 --- a/src/boring_semantic_layer/tests/test_plain_ibis.py +++ b/src/boring_semantic_layer/tests/test_plain_ibis.py @@ -172,19 +172,39 @@ def test_join_one(self, flights_model, carriers_model): assert "name" in result.columns def test_join_one_unsupported_backend( - self, flights_model, carriers_model, monkeypatch + self, flights_table, carriers_table, monkeypatch ): """Simulate a backend xorq cannot wrap (e.g. BigQuery) by forcing ``_ensure_xorq_table`` to return the plain ibis table. The join must still execute natively rather than crashing inside ``_rebind_join_backends``. See issue #242. + + Conversion happens at ``SemanticModel`` construction, so the + monkeypatch must be active before models are built. """ from boring_semantic_layer import ops monkeypatch.setattr(ops, "_ensure_xorq_table", lambda table: table) - joined = flights_model.join_one( - carriers_model, + flights = ( + to_semantic_table(flights_table, name="flights") + .with_dimensions( + carrier=lambda t: t.carrier, + origin=lambda t: t.origin, + dest=lambda t: t.dest, + ) + .with_measures(flight_count=lambda t: t.count()) + ) + carriers = ( + to_semantic_table(carriers_table, name="carriers") + .with_dimensions( + code=lambda t: t.code, + name=lambda t: t.name, + ) + ) + + joined = flights.join_one( + carriers, on=lambda f, c: f.carrier == c.code, ) result = joined.group_by("name").aggregate("flight_count").execute() From 3eed45441ab6b64a3cb8770da86abe4fd3a5794c Mon Sep 17 00:00:00 2001 From: Hussain Sultan Date: Sun, 3 May 2026 08:58:47 -0400 Subject: [PATCH 2/4] refactor: dispatch reduction-type check on expr's ibis flavor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-supplied callables produce expressions against exactly one of ``ibis`` or ``xorq.vendor.ibis`` — never a mix — so isinstance checks against ``(ibis_ops.X, xorq_ops.X)`` unions were always going to match exactly one tuple element. Replace the unions with a small ``_reductions_for_expr`` helper that picks the right ``reductions`` module by inspecting the expression's class module, and rewrite the three reduction-shape predicates (``_is_mean_expr``, ``_is_count_distinct_expr``, ``_reagg_op_for_expr``) to use it. No behavior change; just narrows each isinstance to a single concrete type and removes the four module-level union tuples. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/boring_semantic_layer/ops.py | 38 ++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py index d2a0dfe..481bd91 100644 --- a/src/boring_semantic_layer/ops.py +++ b/src/boring_semantic_layer/ops.py @@ -23,16 +23,19 @@ _SchemaClass = XorqSchema _FrozenOrderedDict = FrozenOrderedDict -# User-supplied callables may produce expressions against either plain ibis -# tables or xorq-vendored tables, so isinstance checks against reduction ops -# accept both forms. -_MeanTypes = (ibis_ops.reductions.Mean, xorq_ops.reductions.Mean) -_MinTypes = (ibis_ops.reductions.Min, xorq_ops.reductions.Min) -_MaxTypes = (ibis_ops.reductions.Max, xorq_ops.reductions.Max) -_CountDistinctTypes = ( - ibis_ops.reductions.CountDistinct, - xorq_ops.reductions.CountDistinct, -) + + +def _reductions_for_expr(expr): + """Return the ``reductions`` ops module matching *expr*'s ibis flavor. + + A user-supplied callable produces expressions against exactly one of + ``ibis`` or ``xorq.vendor.ibis`` — whichever the underlying table came + from. Pick that module so isinstance checks compare against a single + concrete type rather than a cross-module union. + """ + if type(expr.op()).__module__.startswith("xorq.vendor.ibis"): + return xorq_ops.reductions + return ibis_ops.reductions from returns.maybe import Maybe, Nothing, Some from returns.result import Success, safe @@ -1973,14 +1976,16 @@ def _attach_dim_column(pt, gb_col, measure_names, join_tree_info, merged_dimensi def _is_mean_expr(expr): """Check if an ibis expression is a Mean/Average reduction.""" try: - return isinstance(expr.op(), _MeanTypes) + return isinstance(expr.op(), _reductions_for_expr(expr).Mean) except Exception: return False def _is_count_distinct_expr(expr): """Check if an ibis expression is a CountDistinct (nunique) reduction.""" - return safe(lambda: isinstance(expr.op(), _CountDistinctTypes))().value_or(False) + return safe( + lambda: isinstance(expr.op(), _reductions_for_expr(expr).CountDistinct) + )().value_or(False) def _reagg_op_for_expr(expr): @@ -1991,16 +1996,17 @@ def _reagg_op_for_expr(expr): MEAN should never reach here — it is decomposed by ``_is_mean_expr``. """ op = expr.op() - if isinstance(op, _MinTypes): + reductions = _reductions_for_expr(expr) + if isinstance(op, reductions.Min): return "min" - if isinstance(op, _MaxTypes): + if isinstance(op, reductions.Max): return "max" - if isinstance(op, _MeanTypes): + if isinstance(op, reductions.Mean): raise ValueError( f"Mean expression {expr.get_name()!r} was not decomposed — " "this is a bug in the pre-aggregation logic" ) - if isinstance(op, _CountDistinctTypes): + if isinstance(op, reductions.CountDistinct): raise ValueError( f"CountDistinct expression {expr.get_name()!r} was not deferred — " "this is a bug in the pre-aggregation logic" From bf7870592ba1d0497282d2a9148509e897e8ace6 Mon Sep 17 00:00:00 2001 From: Hussain Sultan Date: Sun, 3 May 2026 09:06:12 -0400 Subject: [PATCH 3/4] refactor: tighten calc-measure AST and surface type-inference failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three small fixes around calc-measure construction: - Add ``validate_calc_ast`` in ``measure_scope`` and call it from ``_classify_measure``. Rejects illegal AllOf shapes (e.g. ``AllOf(BinOp(...))``) at construction with a message that names the offending calc measure, instead of letting them parse and fail deep inside the compiler. The existing valid shapes — ``AllOf(MeasureRef)`` and ``AllOf(AggregationExpr)`` (handled by the rewrite pipeline) — pass through untouched. - Add ``infer_calc_dtype`` in ``compile_all``. The type-inference path in ``SemanticTableOp.values`` now mirrors the ``AggregationExpr`` rewrite step from ``compile_grouped_with_all`` so calc measures containing inline aggregations (``t.value.sum() / t.all(t.value.sum())``) get a known dtype on ``schema`` instead of being silently dropped. - Replace the bare ``except Exception: pass`` in ``SemanticTableOp.values`` with a debug-level log naming the calc measure and the exception, so legitimate type-inference failures are at least discoverable. Three tests added covering the validator (rejects, accepts) and the dtype-inference round-trip. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/boring_semantic_layer/compile_all.py | 31 ++++++++++++ src/boring_semantic_layer/expr.py | 10 ++-- src/boring_semantic_layer/measure_scope.py | 37 +++++++++++++++ src/boring_semantic_layer/ops.py | 47 +++++++++++-------- .../tests/test_measure_reference_styles.py | 40 ++++++++++++++++ 5 files changed, 140 insertions(+), 25 deletions(-) diff --git a/src/boring_semantic_layer/compile_all.py b/src/boring_semantic_layer/compile_all.py index 716001c..06998dd 100644 --- a/src/boring_semantic_layer/compile_all.py +++ b/src/boring_semantic_layer/compile_all.py @@ -217,6 +217,37 @@ def _compile_formula(expr: MeasureExpr, by_tbl, all_tbl, base_tbl): return expr +def infer_calc_dtype(calc_expr, base_measure_schema, base_tbl, ibis_module): + """Compile *calc_expr* against a synthetic dummy table to infer its dtype. + + Mirrors the ``AggregationExpr`` rewrite step in + ``compile_grouped_with_all`` so that calc measures containing inline + aggregations (e.g. ``t.value.sum() / t.all(t.value.sum())``) can have + their type resolved. Each inline ``AggregationExpr`` is materialized + against ``base_tbl`` to learn its dtype, added as a synthetic column + on a dummy table, and replaced with a ``MeasureRef`` in the rewritten + expression before compilation. + + Returns the compiled ibis expression. Caller handles failure. + """ + inline_aggs = set() + _collect_aggregation_exprs(calc_expr, inline_aggs) + + extended_schema = dict(base_measure_schema) + agg_name_map = {} + for agg_expr in sorted(inline_aggs, key=repr): + name = _make_agg_name(agg_expr) + while name in extended_schema: + name = name + "_" + agg_name_map[agg_expr] = name + agg_fn = _make_agg_fn_from_expr(agg_expr) + extended_schema[name] = agg_fn(base_tbl).type() + + dummy = ibis_module.table(extended_schema, name="__type_inference__") + rewritten = _replace_aggregation_exprs(calc_expr, agg_name_map) + return _compile_formula(rewritten, dummy, dummy, base_tbl) + + @frozen class MeasureClassification: regular_measures: dict[str, tuple[callable, Any]] diff --git a/src/boring_semantic_layer/expr.py b/src/boring_semantic_layer/expr.py index 0a49a7b..91f3d9d 100644 --- a/src/boring_semantic_layer/expr.py +++ b/src/boring_semantic_layer/expr.py @@ -597,7 +597,7 @@ def with_measures(self, **meas) -> SemanticModel: scope = MeasureScope(_tbl=base_tbl, _known=all_measure_names) for name, fn_or_expr in meas.items(): - kind, value = _classify_measure(fn_or_expr, scope) + kind, value = _classify_measure(fn_or_expr, scope, name) (new_calc_meas if kind == "calc" else new_base_meas)[name] = value return SemanticModel( @@ -990,7 +990,7 @@ def with_measures(self, **meas) -> SemanticModel: dict(self.get_calculated_measures()), ) for name, fn_or_expr in meas.items(): - kind, value = _classify_measure(fn_or_expr, scope) + kind, value = _classify_measure(fn_or_expr, scope, name) (new_calc if kind == "calc" else new_base)[name] = value return SemanticModel( @@ -1127,7 +1127,7 @@ def with_measures(self, **meas) -> SemanticModel: scope = MeasureScope(_tbl=self.op().to_untagged(), _known=all_measure_names) for name, fn_or_expr in meas.items(): - kind, value = _classify_measure(fn_or_expr, scope) + kind, value = _classify_measure(fn_or_expr, scope, name) (new_calc_meas if kind == "calc" else new_base_meas)[name] = value return SemanticModel( @@ -1610,7 +1610,7 @@ def with_measures(self, **meas) -> SemanticModel: scope = MeasureScope(_tbl=self, _known=all_measure_names) for name, fn_or_expr in meas.items(): - kind, value = _classify_measure(fn_or_expr, scope) + kind, value = _classify_measure(fn_or_expr, scope, name) (new_calc_meas if kind == "calc" else new_base_meas)[name] = value return SemanticModel( @@ -1696,7 +1696,7 @@ def with_measures(self, **meas) -> SemanticModel: scope = MeasureScope(_tbl=self, _known=all_measure_names) for name, fn_or_expr in meas.items(): - kind, value = _classify_measure(fn_or_expr, scope) + kind, value = _classify_measure(fn_or_expr, scope, name) (new_calc_meas if kind == "calc" else new_base_meas)[name] = value return SemanticModel( diff --git a/src/boring_semantic_layer/measure_scope.py b/src/boring_semantic_layer/measure_scope.py index 5c53f26..38ad485 100644 --- a/src/boring_semantic_layer/measure_scope.py +++ b/src/boring_semantic_layer/measure_scope.py @@ -174,6 +174,43 @@ def __call__(self, *args, **kwargs): MeasureExpr = MeasureRef | AllOf | BinOp | MethodCall | AggregationExpr | float | int +def validate_calc_ast(expr: Any, measure_name: str | None = None) -> None: + """Walk a calc-measure AST and raise ``ValueError`` on illegal shapes. + + The AST nodes are unconstrained at construction (Any-typed fields), so + invalid compositions like ``AllOf(BinOp(...))`` parse but later fail + deep inside the compiler with confusing messages. Run this after + classification to surface the structural problem early, naming the + offending calc measure when known. + + ``AllOf.ref`` must be a ``MeasureRef`` or ``AggregationExpr``. Other + refs (BinOp, MethodCall, nested AllOf) are not supported by either + the direct compile path or the rewrite-then-compile pipeline in + ``compile_grouped_with_all``. + """ + where = f" in calc measure {measure_name!r}" if measure_name else "" + + def walk(node): + if isinstance(node, AllOf): + if not isinstance(node.ref, (MeasureRef, AggregationExpr)): + raise ValueError( + f"Invalid AllOf{where}: ref must be a measure reference or " + f"inline aggregation, got {type(node.ref).__name__}. " + f"Wrap it in a named measure first, e.g. " + f".with_measures(my_measure=...) then use t.all(t.my_measure)." + ) + walk(node.ref) + elif isinstance(node, BinOp): + walk(node.left) + walk(node.right) + elif isinstance(node, MethodCall): + walk(node.receiver) + for arg in node.args: + walk(arg) + + walk(expr) + + class DeferredColumn: _AGGREGATIONS = { "sum": "sum", diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py index 481bd91..ff2f534 100644 --- a/src/boring_semantic_layer/ops.py +++ b/src/boring_semantic_layer/ops.py @@ -839,8 +839,12 @@ def wrapped_expr(t): ) -def _classify_measure(fn_or_expr: Any, scope: Any) -> tuple[str, Any]: +def _classify_measure( + fn_or_expr: Any, scope: Any, measure_name: str | None = None +) -> tuple[str, Any]: """Classify measure as 'calc' or 'base' with appropriate handling.""" + from .measure_scope import validate_calc_ast + expr, description, requires_unnest, metadata = _extract_measure_metadata(fn_or_expr) resolved = safe(lambda: _resolve_expr(expr, scope))().map( @@ -848,7 +852,9 @@ def _classify_measure(fn_or_expr: Any, scope: Any) -> tuple[str, Any]: ) if isinstance(resolved, Success) and resolved.unwrap() is not None: - return resolved.unwrap() + kind, value = resolved.unwrap() + validate_calc_ast(value, measure_name) + return (kind, value) if not requires_unnest and callable(expr): # All scopes (MeasureScope, ColumnScope) have tbl attribute @@ -1121,29 +1127,30 @@ def values(self) -> FrozenOrderedDict[str, Any]: **{name: enriched[name].op() for name in dims}, **{name: fn(enriched).op() for name, fn in measures.items()}, } - # Resolve calculated measure types via a dummy table with base measure dtypes + # Resolve calculated measure types via a dummy table with base measure dtypes. + # ``infer_calc_dtype`` mirrors the AggregationExpr rewrite from + # ``compile_grouped_with_all`` so calc measures with inline aggregations + # (e.g. ``AllOf(AggregationExpr)``) round-trip through type inference. if calc_measures: - from .compile_all import _compile_formula, _get_ibis_module + from .compile_all import _get_ibis_module, infer_calc_dtype measure_schema = { name: base_values[name].dtype for name in measures if name in base_values } - # Match the ibis module of the enriched table so calc-measure - # compilation doesn't mix plain ibis and xorq-vendored types. ibis_module = _get_ibis_module(enriched) - try: - dummy = ibis_module.table(measure_schema, name="__type_inference__") - except Exception: - # ibis.table() rejects schemas with dotted names (joined models); - # skip calc-measure type inference in that case. - dummy = None - if dummy is not None: - for name, expr in calc_measures.items(): - try: - compiled = _compile_formula(expr, dummy, dummy, enriched) - base_values[name] = compiled.op() - except Exception: - pass + for name, expr in calc_measures.items(): + try: + compiled = infer_calc_dtype( + expr, measure_schema, enriched, ibis_module + ) + base_values[name] = compiled.op() + except Exception as e: + # Joined models with dotted column names, calc measures + # whose inline aggregations don't apply to the dummy schema, + # etc. Type info is best-effort; surface for debugging. + logger.debug( + "calc-measure type inference failed for %r: %s", name, e + ) return FrozenOrderedDict(base_values) @property @@ -3445,7 +3452,7 @@ def with_measures(self, **meas) -> SemanticTable: dict(self.get_calculated_measures()), ) for name, fn_or_expr in meas.items(): - kind, value = _classify_measure(fn_or_expr, scope) + kind, value = _classify_measure(fn_or_expr, scope, name) (new_calc if kind == "calc" else new_base)[name] = value return _semantic_table( diff --git a/src/boring_semantic_layer/tests/test_measure_reference_styles.py b/src/boring_semantic_layer/tests/test_measure_reference_styles.py index 4a52556..d741af9 100644 --- a/src/boring_semantic_layer/tests/test_measure_reference_styles.py +++ b/src/boring_semantic_layer/tests/test_measure_reference_styles.py @@ -500,3 +500,43 @@ def test_method_call_serialization_roundtrip(): assert result.args == (2,) assert isinstance(result.receiver, BinOp) assert result.receiver.op == "div" + + +def test_validate_calc_ast_rejects_allof_binop(): + """AllOf wrapping a BinOp should fail at construction with a clear error.""" + from boring_semantic_layer.measure_scope import ( + AllOf, + BinOp, + MeasureRef, + validate_calc_ast, + ) + + bad = AllOf(ref=BinOp("add", MeasureRef("a"), MeasureRef("b"))) + with pytest.raises(ValueError, match="Invalid AllOf.*BinOp"): + validate_calc_ast(bad, measure_name="ratio") + + +def test_validate_calc_ast_accepts_allof_aggregation_expr(): + """AllOf(AggregationExpr) is valid — handled by the rewrite pipeline.""" + from boring_semantic_layer.measure_scope import ( + AggregationExpr, + AllOf, + validate_calc_ast, + ) + + ok = AllOf(ref=AggregationExpr(column="value", operation="sum")) + validate_calc_ast(ok, measure_name="pct") # no raise + + +def test_calc_dtype_inference_with_inline_aggregation(): + """``schema`` should know the dtype of a calc measure built with t.all(t.col.sum()).""" + con = ibis.duckdb.connect(":memory:") + events = pd.DataFrame({"grp": ["a", "b"], "value": [1.0, 2.0]}) + tbl = con.create_table("events", events) + + st = to_semantic_table(tbl, "events").with_measures( + pct_of_total=lambda t: t.value.sum() / t.all(t.value.sum()), + ) + # Before infer_calc_dtype handled the rewrite path, this was silently + # dropped from values and the dtype was unknown. + assert "pct_of_total" in st.schema.names From ef646cb75146e7c9f4d79f481ff21060bcd3e341 Mon Sep 17 00:00:00 2001 From: Hussain Sultan Date: Sun, 3 May 2026 13:28:45 -0400 Subject: [PATCH 4/4] fix: use matching ibis module for desc() in MCP search_dimension_values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same recursion bug the construction-time conversion surfaced in ``server/api.py`` — ``ibis.desc(...)`` from plain ibis on a xorq-vendored aggregate sends xorq's ``bind`` into infinite recursion. The MCP ``search_dimension_values`` tool had its own copy of the pattern that CI caught. Pick the matching module via ``_get_ibis_module(tbl)`` like the FastAPI handler now does. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/boring_semantic_layer/agents/backends/mcp.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/boring_semantic_layer/agents/backends/mcp.py b/src/boring_semantic_layer/agents/backends/mcp.py index e54f966..e30e0f2 100644 --- a/src/boring_semantic_layer/agents/backends/mcp.py +++ b/src/boring_semantic_layer/agents/backends/mcp.py @@ -427,8 +427,14 @@ def search_dimension_values( f"Available dimensions: {list(dims.keys())}" ) + from boring_semantic_layer.compile_all import _get_ibis_module + dim = dims[dimension_name] tbl = model.table + # Match the ibis module of the underlying table so sort keys + # don't mix plain ibis with xorq-vendored types (which causes + # infinite recursion in xorq's ``bind``). + ibis_module = _get_ibis_module(tbl) col_expr = dim(tbl) # Aggregate by value to get frequency counts @@ -453,7 +459,7 @@ def _fetch(base_agg, n): """Fetch top n+1 rows and return (values_list, is_complete).""" df = ( base_agg - .order_by(ibis.desc("frequency")) + .order_by(ibis_module.desc("frequency")) .limit(n + 1) .execute() )