Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/boring_semantic_layer/agents/backends/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,8 +427,14 @@ def search_dimension_values(
f"Available dimensions: {list(dims.keys())}"
)

from boring_semantic_layer.compile_all import _get_ibis_module

dim = dims[dimension_name]
tbl = model.table
# Match the ibis module of the underlying table so sort keys
# don't mix plain ibis with xorq-vendored types (which causes
# infinite recursion in xorq's ``bind``).
ibis_module = _get_ibis_module(tbl)
col_expr = dim(tbl)

# Aggregate by value to get frequency counts
Expand All @@ -453,7 +459,7 @@ def _fetch(base_agg, n):
"""Fetch top n+1 rows and return (values_list, is_complete)."""
df = (
base_agg
.order_by(ibis.desc("frequency"))
.order_by(ibis_module.desc("frequency"))
.limit(n + 1)
.execute()
)
Expand Down
31 changes: 31 additions & 0 deletions src/boring_semantic_layer/compile_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,37 @@ def _compile_formula(expr: MeasureExpr, by_tbl, all_tbl, base_tbl):
return expr


def infer_calc_dtype(calc_expr, base_measure_schema, base_tbl, ibis_module):
    """Compile *calc_expr* over a placeholder table so its dtype can be read.

    Calc measures may embed inline aggregations (for example
    ``t.value.sum() / t.all(t.value.sum())``). This follows the same
    ``AggregationExpr`` rewrite used by ``compile_grouped_with_all``:

    1. Collect every inline ``AggregationExpr`` found in *calc_expr*.
    2. Evaluate each one against ``base_tbl`` to learn its type and register
       it as a synthetic column next to the entries of
       ``base_measure_schema``.
    3. Build an unbound table named ``__type_inference__`` from that
       extended schema via ``ibis_module.table``.
    4. Swap the inline aggregations for references to the synthetic columns
       and compile the rewritten expression against the placeholder table.

    Returns the result of ``_compile_formula`` for the rewritten expression.
    Nothing is caught here; any failure propagates to the caller.
    """
    found_aggs = set()
    _collect_aggregation_exprs(calc_expr, found_aggs)

    schema = dict(base_measure_schema)
    column_for_agg = {}
    # Sorting by repr keeps the synthetic-name assignment deterministic even
    # though the aggregations were gathered into an unordered set.
    for agg in sorted(found_aggs, key=repr):
        column = _make_agg_name(agg)
        # Pad with underscores until the name no longer clashes with an
        # existing measure or a previously registered synthetic column.
        while column in schema:
            column = column + "_"
        column_for_agg[agg] = column
        schema[column] = _make_agg_fn_from_expr(agg)(base_tbl).type()

    placeholder = ibis_module.table(schema, name="__type_inference__")
    return _compile_formula(
        _replace_aggregation_exprs(calc_expr, column_for_agg),
        placeholder,
        placeholder,
        base_tbl,
    )


@frozen
class MeasureClassification:
regular_measures: dict[str, tuple[callable, Any]]
Expand Down
29 changes: 17 additions & 12 deletions src/boring_semantic_layer/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,19 +256,19 @@ def to_tagged(self, aggregate_cache_storage=None):

def execute(self, **kwargs):
# Accept kwargs for ibis compatibility (params, limit, etc)
from .ops import _unify_backends
from .ops import _rebind_to_canonical_backend

return _unify_backends(to_untagged(self)).execute(**kwargs)
return _rebind_to_canonical_backend(to_untagged(self)).execute(**kwargs)

def compile(self, **kwargs):
from .ops import _unify_backends
from .ops import _rebind_to_canonical_backend

return _unify_backends(to_untagged(self)).compile(**kwargs)
return _rebind_to_canonical_backend(to_untagged(self)).compile(**kwargs)

def sql(self, **kwargs):
from .ops import _unify_backends
from .ops import _rebind_to_canonical_backend

return ibis.to_sql(_unify_backends(to_untagged(self)), **kwargs)
return ibis.to_sql(_rebind_to_canonical_backend(to_untagged(self)), **kwargs)

def to_pandas(self, **kwargs):
    """Return ``to_pandas(**kwargs)`` of this expression's untagged form."""
    untagged = self.to_untagged()
    return untagged.to_pandas(**kwargs)
Expand Down Expand Up @@ -489,7 +489,12 @@ def __init__(
description: str | None = None,
_source_join: Any | None = None,
) -> None:
# Keep tables in regular ibis - only convert to xorq at execution time if needed
# Convert ibis → xorq once at the boundary; internal code paths can
# then assume xorq-vendored tables when the backend is supported.
# Falls back to the plain ibis table on backends xorq can't wrap.
from .ops import _ensure_xorq_table

table = _ensure_xorq_table(table)

dims = _expand_derived_dimensions(dimensions)

Expand Down Expand Up @@ -592,7 +597,7 @@ def with_measures(self, **meas) -> SemanticModel:
scope = MeasureScope(_tbl=base_tbl, _known=all_measure_names)

for name, fn_or_expr in meas.items():
kind, value = _classify_measure(fn_or_expr, scope)
kind, value = _classify_measure(fn_or_expr, scope, name)
(new_calc_meas if kind == "calc" else new_base_meas)[name] = value

return SemanticModel(
Expand Down Expand Up @@ -985,7 +990,7 @@ def with_measures(self, **meas) -> SemanticModel:
dict(self.get_calculated_measures()),
)
for name, fn_or_expr in meas.items():
kind, value = _classify_measure(fn_or_expr, scope)
kind, value = _classify_measure(fn_or_expr, scope, name)
(new_calc if kind == "calc" else new_base)[name] = value

return SemanticModel(
Expand Down Expand Up @@ -1122,7 +1127,7 @@ def with_measures(self, **meas) -> SemanticModel:
scope = MeasureScope(_tbl=self.op().to_untagged(), _known=all_measure_names)

for name, fn_or_expr in meas.items():
kind, value = _classify_measure(fn_or_expr, scope)
kind, value = _classify_measure(fn_or_expr, scope, name)
(new_calc_meas if kind == "calc" else new_base_meas)[name] = value

return SemanticModel(
Expand Down Expand Up @@ -1605,7 +1610,7 @@ def with_measures(self, **meas) -> SemanticModel:
scope = MeasureScope(_tbl=self, _known=all_measure_names)

for name, fn_or_expr in meas.items():
kind, value = _classify_measure(fn_or_expr, scope)
kind, value = _classify_measure(fn_or_expr, scope, name)
(new_calc_meas if kind == "calc" else new_base_meas)[name] = value

return SemanticModel(
Expand Down Expand Up @@ -1691,7 +1696,7 @@ def with_measures(self, **meas) -> SemanticModel:
scope = MeasureScope(_tbl=self, _known=all_measure_names)

for name, fn_or_expr in meas.items():
kind, value = _classify_measure(fn_or_expr, scope)
kind, value = _classify_measure(fn_or_expr, scope, name)
(new_calc_meas if kind == "calc" else new_base_meas)[name] = value

return SemanticModel(
Expand Down
37 changes: 37 additions & 0 deletions src/boring_semantic_layer/measure_scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,43 @@ def __call__(self, *args, **kwargs):
MeasureExpr = MeasureRef | AllOf | BinOp | MethodCall | AggregationExpr | float | int


def validate_calc_ast(expr: Any, measure_name: str | None = None) -> None:
    """Reject structurally invalid calc-measure ASTs with a ``ValueError``.

    AST node fields are not type-checked when nodes are built, so shapes
    such as ``AllOf(BinOp(...))`` can be constructed and only blow up later
    inside the compiler with an unhelpful message. Calling this right after
    classification reports the problem up front and, when *measure_name* is
    given, names the calc measure at fault.

    Rule enforced: the ``ref`` of every ``AllOf`` must be a ``MeasureRef``
    or an ``AggregationExpr``. Anything else (``BinOp``, ``MethodCall``, a
    nested ``AllOf``) is supported neither by the direct compile path nor by
    the rewrite-then-compile pipeline in ``compile_grouped_with_all``.

    Traversal descends through ``AllOf.ref``, both sides of a ``BinOp``, and
    the receiver and positional args of a ``MethodCall``; every other node
    type is treated as a leaf.

    Raises:
        ValueError: on the first ``AllOf`` whose ``ref`` is not allowed.
    """
    where = f" in calc measure {measure_name!r}" if measure_name else ""

    def _visit(node):
        if isinstance(node, AllOf):
            if isinstance(node.ref, (MeasureRef, AggregationExpr)):
                _visit(node.ref)
                return
            raise ValueError(
                f"Invalid AllOf{where}: ref must be a measure reference or "
                f"inline aggregation, got {type(node.ref).__name__}. "
                f"Wrap it in a named measure first, e.g. "
                f".with_measures(my_measure=...) then use t.all(t.my_measure)."
            )

        if isinstance(node, BinOp):
            _visit(node.left)
            _visit(node.right)
            return

        if isinstance(node, MethodCall):
            _visit(node.receiver)
            for argument in node.args:
                _visit(argument)

    _visit(expr)


class DeferredColumn:
_AGGREGATIONS = {
"sum": "sum",
Expand Down
Loading
Loading