BSL grain mismatch + with_measures interaction.
When two semantic tables have a grain mismatch (one table declares more entity
dimensions than the other), BSL correctly detects this on join_one and issues
a warning: "Upgrading join_one to join_many for automatic pre-aggregation."
However, if .with_measures() is chained onto the joined result to define a
composed cross-table measure (e.g. amount / total_hours), the SQL generation
is corrupted: the pre-aggregation subquery is not emitted even though join_many
was invoked. The finer-grained table's rows fan out into the spine, causing
the coarser-grained measures to be summed multiple times.
Setup:
financials : 1 row/month, entity dims {year, month}
hours : 3 rows/month, entity dims {year, month, project}
Expected behaviour (join_many without with_measures):
BSL pre-aggregates hours to {year, month} before joining → correct sums.
Broken behaviour (join_many + with_measures):
BSL warns but SQL is flat join → amount summed 3× (once per hours row).
"""
import ibis
import xorq.vendor.ibis as xibis
from boring_semantic_layer import Measure, entity_dimension, to_semantic_table, to_untagged
con = ibis.duckdb.connect()
def execute(bsl_expr):
sql = xibis.to_sql(to_untagged(bsl_expr), dialect="duckdb")
return con.sql(sql).execute()
con.raw_sql("""
CREATE OR REPLACE TABLE financials AS
SELECT * FROM (VALUES
(2025, 1, 42.0),
(2025, 2, 31.0)
) AS t(year, month, amount)
""")
con.raw_sql("""
CREATE OR REPLACE TABLE hours AS
SELECT * FROM (VALUES
(2025, 1, 'P1', 7.0),
(2025, 1, 'P2', 4.0),
(2025, 1, 'P3', 2.0),
(2025, 2, 'P1', 8.0),
(2025, 2, 'P2', 3.0),
(2025, 2, 'P3', 1.0)
) AS t(year, month, project, hours)
""")
financials_raw = con.table("financials")
hours_raw = con.table("hours")
financials_st = (
to_semantic_table(financials_raw, name="financials")
.with_dimensions(
year=entity_dimension(lambda t: t.year),
month=entity_dimension(lambda t: t.month),
)
.with_measures(
amount=Measure(expr=lambda t: t.amount.sum()),
)
)
hours_st = (
to_semantic_table(hours_raw, name="hours")
.with_dimensions(
year=entity_dimension(lambda t: t.year),
month=entity_dimension(lambda t: t.month),
project=entity_dimension(lambda t: t.project),
)
.with_measures(
total_hours=Measure(expr=lambda t: t.hours.sum()),
)
)
print("=" * 60)
print("CORRECT: join_one without with_measures")
print("Expected: amount=42/31, total_hours=13/12")
print("=" * 60)
joined_correct = financials_st.join_one(
hours_st, on=lambda f, h: (f.year == h.year) & (f.month == h.month)
)
result_correct = (
joined_correct
.group_by("financials.year", "financials.month")
.aggregate("financials.amount", "hours.total_hours")
.order_by("financials.year", "financials.month")
)
print(execute(result_correct).to_string(index=False))
print("^ BSL detects grain mismatch, pre-aggregates hours to year/month → correct")
print()
print("=" * 60)
print("BROKEN: join_one + with_measures")
print("Expected: amount=42/31, total_hours=13/12")
print("=" * 60)
joined_broken = (
financials_st
.join_one(hours_st, on=lambda f, h: (f.year == h.year) & (f.month == h.month))
.with_measures(
rate=Measure(expr=lambda t: t.amount / t.total_hours),
)
)
result_broken = (
joined_broken
.group_by("financials.year", "financials.month")
.aggregate("financials.amount", "hours.total_hours")
.order_by("financials.year", "financials.month")
)
print(execute(result_broken).to_string(index=False))
print("^ amount tripled — BSL warned about grain mismatch and invoked join_many,")
print(" but with_measures corrupts SQL: hours not pre-aggregated → fanout")
Pull #219 made it possible to join semantic tables with different grains. However, when adding measures to the resulting semantic table there is still fanout. Below script demonstrates.