Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions src/boring_semantic_layer/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4003,12 +4003,25 @@ def _rebind_join_backends(left_tbl, right_tbl):
backends found" unless all tables in a join share the same instance.
Uses ``op.replace()`` (ibis graph rewriting) to swap out the
``source`` field on every ``DatabaseTable`` node in the right tree.

For plain ibis backends (e.g. Snowflake, Databricks) that are not
wrapped by xorq, the walk_nodes utility cannot traverse the
expression tree. In that case we skip rebinding — it is only
needed for xorq-vendored backends.
"""
from xorq.common.utils.node_utils import walk_nodes
from xorq.vendor.ibis.expr.operations import relations as xorq_rel
try:
from xorq.common.utils.node_utils import walk_nodes
from xorq.vendor.ibis.expr.operations import relations as xorq_rel
except ImportError:
return left_tbl, right_tbl

# Find a canonical backend from the left tree.
db_tables = list(walk_nodes((xorq_rel.DatabaseTable,), left_tbl))
try:
db_tables = list(walk_nodes((xorq_rel.DatabaseTable,), left_tbl))
except (ValueError, TypeError):
# Plain ibis Table objects cannot be traversed by xorq's
# walk_nodes — rebinding is not needed for these backends.
return left_tbl, right_tbl
canonical = db_tables[0].source if db_tables else None

if canonical is None:
Expand Down
221 changes: 220 additions & 1 deletion src/boring_semantic_layer/tests/test_plain_ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
See: https://github.com/boringdata/boring-semantic-layer/issues/216
"""

import os
import tempfile
import textwrap

import ibis
import pandas as pd
import pytest

from boring_semantic_layer import to_semantic_table
from boring_semantic_layer import from_yaml, to_semantic_table


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -171,6 +175,221 @@ def test_join_one(self, flights_model, carriers_model):
assert len(result) == 3
assert "name" in result.columns

def test_rebind_join_backends_with_plain_ibis_table(self, plain_ibis_con):
    """_rebind_join_backends is a no-op for plain ibis tables (GH-221).

    Exercises the guarded code path directly: xorq's walk_nodes cannot
    traverse plain ibis Table objects, so both inputs must be returned
    unchanged rather than raising.
    """
    from boring_semantic_layer.ops import SemanticJoinOp

    left = plain_ibis_con.create_table("rebind_l", pd.DataFrame({"a": [1]}))
    right = plain_ibis_con.create_table("rebind_r", pd.DataFrame({"a": [2]}))

    rebound = SemanticJoinOp._rebind_join_backends(left, right)

    # A skipped rebind must hand back the exact same table objects.
    assert rebound[0] is left
    assert rebound[1] is right

def test_rebind_mixed_xorq_left_plain_ibis_right(self, plain_ibis_con):
    """Mixed scenario: xorq-wrapped left + plain ibis right (GH-221).

    With an xorq-wrapped left table (walk_nodes succeeds) and a plain
    ibis right table, the replacer must be a no-op on the right side,
    because plain ibis ops are not xorq DatabaseTable instances.
    """
    from boring_semantic_layer.ops import SemanticJoinOp

    try:
        from xorq.common.utils.ibis_utils import from_ibis
    except ImportError:
        pytest.skip("xorq not available")

    backend = ibis.duckdb.connect()
    plain_right = backend.create_table("mixed_r", pd.DataFrame({"id": [1, 2]}))
    xorq_left = from_ibis(
        backend.create_table("mixed_l", pd.DataFrame({"id": [1, 2]}))
    )

    # Must not raise — the replacer skips non-xorq ops on the right.
    rebound_left, rebound_right = SemanticJoinOp._rebind_join_backends(
        xorq_left, plain_right
    )
    assert rebound_left is not None
    assert rebound_right is not None

def test_rebind_with_xorq_wrapped_tables(self):
    """Happy path: xorq-wrapped tables still rebind correctly (GH-221).

    Guards against the ImportError/traversal fallback regressing the
    normal, all-xorq code path.
    """
    from boring_semantic_layer.ops import SemanticJoinOp

    try:
        from xorq.common.utils.ibis_utils import from_ibis
    except ImportError:
        pytest.skip("xorq not available")

    backend = ibis.duckdb.connect()
    left = from_ibis(backend.create_table("xorq_l", pd.DataFrame({"a": [1]})))
    right = from_ibis(backend.create_table("xorq_r", pd.DataFrame({"b": [2]})))

    rebound_left, rebound_right = SemanticJoinOp._rebind_join_backends(left, right)
    assert rebound_left is not None
    assert rebound_right is not None
    # Both sides must remain executable expressions after rebinding.
    assert rebound_left.execute() is not None
    assert rebound_right.execute() is not None

def test_yaml_join_unsupported_backend(self, plain_ibis_con, monkeypatch):
    """Regression test for issue #221 using the YAML join flow.

    The reported failure came from `from_yaml(...)` models running on a
    plain ibis backend (e.g. Snowflake) where `_rebind_join_backends`
    would call xorq's `walk_nodes` on an ibis Table and raise
    `ValueError: Don't know how to handle type ...`.
    """
    from boring_semantic_layer import ops

    # Force the plain-ibis code path even when xorq is installed.
    monkeypatch.setattr(ops, "_ensure_xorq_table", lambda table: table)

    users_tbl = plain_ibis_con.create_table(
        "users_fact_yaml_221",
        pd.DataFrame(
            {
                "USER_KEY": [1, 2, 3],
                "PROGRAM_KEY": [10, 10, 20],
            }
        ),
        overwrite=True,
    )
    programs_tbl = plain_ibis_con.create_table(
        "programs_table_yaml_221",
        pd.DataFrame(
            {
                "PROGRAM_KEY": [10, 20],
                "PROGRAM": ["A", "B"],
            }
        ),
        overwrite=True,
    )

    yaml_text = textwrap.dedent(
        """
        users:
          table: users__fact
          description: "users fact table"
          dimensions:
            user_key: _.USER_KEY
            program_key: _.PROGRAM_KEY
          measures:
            user_count: _.USER_KEY.nunique()
          joins:
            programs:
              model: programs
              type: one
              left_on: PROGRAM_KEY
              right_on: PROGRAM_KEY

        programs:
          table: programs_table
          dimensions:
            program_key: _.PROGRAM_KEY
            program: _.PROGRAM
        """
    )

    # Context-managed temp dir instead of mkstemp/os.close/finally-unlink:
    # cleanup is automatic even if from_yaml or execute raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        yaml_path = os.path.join(tmp_dir, "models_221.yaml")
        with open(yaml_path, "w", encoding="utf-8") as f:
            f.write(yaml_text)

        models = from_yaml(
            yaml_path,
            tables={
                "users__fact": users_tbl,
                "programs_table": programs_tbl,
            },
        )
        result = (
            models["users"]
            .group_by("programs.program")
            .aggregate("users.user_count")
            .execute()
        )

    records = (
        result.sort_values("programs.program")
        .reset_index(drop=True)
        .to_dict("records")
    )
    assert records == [
        {"programs.program": "A", "users.user_count": 2},
        {"programs.program": "B", "users.user_count": 1},
    ]

def test_chained_joins_plain_ibis(self, plain_ibis_con):
    """Multi-table chained joins work on plain ibis backends (GH-221)."""
    # Small star schema: orders references both customers and products.
    orders_raw = plain_ibis_con.create_table(
        "orders_pi",
        pd.DataFrame(
            {
                "order_id": [1, 2, 3],
                "customer_id": [10, 20, 10],
                "product_id": [100, 100, 200],
            }
        ),
    )
    customers_raw = plain_ibis_con.create_table(
        "customers_pi",
        pd.DataFrame(
            {
                "customer_id": [10, 20],
                "name": ["Alice", "Bob"],
            }
        ),
    )
    products_raw = plain_ibis_con.create_table(
        "products_pi",
        pd.DataFrame(
            {
                "product_id": [100, 200],
                "product_name": ["Widget", "Gadget"],
            }
        ),
    )

    orders_model = (
        to_semantic_table(orders_raw, name="orders")
        .with_dimensions(
            order_id=lambda tbl: tbl.order_id,
            customer_id=lambda tbl: tbl.customer_id,
            product_id=lambda tbl: tbl.product_id,
        )
        .with_measures(order_count=lambda tbl: tbl.count())
    )
    customers_model = to_semantic_table(
        customers_raw, name="customers"
    ).with_dimensions(
        customer_id=lambda tbl: tbl.customer_id,
        name=lambda tbl: tbl.name,
    )
    products_model = to_semantic_table(
        products_raw, name="products"
    ).with_dimensions(
        product_id=lambda tbl: tbl.product_id,
        product_name=lambda tbl: tbl.product_name,
    )

    chained = orders_model.join_one(
        customers_model,
        on=lambda left, right: left.customer_id == right.customer_id,
    ).join_one(
        products_model,
        on=lambda left, right: left.product_id == right.product_id,
    )
    frame = (
        chained.group_by("name", "product_name")
        .aggregate("order_count")
        .execute()
    )

    assert len(frame) >= 2
    assert "name" in frame.columns
    assert "product_name" in frame.columns
    # Every one of the three orders must survive the two joins.
    assert frame.order_count.sum() == 3


class TestPlainIbisSerializationGating:
"""Serialization features raise clear errors for non-xorq backends."""
Expand Down
Loading