From 9fb5c254ebde02cc8c223a972d9307dfb1ccf5a6 Mon Sep 17 00:00:00 2001 From: boringdata Date: Sat, 28 Mar 2026 07:32:02 +0000 Subject: [PATCH 1/4] fix: handle plain ibis backends in join backend rebinding (#221) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _rebind_join_backends uses xorq's walk_nodes to traverse expression trees, which fails with ValueError on plain ibis Table objects (e.g. Snowflake, Databricks). Gracefully skip rebinding when walk_nodes cannot handle the expression — rebinding is only needed for xorq-vendored backends. Closes #221 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/boring_semantic_layer/ops.py | 19 ++++- .../tests/test_plain_ibis.py | 70 +++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py index 3702816f..9779ff53 100644 --- a/src/boring_semantic_layer/ops.py +++ b/src/boring_semantic_layer/ops.py @@ -3748,12 +3748,25 @@ def _rebind_join_backends(left_tbl, right_tbl): backends found" unless all tables in a join share the same instance. Uses ``op.replace()`` (ibis graph rewriting) to swap out the ``source`` field on every ``DatabaseTable`` node in the right tree. + + For plain ibis backends (e.g. Snowflake, Databricks) that are not + wrapped by xorq, the walk_nodes utility cannot traverse the + expression tree. In that case we skip rebinding — it is only + needed for xorq-vendored backends. """ - from xorq.common.utils.node_utils import walk_nodes - from xorq.vendor.ibis.expr.operations import relations as xorq_rel + try: + from xorq.common.utils.node_utils import walk_nodes + from xorq.vendor.ibis.expr.operations import relations as xorq_rel + except ImportError: + return left_tbl, right_tbl # Find a canonical backend from the left tree. - db_tables = list(walk_nodes((xorq_rel.DatabaseTable,), left_tbl)) + try: + db_tables = list(walk_nodes((xorq_rel.DatabaseTable,), left_tbl)) + except (ValueError, TypeError): + # Plain ibis Table objects cannot be traversed by xorq's + # walk_nodes — rebinding is not needed for these backends. + return left_tbl, right_tbl canonical = db_tables[0].source if db_tables else None if canonical is None: diff --git a/src/boring_semantic_layer/tests/test_plain_ibis.py b/src/boring_semantic_layer/tests/test_plain_ibis.py index 9e079756..4d3d8377 100644 --- a/src/boring_semantic_layer/tests/test_plain_ibis.py +++ b/src/boring_semantic_layer/tests/test_plain_ibis.py @@ -171,6 +171,76 @@ def test_join_one(self, flights_model, carriers_model): assert len(result) == 3 assert "name" in result.columns + def test_yaml_join_on_plain_ibis(self, plain_ibis_con): + """YAML-defined joins work on plain ibis backends (GH-221). + + Reproduces the ValueError from _rebind_join_backends when xorq's + walk_nodes encounters a plain ibis Table. + """ + from boring_semantic_layer import from_yaml + + users_df = pd.DataFrame( + { + "USER_KEY": [1, 2, 3, 4, 5], + "PROGRAM_KEY": [10, 10, 20, 20, 30], + } + ) + programs_df = pd.DataFrame( + { + "PROGRAM_KEY": [10, 20, 30], + "PROGRAM": ["Alpha", "Beta", "Gamma"], + } + ) + plain_ibis_con.create_table("users_fact", users_df) + plain_ibis_con.create_table("programs_dim", programs_df) + + yaml_str = """ +users: + table: users_fact + dimensions: + user_key: _.USER_KEY + program_key: _.PROGRAM_KEY + measures: + user_count: _.USER_KEY.nunique() + joins: + programs: + model: programs + type: one + left_on: PROGRAM_KEY + right_on: PROGRAM_KEY + +programs: + table: programs_dim + dimensions: + program_key: _.PROGRAM_KEY + program: _.PROGRAM +""" + import tempfile, os + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".yaml", delete=False + ) as f: + f.write(yaml_str) + yaml_path = f.name + + try: + tables = { + "users_fact": plain_ibis_con.table("users_fact"), + "programs_dim": plain_ibis_con.table("programs_dim"), + } + models = from_yaml(yaml_path, tables=tables) + users_model = models["users"] + result = ( + users_model.group_by("programs.program") + .aggregate("users.user_count") + .execute() + ) + assert len(result) == 3 + assert "programs.program" in result.columns or "program" in result.columns + assert result.iloc[:, 1].sum() == 5 + finally: + os.unlink(yaml_path) + class TestPlainIbisSerializationGating: """Serialization features raise clear errors for non-xorq backends.""" From 3c09d64f848e64e6fc556668a3d7ad94cb95ed41 Mon Sep 17 00:00:00 2001 From: boringdata Date: Sat, 28 Mar 2026 08:46:44 +0000 Subject: [PATCH 2/4] fix: replace integration test with direct unit test for _rebind_join_backends The previous YAML integration test didn't actually reproduce the bug because DuckDB gets xorq-wrapped. This direct unit test calls _rebind_join_backends with plain ibis tables, hitting the exact ValueError code path from GH-221. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../tests/test_plain_ibis.py | 76 +++---------------- 1 file changed, 12 insertions(+), 64 deletions(-) diff --git a/src/boring_semantic_layer/tests/test_plain_ibis.py b/src/boring_semantic_layer/tests/test_plain_ibis.py index 4d3d8377..17a9433d 100644 --- a/src/boring_semantic_layer/tests/test_plain_ibis.py +++ b/src/boring_semantic_layer/tests/test_plain_ibis.py @@ -171,75 +171,23 @@ def test_join_one(self, flights_model, carriers_model): assert len(result) == 3 assert "name" in result.columns - def test_yaml_join_on_plain_ibis(self, plain_ibis_con): - """YAML-defined joins work on plain ibis backends (GH-221). + def test_rebind_join_backends_with_plain_ibis_table(self, plain_ibis_con): + """_rebind_join_backends gracefully skips plain ibis tables (GH-221). - Reproduces the ValueError from _rebind_join_backends when xorq's - walk_nodes encounters a plain ibis Table. + Directly tests the guarded code path: xorq's walk_nodes cannot + traverse plain ibis Table objects, so rebinding should be a no-op. """ - from boring_semantic_layer import from_yaml + from boring_semantic_layer.ops import SemanticJoinOp - users_df = pd.DataFrame( - { - "USER_KEY": [1, 2, 3, 4, 5], - "PROGRAM_KEY": [10, 10, 20, 20, 30], - } + t1 = plain_ibis_con.create_table( + "rebind_l", pd.DataFrame({"a": [1]}) ) - programs_df = pd.DataFrame( - { - "PROGRAM_KEY": [10, 20, 30], - "PROGRAM": ["Alpha", "Beta", "Gamma"], - } + t2 = plain_ibis_con.create_table( + "rebind_r", pd.DataFrame({"a": [2]}) ) - plain_ibis_con.create_table("users_fact", users_df) - plain_ibis_con.create_table("programs_dim", programs_df) - - yaml_str = """ -users: - table: users_fact - dimensions: - user_key: _.USER_KEY - program_key: _.PROGRAM_KEY - measures: - user_count: _.USER_KEY.nunique() - joins: - programs: - model: programs - type: one - left_on: PROGRAM_KEY - right_on: PROGRAM_KEY - -programs: - table: programs_dim - dimensions: - program_key: _.PROGRAM_KEY - program: _.PROGRAM -""" - import tempfile, os - - with tempfile.NamedTemporaryFile( - mode="w", suffix=".yaml", delete=False - ) as f: - f.write(yaml_str) - yaml_path = f.name - - try: - tables = { - "users_fact": plain_ibis_con.table("users_fact"), - "programs_dim": plain_ibis_con.table("programs_dim"), - } - models = from_yaml(yaml_path, tables=tables) - users_model = models["users"] - result = ( - users_model.group_by("programs.program") - .aggregate("users.user_count") - .execute() - ) - assert len(result) == 3 - assert "programs.program" in result.columns or "program" in result.columns - assert result.iloc[:, 1].sum() == 5 - finally: - os.unlink(yaml_path) + result_l, result_r = SemanticJoinOp._rebind_join_backends(t1, t2) + assert result_l is t1 + assert result_r is t2 class TestPlainIbisSerializationGating: From 169498379b236e0e7709770afd2fcf7952ba2cb5 Mon Sep 17 00:00:00 2001 From: boringdata Date: Tue, 31 Mar 2026 19:23:30 +0000 Subject: [PATCH 3/4] test: add comprehensive unit tests for _rebind_join_backends (GH-221) - Mixed xorq-left + plain-ibis-right scenario - xorq-wrapped happy path (no regression) - Chained multi-table joins on plain ibis backends Co-Authored-By: Claude Opus 4.6 (1M context) --- .../tests/test_plain_ibis.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/src/boring_semantic_layer/tests/test_plain_ibis.py b/src/boring_semantic_layer/tests/test_plain_ibis.py index 17a9433d..33d61741 100644 --- a/src/boring_semantic_layer/tests/test_plain_ibis.py +++ b/src/boring_semantic_layer/tests/test_plain_ibis.py @@ -189,6 +189,117 @@ def test_rebind_join_backends_with_plain_ibis_table(self, plain_ibis_con): assert result_l is t1 assert result_r is t2 + def test_rebind_mixed_xorq_left_plain_ibis_right(self, plain_ibis_con): + """Mixed scenario: xorq-wrapped left + plain ibis right (GH-221). + + When the left table is xorq-wrapped (walk_nodes succeeds) but the + right is plain ibis, the replacer should be a no-op on the right + side since plain ibis ops are not xorq DatabaseTable instances. + """ + from boring_semantic_layer.ops import SemanticJoinOp + + try: + from xorq.common.utils.ibis_utils import from_ibis + except ImportError: + pytest.skip("xorq not available") + + con = ibis.duckdb.connect() + plain_right = con.create_table( + "mixed_r", pd.DataFrame({"id": [1, 2]}) + ) + xorq_left = from_ibis( + con.create_table("mixed_l", pd.DataFrame({"id": [1, 2]})) + ) + # Should not raise — replacer is no-op on plain ibis ops + result_l, result_r = SemanticJoinOp._rebind_join_backends( + xorq_left, plain_right + ) + assert result_l is not None + assert result_r is not None + + def test_rebind_with_xorq_wrapped_tables(self): + """Happy path: xorq-wrapped tables still rebind correctly (GH-221). + + Ensures the fix doesn't regress the normal xorq code path. + """ + from boring_semantic_layer.ops import SemanticJoinOp + + try: + from xorq.common.utils.ibis_utils import from_ibis + except ImportError: + pytest.skip("xorq not available") + + con = ibis.duckdb.connect() + t1 = from_ibis(con.create_table("xorq_l", pd.DataFrame({"a": [1]}))) + t2 = from_ibis(con.create_table("xorq_r", pd.DataFrame({"b": [2]}))) + result_l, result_r = SemanticJoinOp._rebind_join_backends(t1, t2) + assert result_l is not None + assert result_r is not None + # Both should still be executable after rebinding + assert result_l.execute() is not None + assert result_r.execute() is not None + + def test_chained_joins_plain_ibis(self, plain_ibis_con): + """Multi-table chained joins work on plain ibis backends (GH-221).""" + orders_tbl = plain_ibis_con.create_table( + "orders_pi", pd.DataFrame({ + "order_id": [1, 2, 3], + "customer_id": [10, 20, 10], + "product_id": [100, 100, 200], + }) + ) + customers_tbl = plain_ibis_con.create_table( + "customers_pi", pd.DataFrame({ + "customer_id": [10, 20], + "name": ["Alice", "Bob"], + }) + ) + products_tbl = plain_ibis_con.create_table( + "products_pi", pd.DataFrame({ + "product_id": [100, 200], + "product_name": ["Widget", "Gadget"], + }) + ) + + orders = ( + to_semantic_table(orders_tbl, name="orders") + .with_dimensions( + order_id=lambda t: t.order_id, + customer_id=lambda t: t.customer_id, + product_id=lambda t: t.product_id, + ) + .with_measures(order_count=lambda t: t.count()) + ) + customers = ( + to_semantic_table(customers_tbl, name="customers") + .with_dimensions( + customer_id=lambda t: t.customer_id, + name=lambda t: t.name, + ) + ) + products = ( + to_semantic_table(products_tbl, name="products") + .with_dimensions( + product_id=lambda t: t.product_id, + product_name=lambda t: t.product_name, + ) + ) + + joined = orders.join_one( + customers, on=lambda o, c: o.customer_id == c.customer_id + ).join_one( + products, on=lambda o, p: o.product_id == p.product_id + ) + result = ( + joined.group_by("name", "product_name") + .aggregate("order_count") + .execute() + ) + assert len(result) >= 2 + assert "name" in result.columns + assert "product_name" in result.columns + assert result.order_count.sum() == 3 + class TestPlainIbisSerializationGating: """Serialization features raise clear errors for non-xorq backends.""" From 3fc624deb9660f56c10347bffb9e5ecd5e53d652 Mon Sep 17 00:00:00 2001 From: boringdata Date: Mon, 20 Apr 2026 19:40:53 +0000 Subject: [PATCH 4/4] test: cover yaml joins on plain ibis backends --- .../tests/test_plain_ibis.py | 92 ++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/src/boring_semantic_layer/tests/test_plain_ibis.py b/src/boring_semantic_layer/tests/test_plain_ibis.py index 33d61741..7547ceb9 100644 --- a/src/boring_semantic_layer/tests/test_plain_ibis.py +++ b/src/boring_semantic_layer/tests/test_plain_ibis.py @@ -4,11 +4,15 @@ See: https://github.com/boringdata/boring-semantic-layer/issues/216 """ +import os +import tempfile +import textwrap + import ibis import pandas as pd import pytest -from boring_semantic_layer import to_semantic_table +from boring_semantic_layer import from_yaml, to_semantic_table @pytest.fixture(scope="module") @@ -239,6 +243,92 @@ def test_rebind_with_xorq_wrapped_tables(self): assert result_l.execute() is not None assert result_r.execute() is not None + def test_yaml_join_unsupported_backend(self, plain_ibis_con, monkeypatch): + """Regression test for issue #221 using the YAML join flow. + + The reported failure came from `from_yaml(...)` models running on a + plain ibis backend (e.g. Snowflake) where `_rebind_join_backends` + would call xorq's `walk_nodes` on an ibis Table and raise + `ValueError: Don't know how to handle type ...`. + """ + from boring_semantic_layer import ops + + monkeypatch.setattr(ops, "_ensure_xorq_table", lambda table: table) + + users_tbl = plain_ibis_con.create_table( + "users_fact_yaml_221", + pd.DataFrame( + { + "USER_KEY": [1, 2, 3], + "PROGRAM_KEY": [10, 10, 20], + } + ), + overwrite=True, + ) + programs_tbl = plain_ibis_con.create_table( + "programs_table_yaml_221", + pd.DataFrame( + { + "PROGRAM_KEY": [10, 20], + "PROGRAM": ["A", "B"], + } + ), + overwrite=True, + ) + + yaml_text = textwrap.dedent( + """ + users: + table: users__fact + description: "users fact table" + dimensions: + user_key: _.USER_KEY + program_key: _.PROGRAM_KEY + measures: + user_count: _.USER_KEY.nunique() + joins: + programs: + model: programs + type: one + left_on: PROGRAM_KEY + right_on: PROGRAM_KEY + + programs: + table: programs_table + dimensions: + program_key: _.PROGRAM_KEY + program: _.PROGRAM + """ + ) + + fd, yaml_path = tempfile.mkstemp(suffix=".yaml") + os.close(fd) + try: + with open(yaml_path, "w", encoding="utf-8") as f: + f.write(yaml_text) + + models = from_yaml( + yaml_path, + tables={ + "users__fact": users_tbl, + "programs_table": programs_tbl, + }, + ) + result = ( + models["users"] + .group_by("programs.program") + .aggregate("users.user_count") + .execute() + ) + finally: + os.unlink(yaml_path) + + records = result.sort_values("programs.program").reset_index(drop=True).to_dict("records") + assert records == [ + {"programs.program": "A", "users.user_count": 2}, + {"programs.program": "B", "users.user_count": 1}, + ] + def test_chained_joins_plain_ibis(self, plain_ibis_con): """Multi-table chained joins work on plain ibis backends (GH-221).""" orders_tbl = plain_ibis_con.create_table(