From c4a9fe9b544de8e5c596f9f160a5ce6e8225bdc4 Mon Sep 17 00:00:00 2001 From: dlovell Date: Tue, 21 Apr 2026 15:23:40 -0400 Subject: [PATCH 1/5] feat: add reemit to BSL TagHandler for catalog rebuild support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register a `reemit` callable on `bsl_tag_handler` so that `xorq catalog replay --rebuild` can re-emit BSL-tagged subtrees with translated sources. The rebuild logic re-stamps the original tag metadata onto the rebuilt source—no metadata manipulation needed because BSL's tag captures the full query recipe. Co-Authored-By: Claude Opus 4.6 --- .../serialization/tag_handler.py | 17 ++ .../tests/test_xorq_rebuild.py | 248 ++++++++++++++++++ 2 files changed, 265 insertions(+) create mode 100644 src/boring_semantic_layer/tests/test_xorq_rebuild.py diff --git a/src/boring_semantic_layer/serialization/tag_handler.py b/src/boring_semantic_layer/serialization/tag_handler.py index 9e3d4f2..898ca7e 100644 --- a/src/boring_semantic_layer/serialization/tag_handler.py +++ b/src/boring_semantic_layer/serialization/tag_handler.py @@ -65,10 +65,26 @@ def from_tag_node(tag_node): return reconstruct_bsl_operation(metadata, expr, ctx) +def reemit(tag_node, rebuild_subexpr): + """Re-emit a BSL-tagged subtree with a translated source. + + ``from_tag_node`` returns the base SemanticModel (discarding the query + chain), so it cannot be used for rebuild — rebuild needs the full tag + metadata to reproduce the original query. This function works from the + tag node directly: it rebuilds the source subtree and re-stamps the + original tag metadata on top. + """ + new_source = rebuild_subexpr(tag_node.parent.to_expr()) + meta = dict(tag_node.metadata) + tag_name = meta.pop("tag") + return new_source.tag(tag=tag_name, **meta) + + bsl_tag_handler = TagHandler( tag_names=("bsl",), extract_metadata=extract_metadata, from_tag_node=from_tag_node, + reemit=reemit, ) @@ -76,4 +92,5 @@ def from_tag_node(tag_node): "bsl_tag_handler", "extract_metadata", "from_tag_node", + "reemit", ] diff --git a/src/boring_semantic_layer/tests/test_xorq_rebuild.py b/src/boring_semantic_layer/tests/test_xorq_rebuild.py new file mode 100644 index 0000000..e08a3ce --- /dev/null +++ b/src/boring_semantic_layer/tests/test_xorq_rebuild.py @@ -0,0 +1,248 @@ +"""Tests for BSL rebuild support via ``reemit`` on the TagHandler. + +Covers: +- ``reemit`` is registered on ``bsl_tag_handler`` +- Identity reemit preserves tag metadata (round-trip invariant) +- ``get_rebuild_dispatch`` returns handler-level reemit for BSL tags +- Catalog rebuild round-trip with BSL entries +- Rebuilt BSL entries execute correctly +- Query-chain (aggregate) rebuild +""" + +from __future__ import annotations + +from pathlib import Path + +import ibis +import pytest + +from boring_semantic_layer import SemanticModel +from boring_semantic_layer.serialization import to_tagged +from boring_semantic_layer.serialization.tag_handler import ( + bsl_tag_handler, + reemit, +) + +xorq = pytest.importorskip("xorq", reason="xorq not installed") + + +def _tag_node(tagged_expr): + return tagged_expr.op() + + +# --------------------------------------------------------------------------- +# Phase 2: reemit registration +# --------------------------------------------------------------------------- + + +def test_reemit_registered_on_handler(): + assert bsl_tag_handler.reemit is reemit + + +def test_reemit_is_callable(): + assert callable(bsl_tag_handler.reemit) + + +# --------------------------------------------------------------------------- +# Phase 3: identity reemit preserves tag metadata (Invariant B) +# --------------------------------------------------------------------------- + + +@pytest.fixture +def simple_model(): + table = ibis.memtable({"a": [1, 2, 3], "b": [4, 5, 6]}) + return SemanticModel( + table=table, + dimensions={"a": lambda t: t.a, "b": lambda t: t.b}, + measures={"sum_b": lambda t: t.b.sum(), "avg_b": lambda t: t.b.mean()}, + name="simple", + ) + + +def test_identity_reemit_preserves_metadata(simple_model): + tagged = to_tagged(simple_model) + original_meta = dict(_tag_node(tagged).metadata) + + rebuilt = reemit(_tag_node(tagged), rebuild_subexpr=lambda e: e) + rebuilt_meta = dict(_tag_node(rebuilt).metadata) + + assert original_meta == rebuilt_meta + + +def test_identity_reemit_on_query_chain(simple_model): + query = simple_model.query(dimensions=("a",), measures=("sum_b",)) + tagged = to_tagged(query) + original_meta = dict(_tag_node(tagged).metadata) + + rebuilt = reemit(_tag_node(tagged), rebuild_subexpr=lambda e: e) + rebuilt_meta = dict(_tag_node(rebuilt).metadata) + + assert original_meta == rebuilt_meta + + +# --------------------------------------------------------------------------- +# get_rebuild_dispatch returns handler-level reemit for BSL +# --------------------------------------------------------------------------- + + +def test_get_rebuild_dispatch_returns_callable_for_bsl(simple_model): + from xorq.expr.builders import get_rebuild_dispatch + + tagged = to_tagged(simple_model) + dispatch = get_rebuild_dispatch(_tag_node(tagged)) + assert callable(dispatch) + + +def test_get_rebuild_dispatch_invokes_handler_reemit(simple_model): + from xorq.expr.builders import get_rebuild_dispatch + + tagged = to_tagged(simple_model) + dispatch = get_rebuild_dispatch(_tag_node(tagged)) + result = dispatch(lambda e: e) + assert result is not None + rebuilt_meta = dict(_tag_node(result).metadata) + original_meta = dict(_tag_node(tagged).metadata) + assert original_meta == rebuilt_meta + + +# --------------------------------------------------------------------------- +# Catalog helpers +# --------------------------------------------------------------------------- + + +def _make_catalog(tmpdir, name="src"): + import xorq.api as xo + from xorq.catalog.backend import GitBackend + from xorq.catalog.catalog import Catalog + + repo = Catalog.init_repo_path(Path(tmpdir).joinpath(name)) + catalog = Catalog(backend=GitBackend(repo=repo)) + return catalog, xo + + +def _add_source_with_identity_transform(catalog, xo, data, *, source_alias, transform_alias): + from xorq.vendor.ibis.expr import operations as ops + + source_expr = xo.memtable(data, name=source_alias) + source_entry = catalog.add(source_expr, aliases=(source_alias,)) + + unbound = ops.UnboundTable(name="p", schema=source_expr.schema()).to_expr() + identity = unbound.select(*source_expr.columns) + transform_entry = catalog.add(identity, aliases=(transform_alias,)) + + return source_entry, transform_entry + + +def _replay_rebuild(source_catalog_obj, target_path): + from xorq.catalog.catalog import Catalog + from xorq.catalog.replay import Replayer + + target = Catalog.from_repo_path(target_path, init=True) + Replayer(from_catalog=source_catalog_obj, rebuild=True).replay(target) + return target + + +# --------------------------------------------------------------------------- +# Catalog rebuild: query chain (SemanticAggregateOp) +# --------------------------------------------------------------------------- + + +@pytest.fixture +def catalog_with_bsl_query(tmpdir): + from xorq.catalog.bind import bind + + catalog, xo = _make_catalog(tmpdir) + + source_entry, transform_entry = _add_source_with_identity_transform( + catalog, + xo, + {"origin": ["JFK", "LAX", "ORD"], "delay": [10.0, -5.0, 3.0]}, + source_alias="flights", + transform_alias="flights-identity", + ) + + bound = bind(source_entry, transform_entry) + model = SemanticModel( + table=bound, + dimensions={"origin": lambda t: t.origin}, + measures={"avg_delay": lambda t: t.delay.mean()}, + name="flights_model", + ) + tagged = to_tagged( + model.query(dimensions=("origin",), measures=("avg_delay",)) + ) + bsl_entry = catalog.add(tagged, aliases=("origin-delays",)) + + return catalog, source_entry, bsl_entry + + +def test_catalog_rebuild_produces_consistent_target(catalog_with_bsl_query, tmpdir): + catalog, _, _ = catalog_with_bsl_query + target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) + assert len(target.list()) == len(catalog.list()) + assert set(target.list_aliases()) == set(catalog.list_aliases()) + target.assert_consistency() + + +def test_catalog_rebuild_bsl_entry_exists(catalog_with_bsl_query, tmpdir): + catalog, _, _ = catalog_with_bsl_query + target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) + entry = target.get_catalog_entry("origin-delays", maybe_alias=True) + assert entry is not None + + +def test_catalog_rebuild_bsl_entry_executes(catalog_with_bsl_query, tmpdir): + catalog, _, _ = catalog_with_bsl_query + target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) + entry = target.get_catalog_entry("origin-delays", maybe_alias=True) + result = entry.lazy_expr.execute() + assert len(result) == 3 + assert "origin" in result.columns + assert "avg_delay" in result.columns + + +# --------------------------------------------------------------------------- +# Catalog rebuild: base model (SemanticTableOp) +# --------------------------------------------------------------------------- + + +@pytest.fixture +def catalog_with_base_model(tmpdir): + from xorq.catalog.bind import bind + + catalog, xo = _make_catalog(tmpdir) + + source_entry, transform_entry = _add_source_with_identity_transform( + catalog, + xo, + {"city": ["NYC", "LA"], "pop": [8_000_000, 4_000_000]}, + source_alias="cities", + transform_alias="cities-identity", + ) + + bound = bind(source_entry, transform_entry) + model = SemanticModel( + table=bound, + dimensions={"city": lambda t: t.city}, + measures={"total_pop": lambda t: t.pop.sum()}, + name="city_model", + ) + tagged = to_tagged(model) + bsl_entry = catalog.add(tagged, aliases=("city-stats",)) + + return catalog, source_entry, bsl_entry + + +def test_catalog_rebuild_base_model(catalog_with_base_model, tmpdir): + catalog, _, _ = catalog_with_base_model + target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) + assert set(target.list_aliases()) == set(catalog.list_aliases()) + target.assert_consistency() + + +def test_catalog_rebuild_base_model_executes(catalog_with_base_model, tmpdir): + catalog, _, _ = catalog_with_base_model + target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) + entry = target.get_catalog_entry("city-stats", maybe_alias=True) + result = entry.lazy_expr.execute() + assert len(result) == 2 From dc68b3251b3e01ee339a6a014dc0203304cd3246 Mon Sep 17 00:00:00 2001 From: dlovell Date: Tue, 21 Apr 2026 15:42:03 -0400 Subject: [PATCH 2/5] fix: guard reemit wiring against xorq without the field CI installs xorq 0.3.19 which doesn't have `reemit` on TagHandler yet. Conditionally pass it only when the field exists, and skip rebuild tests when it's absent. Co-Authored-By: Claude Opus 4.6 --- .../serialization/tag_handler.py | 7 +++++-- .../tests/test_xorq_rebuild.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/boring_semantic_layer/serialization/tag_handler.py b/src/boring_semantic_layer/serialization/tag_handler.py index 898ca7e..031e76c 100644 --- a/src/boring_semantic_layer/serialization/tag_handler.py +++ b/src/boring_semantic_layer/serialization/tag_handler.py @@ -80,12 +80,15 @@ def reemit(tag_node, rebuild_subexpr): return new_source.tag(tag=tag_name, **meta) -bsl_tag_handler = TagHandler( +_handler_kwargs = dict( tag_names=("bsl",), extract_metadata=extract_metadata, from_tag_node=from_tag_node, - reemit=reemit, ) +if "reemit" in {a.name for a in TagHandler.__attrs_attrs__}: + _handler_kwargs["reemit"] = reemit + +bsl_tag_handler = TagHandler(**_handler_kwargs) __all__ = [ diff --git a/src/boring_semantic_layer/tests/test_xorq_rebuild.py b/src/boring_semantic_layer/tests/test_xorq_rebuild.py index e08a3ce..8a1cd49 100644 --- a/src/boring_semantic_layer/tests/test_xorq_rebuild.py +++ b/src/boring_semantic_layer/tests/test_xorq_rebuild.py @@ -25,6 +25,13 @@ xorq = pytest.importorskip("xorq", reason="xorq not installed") +from xorq.expr.builders import TagHandler as _TagHandler + +_has_reemit = "reemit" in {a.name for a in _TagHandler.__attrs_attrs__} +requires_reemit = pytest.mark.skipif( + not _has_reemit, reason="xorq TagHandler does not have reemit field" +) + def _tag_node(tagged_expr): return tagged_expr.op() @@ -35,10 +42,12 @@ def _tag_node(tagged_expr): # --------------------------------------------------------------------------- +@requires_reemit def test_reemit_registered_on_handler(): assert bsl_tag_handler.reemit is reemit +@requires_reemit def test_reemit_is_callable(): assert callable(bsl_tag_handler.reemit) @@ -59,6 +68,7 @@ def simple_model(): ) +@requires_reemit def test_identity_reemit_preserves_metadata(simple_model): tagged = to_tagged(simple_model) original_meta = dict(_tag_node(tagged).metadata) @@ -69,6 +79,7 @@ def test_identity_reemit_preserves_metadata(simple_model): assert original_meta == rebuilt_meta +@requires_reemit def test_identity_reemit_on_query_chain(simple_model): query = simple_model.query(dimensions=("a",), measures=("sum_b",)) tagged = to_tagged(query) @@ -85,6 +96,7 @@ def test_identity_reemit_on_query_chain(simple_model): # --------------------------------------------------------------------------- +@requires_reemit def test_get_rebuild_dispatch_returns_callable_for_bsl(simple_model): from xorq.expr.builders import get_rebuild_dispatch @@ -93,6 +105,7 @@ def test_get_rebuild_dispatch_returns_callable_for_bsl(simple_model): assert callable(dispatch) +@requires_reemit def test_get_rebuild_dispatch_invokes_handler_reemit(simple_model): from xorq.expr.builders import get_rebuild_dispatch @@ -176,6 +189,7 @@ def catalog_with_bsl_query(tmpdir): return catalog, source_entry, bsl_entry +@requires_reemit def test_catalog_rebuild_produces_consistent_target(catalog_with_bsl_query, tmpdir): catalog, _, _ = catalog_with_bsl_query target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) @@ -184,6 +198,7 @@ def test_catalog_rebuild_produces_consistent_target(catalog_with_bsl_query, tmpd target.assert_consistency() +@requires_reemit def test_catalog_rebuild_bsl_entry_exists(catalog_with_bsl_query, tmpdir): catalog, _, _ = catalog_with_bsl_query target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) @@ -191,6 +206,7 @@ def test_catalog_rebuild_bsl_entry_exists(catalog_with_bsl_query, tmpdir): assert entry is not None +@requires_reemit def test_catalog_rebuild_bsl_entry_executes(catalog_with_bsl_query, tmpdir): catalog, _, _ = catalog_with_bsl_query target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) @@ -233,6 +249,7 @@ def catalog_with_base_model(tmpdir): return catalog, source_entry, bsl_entry +@requires_reemit def test_catalog_rebuild_base_model(catalog_with_base_model, tmpdir): catalog, _, _ = catalog_with_base_model target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) @@ -240,6 +257,7 @@ def test_catalog_rebuild_base_model(catalog_with_base_model, tmpdir): target.assert_consistency() +@requires_reemit def test_catalog_rebuild_base_model_executes(catalog_with_base_model, tmpdir): catalog, _, _ = catalog_with_base_model target = _replay_rebuild(catalog, Path(tmpdir).joinpath("tgt")) From c621a7844dc08cb0a11e7af1ff817dd489a0084d Mon Sep 17 00:00:00 2001 From: dlovell Date: Tue, 21 Apr 2026 15:47:59 -0400 Subject: [PATCH 3/5] ci: install xorq from feat/catalog/replay-rebuild branch Temporary: install xorq from the feature branch that has the `reemit` field on TagHandler so rebuild tests run in CI instead of being skipped. Revert once the xorq release ships. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci-release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci-release.yml b/.github/workflows/ci-release.yml index 8cfec41..28c3f4c 100644 --- a/.github/workflows/ci-release.yml +++ b/.github/workflows/ci-release.yml @@ -45,6 +45,7 @@ jobs: - name: Install dependencies run: | uv sync --all-extras + uv pip install "xorq[duckdb] @ git+https://github.com/xorq-labs/xorq@feat/catalog/replay-rebuild" cd docs/web && npm ci - name: Run all checks (tests + examples + docs build) From a7ac6a9bccd1dde17595ec20c23cb20541afa32d Mon Sep 17 00:00:00 2001 From: dlovell Date: Wed, 22 Apr 2026 07:54:20 -0400 Subject: [PATCH 4/5] fix: guard reemit against missing parent and add edge-case tests Fail fast with a clear ValueError when tag_node.parent is None instead of a bare AttributeError. Add non-identity transform tests and a missing-parent test to cover paths the original suite missed. Co-Authored-By: Claude Opus 4.6 --- .../serialization/tag_handler.py | 2 + .../tests/test_xorq_rebuild.py | 47 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/src/boring_semantic_layer/serialization/tag_handler.py b/src/boring_semantic_layer/serialization/tag_handler.py index 031e76c..1cc065f 100644 --- a/src/boring_semantic_layer/serialization/tag_handler.py +++ b/src/boring_semantic_layer/serialization/tag_handler.py @@ -74,6 +74,8 @@ def reemit(tag_node, rebuild_subexpr): tag node directly: it rebuilds the source subtree and re-stamps the original tag metadata on top. """ + if tag_node.parent is None: + raise ValueError("tag_node has no parent; cannot rebuild a root tag node") new_source = rebuild_subexpr(tag_node.parent.to_expr()) meta = dict(tag_node.metadata) tag_name = meta.pop("tag") diff --git a/src/boring_semantic_layer/tests/test_xorq_rebuild.py b/src/boring_semantic_layer/tests/test_xorq_rebuild.py index 8a1cd49..8128c48 100644 --- a/src/boring_semantic_layer/tests/test_xorq_rebuild.py +++ b/src/boring_semantic_layer/tests/test_xorq_rebuild.py @@ -91,6 +91,35 @@ def test_identity_reemit_on_query_chain(simple_model): assert original_meta == rebuilt_meta +@requires_reemit +def test_reemit_with_source_transform(simple_model): + tagged = to_tagged(simple_model) + original_meta = dict(_tag_node(tagged).metadata) + + def rename_column(expr): + return expr.rename(a_renamed="a") + + rebuilt = reemit(_tag_node(tagged), rebuild_subexpr=rename_column) + rebuilt_meta = dict(_tag_node(rebuilt).metadata) + assert original_meta == rebuilt_meta + assert "a_renamed" in rebuilt.columns + + +@requires_reemit +def test_reemit_query_chain_with_source_transform(simple_model): + query = simple_model.query(dimensions=("a",), measures=("sum_b",)) + tagged = to_tagged(query) + original_meta = dict(_tag_node(tagged).metadata) + + def add_column(expr): + return expr.mutate(extra=ibis.literal(1)) + + rebuilt = reemit(_tag_node(tagged), rebuild_subexpr=add_column) + rebuilt_meta = dict(_tag_node(rebuilt).metadata) + assert original_meta == rebuilt_meta + assert "extra" in rebuilt.columns + + # --------------------------------------------------------------------------- # get_rebuild_dispatch returns handler-level reemit for BSL # --------------------------------------------------------------------------- @@ -264,3 +293,21 @@ def test_catalog_rebuild_base_model_executes(catalog_with_base_model, tmpdir): entry = target.get_catalog_entry("city-stats", maybe_alias=True) result = entry.lazy_expr.execute() assert len(result) == 2 + + +# --------------------------------------------------------------------------- +# Edge cases +# --------------------------------------------------------------------------- + + +@requires_reemit +def test_reemit_raises_on_missing_parent(simple_model): + tagged = to_tagged(simple_model) + node = _tag_node(tagged) + original_parent = node.parent + try: + node.parent = None + with pytest.raises(ValueError, match="no parent"): + reemit(node, rebuild_subexpr=lambda e: e) + finally: + node.parent = original_parent From 4fb5ccfe8b6136680707da8e69a7cb8624548bad Mon Sep 17 00:00:00 2001 From: dlovell Date: Wed, 22 Apr 2026 08:17:02 -0400 Subject: [PATCH 5/5] revert: remove xorq branch pin from CI xorq's ci-downstream-bsl workflow handles testing BSL against in-flight xorq changes, so BSL CI doesn't need to pin a branch. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/ci-release.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci-release.yml b/.github/workflows/ci-release.yml index 28c3f4c..8cfec41 100644 --- a/.github/workflows/ci-release.yml +++ b/.github/workflows/ci-release.yml @@ -45,7 +45,6 @@ jobs: - name: Install dependencies run: | uv sync --all-extras - uv pip install "xorq[duckdb] @ git+https://github.com/xorq-labs/xorq@feat/catalog/replay-rebuild" cd docs/web && npm ci - name: Run all checks (tests + examples + docs build)