Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/boring_semantic_layer/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ def _create_dimension(expr: Dimension | Callable | dict) -> Dimension:
is_time_dimension=expr.get("is_time_dimension", False),
smallest_time_grain=expr.get("smallest_time_grain"),
derived_dimensions=tuple(expr.get("derived_dimensions") or ()),
metadata=dict(expr.get("metadata") or {}),
)
return Dimension(expr=expr, description=None)

Expand Down
22 changes: 18 additions & 4 deletions src/boring_semantic_layer/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,22 +626,26 @@ def _infer_unnest(fn: Callable, table: Any) -> tuple[str, ...]:
return ()


def _extract_measure_metadata(fn_or_expr: Any) -> tuple[Any, str | None, tuple]:
def _extract_measure_metadata(
fn_or_expr: Any,
) -> tuple[Any, str | None, tuple, Mapping[str, Any]]:
"""Extract metadata from various measure representations."""
if isinstance(fn_or_expr, dict):
return (
fn_or_expr["expr"],
fn_or_expr.get("description"),
tuple(fn_or_expr.get("requires_unnest", [])),
dict(fn_or_expr.get("metadata") or {}),
)
elif isinstance(fn_or_expr, Measure):
return (
fn_or_expr.expr,
fn_or_expr.description,
fn_or_expr.requires_unnest,
dict(fn_or_expr.metadata),
)
else:
return (fn_or_expr, None, ())
return (fn_or_expr, None, (), {})


_AGG_METHODS = frozenset({"sum", "mean", "avg", "count", "min", "max"})
Expand Down Expand Up @@ -733,6 +737,7 @@ def _make_base_measure(
expr: Any,
description: str | None,
requires_unnest: tuple,
metadata: Mapping[str, Any] | None = None,
) -> Measure:
"""Create a base measure with proper callable wrapping using functional patterns."""

Expand Down Expand Up @@ -788,6 +793,7 @@ def wrapped_expr(t):
description=description,
requires_unnest=requires_unnest,
original_expr=raw_expr,
metadata=dict(metadata or {}),
)

if callable(expr):
Expand All @@ -806,19 +812,21 @@ def wrapped_expr(t):
description=description,
requires_unnest=requires_unnest,
original_expr=raw_expr,
metadata=dict(metadata or {}),
)
else:
return Measure(
expr=lambda t, fn=expr: evaluate_expr(fn, ColumnScope(_tbl=t)),
description=description,
requires_unnest=requires_unnest,
original_expr=raw_expr,
metadata=dict(metadata or {}),
)


def _classify_measure(fn_or_expr: Any, scope: Any) -> tuple[str, Any]:
"""Classify measure as 'calc' or 'base' with appropriate handling."""
expr, description, requires_unnest = _extract_measure_metadata(fn_or_expr)
expr, description, requires_unnest, metadata = _extract_measure_metadata(fn_or_expr)

resolved = safe(lambda: _resolve_expr(expr, scope))().map(
lambda val: ("calc", val) if _is_calculated_measure(val) else None
Expand All @@ -833,7 +841,7 @@ def _classify_measure(fn_or_expr: Any, scope: Any) -> tuple[str, Any]:
inferred_unnest = _infer_unnest(expr, table)
requires_unnest = requires_unnest or inferred_unnest

return ("base", _make_base_measure(expr, description, requires_unnest))
return ("base", _make_base_measure(expr, description, requires_unnest, metadata))


def _build_json_definition(
Expand Down Expand Up @@ -953,6 +961,7 @@ class Dimension:
is_event_timestamp: bool = False
smallest_time_grain: str | None = None
derived_dimensions: tuple[str, ...] = ()
metadata: Mapping[str, Any] = field(factory=dict, eq=False, hash=False)

def __call__(self, table: ir.Table, _dims: dict | None = None) -> ir.Value:
try:
Expand Down Expand Up @@ -990,6 +999,8 @@ def to_json(self) -> Mapping[str, Any]:
base["smallest_time_grain"] = self.smallest_time_grain
if self.derived_dimensions:
base["derived_dimensions"] = list(self.derived_dimensions)
if self.metadata:
base.update(self.metadata)
return base

def __hash__(self) -> int:
Expand All @@ -1011,6 +1022,7 @@ class Measure:
description: str | None = None
requires_unnest: tuple[str, ...] = () # Internal: Arrays that must be unnested
original_expr: Any = field(default=None, eq=False, hash=False)
metadata: Mapping[str, Any] = field(factory=dict, eq=False, hash=False)

def __call__(self, table: ir.Table) -> ir.Value:
return self.expr.resolve(table) if _is_deferred(self.expr) else self.expr(table)
Expand All @@ -1026,6 +1038,8 @@ def to_json(self) -> Mapping[str, Any]:
base["locality"] = self.locality
if self.requires_unnest:
base["requires_unnest"] = list(self.requires_unnest)
if self.metadata:
base.update(self.metadata)
return base

def __hash__(self) -> int:
Expand Down
152 changes: 152 additions & 0 deletions src/boring_semantic_layer/tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""Tests for the ``metadata`` field on Dimension and Measure."""

from pathlib import Path
import tempfile

import ibis

from boring_semantic_layer import from_yaml, to_semantic_table
from boring_semantic_layer.ops import Dimension, Measure


def _build_table():
    """Return a small in-memory DuckDB table with country/sales columns."""
    connection = ibis.duckdb.connect(":memory:")
    frame = ibis.memtable(
        {
            "country": ["SE", "DE", "GB"],
            "sales": [10.0, 20.0, 30.0],
        }
    )
    return connection.create_table("orders", frame)


def test_dimension_metadata_defaults_empty():
    """A Dimension built without metadata exposes an empty mapping."""
    plain = Dimension(expr=lambda t: t.country, description="Country")
    assert plain.metadata == {}
    assert plain.to_json() == {"description": "Country"}


def test_measure_metadata_defaults_empty():
    """A Measure built without metadata exposes an empty mapping."""
    plain = Measure(expr=lambda t: t.sales.sum(), description="Total sales")
    assert plain.metadata == {}
    assert plain.to_json() == {"description": "Total sales"}


def test_dimension_metadata_flows_into_to_json():
    """Free-form metadata keys are merged into the serialized dimension."""
    extra = {
        "entity_type": "market",
        "format": "iso_country",
        "example_values": ["SE", "DE", "GB"],
    }
    dimension = Dimension(
        expr=lambda t: t.country,
        description="Country ISO code",
        is_entity=True,
        metadata=extra,
    )
    serialized = dimension.to_json()
    assert serialized["description"] == "Country ISO code"
    assert serialized["is_entity"] is True
    assert serialized["entity_type"] == "market"
    assert serialized["format"] == "iso_country"
    assert serialized["example_values"] == ["SE", "DE", "GB"]


def test_measure_metadata_flows_into_to_json():
    """Free-form metadata keys are merged into the serialized measure."""
    total = Measure(
        expr=lambda t: t.sales.sum(),
        description="Total sales in EUR",
        metadata={"format": "currency_eur", "unit": "EUR", "is_additive": True},
    )
    serialized = total.to_json()
    assert serialized["description"] == "Total sales in EUR"
    assert serialized["format"] == "currency_eur"
    assert serialized["unit"] == "EUR"
    assert serialized["is_additive"] is True


def test_metadata_appears_in_json_definition():
    """Metadata survives model assembly and shows up in json_definition."""
    country_dim = Dimension(
        expr=lambda t: t.country,
        description="Country",
        is_entity=True,
        metadata={"entity_type": "market", "format": "iso_country"},
    )
    sales_measure = Measure(
        expr=lambda t: t.sales.sum(),
        description="Total sales",
        metadata={"format": "currency_eur", "unit": "EUR"},
    )
    model = (
        to_semantic_table(_build_table(), name="orders")
        .with_dimensions(country=country_dim)
        .with_measures(total_sales=sales_measure)
    )
    definition = model.json_definition
    country = definition["dimensions"]["country"]
    assert country["entity_type"] == "market"
    assert country["format"] == "iso_country"
    total = definition["measures"]["total_sales"]
    assert total["format"] == "currency_eur"
    assert total["unit"] == "EUR"


def test_metadata_equality_ignored():
    """metadata is excluded from equality and hashing (eq=False, hash=False)."""
    first = Dimension(expr=lambda t: t.country, metadata={"format": "iso_country"})
    second = Dimension(expr=first.expr, metadata={"format": "something_else"})
    assert first == second
    assert hash(first) == hash(second)


def test_metadata_overrides_base_fields():
    """Metadata is merged after base fields, so matching keys win."""
    shadowed = Dimension(
        expr=lambda t: t.country,
        description="real",
        metadata={"description": "override"},
    )
    assert shadowed.to_json()["description"] == "override"


def test_yaml_dimension_metadata_round_trip():
    """Metadata declared in YAML flows through from_yaml into json_definition."""
    yaml_content = """
orders:
  table: orders_tbl
  dimensions:
    country:
      expr: _.country
      description: Country ISO code
      is_entity: true
      metadata:
        entity_type: market
        format: iso_country
        example_values: [SE, DE, GB]
  measures:
    total_sales:
      expr: _.sales.sum()
      description: Total sales
      metadata:
        format: currency_eur
        unit: EUR
        is_additive: true
"""
    con = ibis.duckdb.connect(":memory:")
    tbl = con.create_table(
        "orders_tbl",
        ibis.memtable({"country": ["SE", "DE"], "sales": [1.0, 2.0]}),
    )

    with tempfile.TemporaryDirectory() as tmp:
        model_path = Path(tmp) / "models.yml"
        model_path.write_text(yaml_content)
        models = from_yaml(model_path, tables={"orders_tbl": tbl})

    definition = models["orders"].json_definition
    country = definition["dimensions"]["country"]
    assert country["entity_type"] == "market"
    assert country["format"] == "iso_country"
    assert country["example_values"] == ["SE", "DE", "GB"]
    total = definition["measures"]["total_sales"]
    assert total["format"] == "currency_eur"
    assert total["unit"] == "EUR"
    assert total["is_additive"] is True
17 changes: 10 additions & 7 deletions src/boring_semantic_layer/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def _parse_expression_config(name: str, config: str | dict, metric_type: str):
extra_kwargs["is_time_dimension"] = config.get("is_time_dimension", False)
extra_kwargs["smallest_time_grain"] = config.get("smallest_time_grain")
extra_kwargs["derived_dimensions"] = tuple(config.get("derived_dimensions") or ())
if "metadata" in config:
extra_kwargs["metadata"] = dict(config["metadata"] or {})
return config["expr"], config.get("description"), extra_kwargs
else:
raise ValueError(f"Invalid {metric_type} format for '{name}'. Must be a string or dict")
Expand All @@ -51,15 +53,15 @@ def _parse_dimension_or_measure(
is_time_dimension: true/false (dimensions only)
smallest_time_grain: "TIME_GRAIN_DAY" (dimensions only)
derived_dimensions: ["year", "month", "day"] (dimensions only)
metadata: {format: currency_eur, unit: EUR, ...} (free-form)
"""
expr_str, description, extra_kwargs = _parse_expression_config(name, config, metric_type)
deferred = safe_eval(expr_str, context={"_": _}).unwrap()
base_kwargs = {"expr": deferred, "description": description}
return (
Dimension(**base_kwargs, **extra_kwargs)
if metric_type == "dimension"
else Measure(**base_kwargs)
)
if metric_type == "dimension":
return Dimension(**base_kwargs, **extra_kwargs)
measure_kwargs = {"metadata": extra_kwargs["metadata"]} if "metadata" in extra_kwargs else {}
return Measure(**base_kwargs, **measure_kwargs)


def _parse_calc_measure(name: str, config: str | dict) -> Measure:
Expand All @@ -77,14 +79,15 @@ def _parse_calc_measure(name: str, config: str | dict) -> Measure:
pct_of_total:
expr: _.distance_sum / _.all(_.distance_sum) * 100
"""
expr_str, description, _ = _parse_expression_config(name, config, "measure")
expr_str, description, extra_kwargs = _parse_expression_config(name, config, "measure")

def _make_calc_fn(source: str):
def calc_fn(scope):
return safe_eval(source, context={"_": scope}).unwrap()
return calc_fn

return Measure(expr=_make_calc_fn(expr_str), description=description)
measure_kwargs = {"metadata": extra_kwargs["metadata"]} if "metadata" in extra_kwargs else {}
return Measure(expr=_make_calc_fn(expr_str), description=description, **measure_kwargs)


def _parse_filter(filter_expr: str) -> callable:
Expand Down
Loading