diff --git a/src/boring_semantic_layer/expr.py b/src/boring_semantic_layer/expr.py index a23d29f..5775e93 100644 --- a/src/boring_semantic_layer/expr.py +++ b/src/boring_semantic_layer/expr.py @@ -332,6 +332,7 @@ def _create_dimension(expr: Dimension | Callable | dict) -> Dimension: is_time_dimension=expr.get("is_time_dimension", False), smallest_time_grain=expr.get("smallest_time_grain"), derived_dimensions=tuple(expr.get("derived_dimensions") or ()), + metadata=dict(expr.get("metadata") or {}), ) return Dimension(expr=expr, description=None) diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py index 869f37d..3ac5542 100644 --- a/src/boring_semantic_layer/ops.py +++ b/src/boring_semantic_layer/ops.py @@ -626,22 +626,26 @@ def _infer_unnest(fn: Callable, table: Any) -> tuple[str, ...]: return () -def _extract_measure_metadata(fn_or_expr: Any) -> tuple[Any, str | None, tuple]: +def _extract_measure_metadata( + fn_or_expr: Any, +) -> tuple[Any, str | None, tuple, Mapping[str, Any]]: """Extract metadata from various measure representations.""" if isinstance(fn_or_expr, dict): return ( fn_or_expr["expr"], fn_or_expr.get("description"), tuple(fn_or_expr.get("requires_unnest", [])), + dict(fn_or_expr.get("metadata") or {}), ) elif isinstance(fn_or_expr, Measure): return ( fn_or_expr.expr, fn_or_expr.description, fn_or_expr.requires_unnest, + dict(fn_or_expr.metadata), ) else: - return (fn_or_expr, None, ()) + return (fn_or_expr, None, (), {}) _AGG_METHODS = frozenset({"sum", "mean", "avg", "count", "min", "max"}) @@ -733,6 +737,7 @@ def _make_base_measure( expr: Any, description: str | None, requires_unnest: tuple, + metadata: Mapping[str, Any] | None = None, ) -> Measure: """Create a base measure with proper callable wrapping using functional patterns.""" @@ -788,6 +793,7 @@ def wrapped_expr(t): description=description, requires_unnest=requires_unnest, original_expr=raw_expr, + metadata=dict(metadata or {}), ) if callable(expr): @@ -806,6 +812,7 @@ def wrapped_expr(t): description=description, requires_unnest=requires_unnest, original_expr=raw_expr, + metadata=dict(metadata or {}), ) else: return Measure( @@ -813,12 +820,13 @@ def wrapped_expr(t): description=description, requires_unnest=requires_unnest, original_expr=raw_expr, + metadata=dict(metadata or {}), ) def _classify_measure(fn_or_expr: Any, scope: Any) -> tuple[str, Any]: """Classify measure as 'calc' or 'base' with appropriate handling.""" - expr, description, requires_unnest = _extract_measure_metadata(fn_or_expr) + expr, description, requires_unnest, metadata = _extract_measure_metadata(fn_or_expr) resolved = safe(lambda: _resolve_expr(expr, scope))().map( lambda val: ("calc", val) if _is_calculated_measure(val) else None @@ -833,7 +841,7 @@ def _classify_measure(fn_or_expr: Any, scope: Any) -> tuple[str, Any]: inferred_unnest = _infer_unnest(expr, table) requires_unnest = requires_unnest or inferred_unnest - return ("base", _make_base_measure(expr, description, requires_unnest)) + return ("base", _make_base_measure(expr, description, requires_unnest, metadata)) def _build_json_definition( @@ -953,6 +961,7 @@ class Dimension: is_event_timestamp: bool = False smallest_time_grain: str | None = None derived_dimensions: tuple[str, ...] = () + metadata: Mapping[str, Any] = field(factory=dict, eq=False, hash=False) def __call__(self, table: ir.Table, _dims: dict | None = None) -> ir.Value: try: @@ -990,6 +999,8 @@ def to_json(self) -> Mapping[str, Any]: base["smallest_time_grain"] = self.smallest_time_grain if self.derived_dimensions: base["derived_dimensions"] = list(self.derived_dimensions) + if self.metadata: + base.update(self.metadata) return base def __hash__(self) -> int: @@ -1011,6 +1022,7 @@ class Measure: description: str | None = None requires_unnest: tuple[str, ...] = () # Internal: Arrays that must be unnested original_expr: Any = field(default=None, eq=False, hash=False) + metadata: Mapping[str, Any] = field(factory=dict, eq=False, hash=False) def __call__(self, table: ir.Table) -> ir.Value: return self.expr.resolve(table) if _is_deferred(self.expr) else self.expr(table) @@ -1026,6 +1038,8 @@ def to_json(self) -> Mapping[str, Any]: base["locality"] = self.locality if self.requires_unnest: base["requires_unnest"] = list(self.requires_unnest) + if self.metadata: + base.update(self.metadata) return base def __hash__(self) -> int: diff --git a/src/boring_semantic_layer/tests/test_metadata.py b/src/boring_semantic_layer/tests/test_metadata.py new file mode 100644 index 0000000..856e4cd --- /dev/null +++ b/src/boring_semantic_layer/tests/test_metadata.py @@ -0,0 +1,152 @@ +"""Tests for the ``metadata`` field on Dimension and Measure.""" + +from pathlib import Path +import tempfile + +import ibis + +from boring_semantic_layer import from_yaml, to_semantic_table +from boring_semantic_layer.ops import Dimension, Measure + + +def _build_table(): + con = ibis.duckdb.connect(":memory:") + data = ibis.memtable( + { + "country": ["SE", "DE", "GB"], + "sales": [10.0, 20.0, 30.0], + } + ) + return con.create_table("orders", data) + + +def test_dimension_metadata_defaults_empty(): + dim = Dimension(expr=lambda t: t.country, description="Country") + assert dim.metadata == {} + assert dim.to_json() == {"description": "Country"} + + +def test_measure_metadata_defaults_empty(): + meas = Measure(expr=lambda t: t.sales.sum(), description="Total sales") + assert meas.metadata == {} + assert meas.to_json() == {"description": "Total sales"} + + +def test_dimension_metadata_flows_into_to_json(): + dim = Dimension( + expr=lambda t: t.country, + description="Country ISO code", + is_entity=True, + metadata={ + "entity_type": "market", + "format": "iso_country", + "example_values": ["SE", "DE", "GB"], + }, + ) + payload = dim.to_json() + assert payload["description"] == "Country ISO code" + assert payload["is_entity"] is True + assert payload["entity_type"] == "market" + assert payload["format"] == "iso_country" + assert payload["example_values"] == ["SE", "DE", "GB"] + + +def test_measure_metadata_flows_into_to_json(): + meas = Measure( + expr=lambda t: t.sales.sum(), + description="Total sales in EUR", + metadata={"format": "currency_eur", "unit": "EUR", "is_additive": True}, + ) + payload = meas.to_json() + assert payload["description"] == "Total sales in EUR" + assert payload["format"] == "currency_eur" + assert payload["unit"] == "EUR" + assert payload["is_additive"] is True + + +def test_metadata_appears_in_json_definition(): + tbl = _build_table() + model = ( + to_semantic_table(tbl, name="orders") + .with_dimensions( + country=Dimension( + expr=lambda t: t.country, + description="Country", + is_entity=True, + metadata={"entity_type": "market", "format": "iso_country"}, + ) + ) + .with_measures( + total_sales=Measure( + expr=lambda t: t.sales.sum(), + description="Total sales", + metadata={"format": "currency_eur", "unit": "EUR"}, + ) + ) + ) + json_def = model.json_definition + country = json_def["dimensions"]["country"] + assert country["entity_type"] == "market" + assert country["format"] == "iso_country" + total = json_def["measures"]["total_sales"] + assert total["format"] == "currency_eur" + assert total["unit"] == "EUR" + + +def test_metadata_equality_ignored(): + a = Dimension(expr=lambda t: t.country, metadata={"format": "iso_country"}) + b = Dimension(expr=a.expr, metadata={"format": "something_else"}) + assert a == b + assert hash(a) == hash(b) + + +def test_metadata_overrides_base_fields(): + """Metadata is merged after base fields, so matching keys win.""" + dim = Dimension( + expr=lambda t: t.country, + description="real", + metadata={"description": "override"}, + ) + assert dim.to_json()["description"] == "override" + + +def test_yaml_dimension_metadata_round_trip(): + yaml_content = """ +orders: + table: orders_tbl + dimensions: + country: + expr: _.country + description: Country ISO code + is_entity: true + metadata: + entity_type: market + format: iso_country + example_values: [SE, DE, GB] + measures: + total_sales: + expr: _.sales.sum() + description: Total sales + metadata: + format: currency_eur + unit: EUR + is_additive: true +""" + con = ibis.duckdb.connect(":memory:") + data = ibis.memtable( + {"country": ["SE", "DE"], "sales": [1.0, 2.0]} + ) + tbl = con.create_table("orders_tbl", data) + + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "models.yml" + path.write_text(yaml_content) + models = from_yaml(path, tables={"orders_tbl": tbl}) + + json_def = models["orders"].json_definition + assert json_def["dimensions"]["country"]["entity_type"] == "market" + assert json_def["dimensions"]["country"]["format"] == "iso_country" + assert json_def["dimensions"]["country"]["example_values"] == ["SE", "DE", "GB"] + assert json_def["measures"]["total_sales"]["format"] == "currency_eur" + assert json_def["measures"]["total_sales"]["unit"] == "EUR" + assert json_def["measures"]["total_sales"]["is_additive"] is True diff --git a/src/boring_semantic_layer/yaml.py b/src/boring_semantic_layer/yaml.py index 71af2bf..830b86c 100644 --- a/src/boring_semantic_layer/yaml.py +++ b/src/boring_semantic_layer/yaml.py @@ -30,6 +30,8 @@ def _parse_expression_config(name: str, config: str | dict, metric_type: str): extra_kwargs["is_time_dimension"] = config.get("is_time_dimension", False) extra_kwargs["smallest_time_grain"] = config.get("smallest_time_grain") extra_kwargs["derived_dimensions"] = tuple(config.get("derived_dimensions") or ()) + if "metadata" in config: + extra_kwargs["metadata"] = dict(config["metadata"] or {}) return config["expr"], config.get("description"), extra_kwargs else: raise ValueError(f"Invalid {metric_type} format for '{name}'. Must be a string or dict") @@ -51,15 +53,15 @@ def _parse_dimension_or_measure( is_time_dimension: true/false (dimensions only) smallest_time_grain: "TIME_GRAIN_DAY" (dimensions only) derived_dimensions: ["year", "month", "day"] (dimensions only) + metadata: {format: currency_eur, unit: EUR, ...} (free-form) """ expr_str, description, extra_kwargs = _parse_expression_config(name, config, metric_type) deferred = safe_eval(expr_str, context={"_": _}).unwrap() base_kwargs = {"expr": deferred, "description": description} - return ( - Dimension(**base_kwargs, **extra_kwargs) - if metric_type == "dimension" - else Measure(**base_kwargs) - ) + if metric_type == "dimension": + return Dimension(**base_kwargs, **extra_kwargs) + measure_kwargs = {"metadata": extra_kwargs["metadata"]} if "metadata" in extra_kwargs else {} + return Measure(**base_kwargs, **measure_kwargs) def _parse_calc_measure(name: str, config: str | dict) -> Measure: @@ -77,14 +79,15 @@ def _parse_calc_measure(name: str, config: str | dict) -> Measure: pct_of_total: expr: _.distance_sum / _.all(_.distance_sum) * 100 """ - expr_str, description, _ = _parse_expression_config(name, config, "measure") + expr_str, description, extra_kwargs = _parse_expression_config(name, config, "measure") def _make_calc_fn(source: str): def calc_fn(scope): return safe_eval(source, context={"_": scope}).unwrap() return calc_fn - return Measure(expr=_make_calc_fn(expr_str), description=description) + measure_kwargs = {"metadata": extra_kwargs["metadata"]} if "metadata" in extra_kwargs else {} + return Measure(expr=_make_calc_fn(expr_str), description=description, **measure_kwargs) def _parse_filter(filter_expr: str) -> callable: