From 2f749e3681ea03480b2448b03509111ee3a3daa3 Mon Sep 17 00:00:00 2001
From: boringdata <boringdata@users.noreply.github.com>
Date: Thu, 2 Apr 2026 07:22:41 +0000
Subject: [PATCH 1/4] feat: add OSI (Open Semantic Interchange) v0.1.1
 compatibility

Add bidirectional converter between BSL and OSI YAML format:
- to_osi() / to_osi_yaml(): Export BSL models to OSI-compliant YAML
- from_osi() / from_osi_yaml(): Import OSI YAML into BSL models
- ai_context field on Dimension and Measure for LLM metadata
- Expression translation between Ibis Deferred and SQL strings
- BSL-specific metadata preserved via OSI custom_extensions
- Round-trip tested: BSL->OSI->BSL and OSI->BSL->OSI

Closes #226

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/osi-compatibility.md                   | 115 ++++
 examples/flights_osi.yaml                   | 436 ++++++++++++
 src/boring_semantic_layer/__init__.py       |  10 +
 src/boring_semantic_layer/ops.py            |  20 +-
 src/boring_semantic_layer/osi.py            | 696 ++++++++++++++++++++
 src/boring_semantic_layer/tests/test_osi.py | 551 ++++++++++++++++
 src/boring_semantic_layer/yaml.py           |   2 +
 7 files changed, 1826 insertions(+), 4 deletions(-)
 create mode 100644 docs/osi-compatibility.md
 create mode 100644 examples/flights_osi.yaml
 create mode 100644 src/boring_semantic_layer/osi.py
 create mode 100644 src/boring_semantic_layer/tests/test_osi.py

diff --git a/docs/osi-compatibility.md b/docs/osi-compatibility.md
new file mode 100644
index 00000000..e03c56d3
--- /dev/null
+++ b/docs/osi-compatibility.md
@@ -0,0 +1,115 @@
+# BSL <> OSI Compatibility Gap Analysis
+
+**OSI Spec Version**: 0.1.1 (2025-12-11)
+**BSL Version**: 0.3.11
+
+## Overview
+
+The [Open Semantic Interchange (OSI)](https://github.com/open-semantic-interchange/OSI) spec defines
+a vendor-neutral YAML format for semantic model definitions. This document maps BSL's current format
+to OSI and identifies gaps that need bridging for full compatibility.
+
+## Structural Differences
+
+| Aspect | OSI | BSL | Status |
+|--------|-----|-----|--------|
+| Top-level wrapper | `semantic_model: [{name, datasets, ...}]` | Flat model-per-key | DIFFERENT |
+| Version field | `version: "0.1.1"` (required) | None | MISSING |
+| Dataset/model | `datasets` array with `name` + `source` | Top-level keys with `table` | DIFFERENT |
+| Fields | `fields` array (name + expression object) | `dimensions` dict with `_.expr` syntax | DIFFERENT |
+| Metrics | `metrics` array at semantic_model level | `measures`/`calculated_measures` dicts at model level | DIFFERENT |
+| Relationships | `relationships` array with `from`/`to` + column arrays | `joins` dict with `model`/`type`/`left_on`/`right_on` | DIFFERENT |
+
+## Field-Level Gaps
+
+### Missing in BSL (needed for OSI export)
+
+| OSI Field | Location | Description | Priority |
+|-----------|----------|-------------|----------|
+| `ai_context` | Every level | String or structured object with instructions/synonyms/examples | HIGH |
+| `primary_key` | Dataset | Array of column names forming PK | MEDIUM |
+| `unique_keys` | Dataset | Array of unique key arrays | LOW |
+| `custom_extensions` | Every level | Vendor-specific metadata (`vendor_name` + `data` JSON) | LOW |
+| `label` | Field | Categorization label (e.g., "filter") | LOW |
+| Multi-dialect `expression` | Field/Metric | `dialects: [{dialect, expression}]` | MEDIUM |
+
+### BSL has, OSI doesn't
+
+| BSL Field | Description | Handling |
+|-----------|-------------|----------|
+| `profile` | Database connection config | Omit from OSI export |
+| `filter` | Model-level filter expression | Store in `custom_extensions` |
+| `is_entity` | Entity/PK marker on dimension | Map to `primary_key` on dataset |
+| `is_event_timestamp` | Event timestamp marker | Store in `custom_extensions` |
+| `smallest_time_grain` | Time granularity | Store in `custom_extensions` |
+| `derived_dimensions` | Auto-derived time parts | Store in `custom_extensions` |
+| `calculated_measures` | Derived metrics referencing other measures | Export as metrics with `custom_extensions` |
+| `join.type` / `join.how` | Join cardinality + method | Inferred from relationship + `custom_extensions` |
+
+## Concept Mapping
+
+### Dimensions -> Fields
+
+```
+BSL dimension with is_time_dimension=true
+  -> OSI field with dimension.is_time=true
+
+BSL dimension with is_entity=true
+  -> OSI dataset.primary_key includes this field's column
+
+BSL dimension expression: _.column_name
+  -> OSI expression.dialects[{dialect: "ANSI_SQL", expression: "column_name"}]
+
+BSL computed dimension: _.first_name.concat(' ', _.last_name)
+  -> OSI expression: "first_name || ' ' || last_name" (SQL form)
+```
+
+### Measures -> Metrics
+
+```
+BSL measure: _.column.sum()
+  -> OSI metric expression: "SUM(dataset_name.column)"
+
+BSL measure: _.count()
+  -> OSI metric expression: "COUNT(*)"
+
+BSL calculated_measure: _.meas1 / _.meas2
+  -> OSI metric with custom_extension noting it's derived
+```
+
+### Joins -> Relationships
+
+```
+BSL join:
+  carriers:
+    model: carriers
+    type: one
+    left_on: carrier
+    right_on: code
+
+  -> OSI relationship:
+    name: flights_to_carriers
+    from: flights
+    to: carriers
+    from_columns: [carrier]
+    to_columns: [code]
+```
+
+## Implementation Plan
+
+### Phase 1: Bidirectional Converter (this PR)
+
+1. **`osi.py`** - New module with `to_osi()` and `from_osi()` functions
+2. **`ai_context`** on `Dimension` and `Measure` - Optional field for round-trip fidelity
+3. **Tests** - Round-trip conversion tests
+4. **Example** - flights.yml converted to OSI format
+
+### Expression Translation Strategy
+
+BSL uses Ibis Deferred expressions (`_.column`), while OSI uses SQL strings.
+For simple column references, extraction is straightforward. For complex expressions,
+we serialize to ANSI SQL via Ibis's SQL compiler.
+
+- **Simple**: `_.column_name` -> `"column_name"`
+- **Computed**: `_.first_name.concat(' ', _.last_name)` -> `"first_name || ' ' || last_name"`
+- **Aggregate**: `_.amount.sum()` -> `"SUM(amount)"`
diff --git a/examples/flights_osi.yaml b/examples/flights_osi.yaml
new file mode 100644
index 00000000..5d6260ae
--- /dev/null
+++ b/examples/flights_osi.yaml
@@ -0,0 +1,436 @@
+# OSI (Open Semantic Interchange) version of the flights example
+# Spec: https://github.com/open-semantic-interchange/OSI (v0.1.1)
+#
+# This is the OSI-compliant equivalent of flights.yml.
+# Use from_osi_yaml() to load this into BSL:
+#
+#   from boring_semantic_layer.osi import from_osi_yaml
+#   models = from_osi_yaml("flights_osi.yaml", tables=tables)
+
+version: "0.1.1"
+
+semantic_model:
+  - name: flights_analytics
+    description: Flight data analytics model with carriers, aircraft, and airports
+    ai_context:
+      instructions: >
+        Use this model for analyzing US domestic flight data including
+        delays, distances, carriers, aircraft, and airports.
+      examples:
+        - "What are the top 10 carriers by total flights?"
+        - "What is the average flight distance by origin airport?"
+        - "Show me the distribution of departure delays"
+
+    datasets:
+      # ----------------------------------------------------------------
+      # Carriers
+      # ----------------------------------------------------------------
+      - name: carriers
+        source: carriers_tbl
+        primary_key: [code]
+        description: Airline carrier information
+        fields:
+          - name: code
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: code
+            dimension:
+              is_time: false
+            description: Airline carrier code
+
+          - name: name
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: name
+            dimension:
+              is_time: false
+            description: Full airline name
+
+          - name: nickname
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: nickname
+            dimension:
+              is_time: false
+
+      # ----------------------------------------------------------------
+      # Aircraft Models
+      # ----------------------------------------------------------------
+      - name: aircraft_models
+        source: aircraft_models_tbl
+        primary_key: [aircraft_model_code]
+        description: Aircraft model specifications
+        fields:
+          - name: aircraft_model_code
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: aircraft_model_code
+            dimension:
+              is_time: false
+
+          - name: manufacturer
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: manufacturer
+            dimension:
+              is_time: false
+
+          - name: model
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: model
+            dimension:
+              is_time: false
+
+          - name: aircraft_type_id
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: aircraft_type_id
+            dimension:
+              is_time: false
+
+          - name: aircraft_engine_type_id
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: aircraft_engine_type_id
+            dimension:
+              is_time: false
+
+          - name: aircraft_category_id
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: aircraft_category_id
+            dimension:
+              is_time: false
+
+      # ----------------------------------------------------------------
+      # Aircraft (Individual Planes)
+      # ----------------------------------------------------------------
+      - name: aircraft
+        source: aircraft_tbl
+        primary_key: [tail_num]
+        description: Individual aircraft registry
+        fields:
+          - name: tail_num
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: tail_num
+            dimension:
+              is_time: false
+
+          - name: aircraft_serial
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: aircraft_serial
+            dimension:
+              is_time: false
+
+          - name: aircraft_model_code
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: aircraft_model_code
+            dimension:
+              is_time: false
+
+          - name: aircraft_engine_code
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: aircraft_engine_code
+            dimension:
+              is_time: false
+
+          - name: aircraft_type_id
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: aircraft_type_id
+            dimension:
+              is_time: false
+
+      # ----------------------------------------------------------------
+      # Airports
+      # ----------------------------------------------------------------
+      - name: airports
+        source: airports_tbl
+        primary_key: [code]
+        description: Airport locations and information
+        ai_context:
+          synonyms:
+            - "airport"
+            - "airfield"
+            - "aerodrome"
+        fields:
+          - name: code
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: code
+            dimension:
+              is_time: false
+
+          - name: city
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: city
+            dimension:
+              is_time: false
+
+          - name: county
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: county
+            dimension:
+              is_time: false
+
+          - name: state
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: state
+            dimension:
+              is_time: false
+
+          - name: full_name
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: full_name
+            dimension:
+              is_time: false
+
+          - name: latitude
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: latitude
+            dimension:
+              is_time: false
+
+          - name: longitude
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: longitude
+            dimension:
+              is_time: false
+
+          - name: elevation
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: elevation
+            dimension:
+              is_time: false
+
+      # ----------------------------------------------------------------
+      # Flights
+      # ----------------------------------------------------------------
+      - name: flights
+        source: flights_tbl
+        description: Flight data with origin, destination, and metrics
+        ai_context:
+          synonyms:
+            - "flight records"
+            - "air travel"
+            - "flight segments"
+        fields:
+          - name: origin
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: origin
+            dimension:
+              is_time: false
+            description: Origin airport code
+            ai_context:
+              synonyms:
+                - "departure airport"
+                - "from"
+
+          - name: destination
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: destination
+            dimension:
+              is_time: false
+            description: Destination airport code
+            ai_context:
+              synonyms:
+                - "arrival airport"
+                - "to"
+
+          - name: carrier
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: carrier
+            dimension:
+              is_time: false
+
+          - name: tail_num
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: tail_num
+            dimension:
+              is_time: false
+
+          - name: dep_time
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: dep_time
+            dimension:
+              is_time: false
+            description: Departure timestamp
+
+          - name: arr_time
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: arr_time
+            dimension:
+              is_time: true
+            description: Arrival timestamp
+            custom_extensions:
+              - vendor_name: COMMON
+                data: '{"is_event_timestamp": false, "smallest_time_grain": "TIME_GRAIN_DAY"}'
+
+          - name: distance
+            expression:
+              dialects:
+                - dialect: ANSI_SQL
+                  expression: distance
+            dimension:
+              is_time: false
+            description: Flight distance in miles
+
+    relationships:
+      - name: flights_to_carriers
+        from: flights
+        to: carriers
+        from_columns: [carrier]
+        to_columns: [code]
+
+      - name: aircraft_to_aircraft_models
+        from: aircraft
+        to: aircraft_models
+        from_columns: [aircraft_model_code]
+        to_columns: [aircraft_model_code]
+
+      - name: flights_to_aircraft
+        from: flights
+        to: aircraft
+        from_columns: [tail_num]
+        to_columns: [tail_num]
+
+      - name: flights_to_origin_airport
+        from: flights
+        to: airports
+        from_columns: [origin]
+        to_columns: [code]
+
+    metrics:
+      - name: carrier_count
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: COUNT(*)
+        description: Number of carriers
+
+      - name: model_count
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: COUNT(*)
+        description: Number of aircraft models
+
+      - name: avg_seats
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: AVG(aircraft_models.seats)
+        description: Average number of seats
+
+      - name: aircraft_count
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: COUNT(*)
+        description: Number of aircraft
+
+      - name: airport_count
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: COUNT(*)
+        description: Number of airports
+
+      - name: avg_elevation
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: AVG(airports.elevation)
+        description: Average airport elevation
+
+      - name: flight_count
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: COUNT(*)
+        description: Total number of flights
+        ai_context:
+          synonyms:
+            - "number of flights"
+            - "total flights"
+
+      - name: total_distance
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: SUM(flights.distance)
+        description: Total distance flown
+
+      - name: avg_distance
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: AVG(flights.distance)
+        description: Average flight distance
+
+      - name: max_distance
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: MAX(flights.distance)
+        description: Maximum flight distance
+
+      - name: avg_delay
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: AVG(flights.dep_delay)
+        description: Average departure delay
+
+      - name: max_delay
+        expression:
+          dialects:
+            - dialect: ANSI_SQL
+              expression: MAX(flights.dep_delay)
+        description: Maximum departure delay
diff --git a/src/boring_semantic_layer/__init__.py b/src/boring_semantic_layer/__init__.py
index fe92c358..828cebc3 100644
--- a/src/boring_semantic_layer/__init__.py
+++ b/src/boring_semantic_layer/__init__.py
@@ -43,6 +43,12 @@
     from_config,
     from_yaml,
 )
+from .osi import (
+    from_osi,
+    from_osi_yaml,
+    to_osi,
+    to_osi_yaml,
+)
 
 __all__ = [
     "to_semantic_table",
@@ -57,6 +63,10 @@
     "Measure",
     "from_config",
     "from_yaml",
+    "to_osi",
+    "to_osi_yaml",
+    "from_osi",
+    "from_osi_yaml",
     "MCPSemanticModel",
     "LangGraphBackend",
     "options",
diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py
index 3702816f..55469dbd 100644
--- a/src/boring_semantic_layer/ops.py
+++ b/src/boring_semantic_layer/ops.py
@@ -626,22 +626,24 @@ def _infer_unnest(fn: Callable, table: Any) -> tuple[str, ...]:
     return ()
 
 
-def _extract_measure_metadata(fn_or_expr: Any) -> tuple[Any, str | None, tuple]:
+def _extract_measure_metadata(fn_or_expr: Any) -> tuple[Any, str | None, tuple, Any]:
     """Extract metadata from various measure representations."""
     if isinstance(fn_or_expr, dict):
         return (
             fn_or_expr["expr"],
             fn_or_expr.get("description"),
             tuple(fn_or_expr.get("requires_unnest", [])),
+            fn_or_expr.get("ai_context"),
         )
     elif isinstance(fn_or_expr, Measure):
         return (
             fn_or_expr.expr,
             fn_or_expr.description,
             fn_or_expr.requires_unnest,
+            fn_or_expr.ai_context,
         )
     else:
-        return (fn_or_expr, None, ())
+        return (fn_or_expr, None, (), None)
 
 
 _AGG_METHODS = frozenset({"sum", "mean", "avg", "count", "min", "max"})
@@ -733,6 +735,7 @@ def _make_base_measure(
     expr: Any,
     description: str | None,
     requires_unnest: tuple,
+    ai_context: Any = None,
 ) -> Measure:
     """Create a base measure with proper callable wrapping using functional patterns."""
 
@@ -788,6 +791,7 @@ def wrapped_expr(t):
             description=description,
             requires_unnest=requires_unnest,
             original_expr=raw_expr,
+            ai_context=ai_context,
         )
 
     if callable(expr):
@@ -806,6 +810,7 @@ def wrapped_expr(t):
             description=description,
             requires_unnest=requires_unnest,
             original_expr=raw_expr,
+            ai_context=ai_context,
         )
     else:
         return Measure(
@@ -813,12 +818,13 @@ def wrapped_expr(t):
             description=description,
             requires_unnest=requires_unnest,
             original_expr=raw_expr,
+            ai_context=ai_context,
         )
 
 
 def _classify_measure(fn_or_expr: Any, scope: Any) -> tuple[str, Any]:
     """Classify measure as 'calc' or 'base' with appropriate handling."""
-    expr, description, requires_unnest = _extract_measure_metadata(fn_or_expr)
+    expr, description, requires_unnest, ai_context = _extract_measure_metadata(fn_or_expr)
 
     resolved = safe(lambda: _resolve_expr(expr, scope))().map(
         lambda val: ("calc", val) if _is_calculated_measure(val) else None
@@ -833,7 +839,7 @@ def _classify_measure(fn_or_expr: Any, scope: Any) -> tuple[str, Any]:
         inferred_unnest = _infer_unnest(expr, table)
         requires_unnest = requires_unnest or inferred_unnest
 
-    return ("base", _make_base_measure(expr, description, requires_unnest))
+    return ("base", _make_base_measure(expr, description, requires_unnest, ai_context))
 
 
 def _build_json_definition(
@@ -953,6 +959,7 @@ class Dimension:
     is_event_timestamp: bool = False
     smallest_time_grain: str | None = None
     derived_dimensions: tuple[str, ...] = ()
+    ai_context: str | dict | None = None
 
     def __call__(self, table: ir.Table, _dims: dict | None = None) -> ir.Value:
         try:
@@ -990,6 +997,8 @@ def to_json(self) -> Mapping[str, Any]:
             base["smallest_time_grain"] = self.smallest_time_grain
         if self.derived_dimensions:
             base["derived_dimensions"] = list(self.derived_dimensions)
+        if self.ai_context:
+            base["ai_context"] = self.ai_context
         return base
 
     def __hash__(self) -> int:
@@ -1011,6 +1020,7 @@ class Measure:
     description: str | None = None
     requires_unnest: tuple[str, ...] = ()  # Internal: Arrays that must be unnested
     original_expr: Any = field(default=None, eq=False, hash=False)
+    ai_context: str | dict | None = None
 
     def __call__(self, table: ir.Table) -> ir.Value:
         return self.expr.resolve(table) if _is_deferred(self.expr) else self.expr(table)
@@ -1026,6 +1036,8 @@ def to_json(self) -> Mapping[str, Any]:
             base["locality"] = self.locality
         if self.requires_unnest:
             base["requires_unnest"] = list(self.requires_unnest)
+        if self.ai_context:
+            base["ai_context"] = self.ai_context
         return base
 
     def __hash__(self) -> int:
diff --git a/src/boring_semantic_layer/osi.py b/src/boring_semantic_layer/osi.py
new file mode 100644
index 00000000..d2802f77
--- /dev/null
+++ b/src/boring_semantic_layer/osi.py
@@ -0,0 +1,696 @@
+"""
+OSI (Open Semantic Interchange) converter for Boring Semantic Layer.
+
+Provides bidirectional conversion between BSL's semantic model format
+and the OSI v0.1.1 YAML specification.
+
+See: https://github.com/open-semantic-interchange/OSI
+"""
+
+from __future__ import annotations
+
+import re
+from collections.abc import Mapping
+from typing import Any
+
+from ibis import _
+from ibis.common.deferred import Deferred
+
+from .api import to_semantic_table
+from .expr import SemanticModel, SemanticTable
+from .ops import Dimension, Measure, SemanticTableOp, _is_deferred
+from .utils import expr_to_ibis_string, safe_eval
+
+OSI_VERSION = "0.1.1"
+BSL_VENDOR = "BSL"
+
+
+# ---------------------------------------------------------------------------
+# Expression helpers
+# ---------------------------------------------------------------------------
+
+
+def _deferred_to_sql(expr: Deferred) -> str:
+    """Convert an Ibis Deferred expression to a simple SQL-like string.
+
+    Matches on ``str(expr)``; any other repr at most loses its leading "_.":
+      _.column_name           -> "column_name"
+      _.column.sum()          -> "SUM(column)"
+      _.column.mean()         -> "AVG(column)"
+      _.column.max()          -> "MAX(column)"
+      _.column.min()          -> "MIN(column)"
+      _.column.nunique()      -> "COUNT(DISTINCT column)"
+      _.count()               -> "COUNT(*)"
+    """
+    s = str(expr)  # the repr is the only input used, e.g. "_.column.sum()"
+    return _ibis_string_to_sql(s)
+
+
+def _ibis_string_to_sql(s: str) -> str:
+    """Translate an Ibis deferred repr such as "_.col.sum()" into a SQL-like string."""
+    s = s.strip()
+
+    # Row count (exact match only): _.count() -> COUNT(*)
+    if s == "_.count()":
+        return "COUNT(*)"
+
+    # _.col.agg() patterns; (.+) is greedy, so "_.a.b.sum()" becomes "SUM(a.b)"
+    agg_map = {
+        "sum": "SUM",
+        "mean": "AVG",
+        "max": "MAX",
+        "min": "MIN",
+    }
+    for ibis_fn, sql_fn in agg_map.items():
+        pattern = rf"^_\.(.+)\.{ibis_fn}\(\)$"
+        m = re.match(pattern, s)
+        if m:
+            return f"{sql_fn}({m.group(1)})"
+
+    # _.col.nunique() -> COUNT(DISTINCT col)
+    m = re.match(r"^_\.(.+)\.nunique\(\)$", s)
+    if m:
+        return f"COUNT(DISTINCT {m.group(1)})"
+
+    # Simple column reference: _.col -> col
+    m = re.match(r"^_\.(\w+)$", s)
+    if m:
+        return m.group(1)
+
+    # Fallback: strip leading "_." and return as-is (the result may not be valid SQL)
+    if s.startswith("_."):
+        return s[2:]
+    return s
+
+
+def _expr_to_sql_string(expr: Any) -> str | None:
+    """Best-effort conversion of a BSL expression to a SQL string (None on failure)."""
+    if _is_deferred(expr):
+        return _deferred_to_sql(expr)
+
+    # Not a Deferred: ask expr_to_ibis_string for a repr and translate that instead
+    from returns.result import Success
+
+    result = expr_to_ibis_string(expr)
+    if isinstance(result, Success):
+        val = result.unwrap()
+        if val is not None:
+            return _ibis_string_to_sql(val)
+
+    return None
+
+
+def _sql_to_deferred(sql: str) -> Deferred:
+    """Convert a simple SQL expression back to an Ibis Deferred.
+
+    Keywords match case-insensitively; an aggregate's ``dataset.`` qualifier is dropped:
+      "column_name"                -> _.column_name
+      "SUM(dataset.column)"        -> _.column.sum()   (likewise MAX / MIN)
+      "AVG(column)"                -> _.column.mean()
+      "COUNT(*)"                   -> _.count()
+      "COUNT(DISTINCT column)"     -> _.column.nunique()
+    """
+    sql = sql.strip()
+
+    if re.fullmatch(r"COUNT\(\s*\*\s*\)", sql, re.IGNORECASE):
+        return safe_eval("_.count()", context={"_": _}).unwrap()
+
+    # COUNT(DISTINCT col); the exporter writes COUNT(DISTINCT dataset.col), accept both
+    m = re.match(r"^COUNT\(DISTINCT\s+(?:\w+\.)?(\w+)\)$", sql, re.IGNORECASE)
+    if m:
+        return safe_eval(f"_.{m.group(1)}.nunique()", context={"_": _}).unwrap()
+
+    # AGG(col) patterns; AGG(dataset.col) is accepted and the qualifier ignored
+    sql_to_ibis = {"SUM": "sum", "AVG": "mean", "MAX": "max", "MIN": "min"}
+    for sql_fn, ibis_fn in sql_to_ibis.items():
+        m = re.match(rf"^{sql_fn}\((?:\w+\.)?(\w+)\)$", sql, re.IGNORECASE)
+        if m:
+            return safe_eval(f"_.{m.group(1)}.{ibis_fn}()", context={"_": _}).unwrap()
+
+    # Simple column reference
+    if re.match(r"^\w+$", sql):
+        return safe_eval(f"_.{sql}", context={"_": _}).unwrap()
+
+    # Fallback: try eval as-is with underscore prefix
+    try:
+        return safe_eval(f"_.{sql}", context={"_": _}).unwrap()
+    except Exception:
+        # Last resort: return as deferred column access
+        return safe_eval(f"_.{sql.split('.')[0] if '.' in sql else sql}", context={"_": _}).unwrap()
+
+
+# ---------------------------------------------------------------------------
+# Export: BSL -> OSI
+# ---------------------------------------------------------------------------
+
+
+def _make_osi_expression(sql_expr: str, dialect: str = "ANSI_SQL") -> dict:
+    """Wrap a SQL string in an OSI expression object holding one dialect entry."""
+    return {"dialects": [{"dialect": dialect, "expression": sql_expr}]}
+
+
+def _dimension_to_osi_field(name: str, dim: Dimension) -> dict:
+    """Convert a BSL Dimension to an OSI field dict (name, expression, dimension, ...)."""
+    sql = _expr_to_sql_string(dim.expr)
+    field: dict[str, Any] = {
+        "name": name,
+        "expression": _make_osi_expression(sql or name),  # name if no SQL was derived
+    }
+
+    if dim.is_time_dimension or dim.is_event_timestamp:
+        field["dimension"] = {"is_time": True}
+    else:
+        field["dimension"] = {"is_time": False}
+
+    if dim.description:
+        field["description"] = dim.description
+
+    if dim.ai_context:
+        field["ai_context"] = dim.ai_context
+
+    # Collect BSL-only flags; emitted as JSON under vendor_name "COMMON" when non-empty
+    bsl_data: dict[str, Any] = {}
+    if dim.is_entity:
+        bsl_data["is_entity"] = True
+    if dim.is_event_timestamp:
+        bsl_data["is_event_timestamp"] = True
+    if dim.smallest_time_grain:
+        bsl_data["smallest_time_grain"] = dim.smallest_time_grain
+    if dim.derived_dimensions:
+        bsl_data["derived_dimensions"] = list(dim.derived_dimensions)
+
+    if bsl_data:
+        field["custom_extensions"] = [
+            {"vendor_name": "COMMON", "data": _json_dumps(bsl_data)}
+        ]
+
+    return field
+
+
+def _measure_to_osi_metric(
+    name: str, measure: Measure, dataset_name: str | None = None
+) -> dict:
+    """Convert a BSL Measure to an OSI metric dict (name, expression, optional extras)."""
+    sql = _expr_to_sql_string(measure.expr)
+    if sql and dataset_name:
+        # Qualify column references inside function calls with the dataset name,
+        # e.g. SUM(amount) -> SUM(dataset.amount); skipped when either value is empty
+        sql = _prefix_columns_in_sql(sql, dataset_name)
+
+    metric: dict[str, Any] = {
+        "name": name,
+        "expression": _make_osi_expression(sql or name),  # name if no SQL was derived
+    }
+
+    if measure.description:
+        metric["description"] = measure.description
+
+    if measure.ai_context:
+        metric["ai_context"] = measure.ai_context
+
+    return metric
+
+
+def _prefix_columns_in_sql(sql: str, dataset: str) -> str:
+    """Add dataset prefix to bare column references inside aggregate functions.
+
+    SUM(amount) -> SUM(dataset.amount)
+    COUNT(*) stays as COUNT(*)
+    COUNT(DISTINCT col) -> COUNT(DISTINCT dataset.col)
+    """
+    if sql == "COUNT(*)":
+        return sql
+
+    # Applied to every NAME(args) match, not only aggregates: FN(col) -> FN(dataset.col)
+    def _prefix_match(m: re.Match) -> str:
+        fn = m.group(1)
+        inner = m.group(2).strip()
+        # Keep the DISTINCT keyword in front of the (possibly prefixed) column
+        if inner.upper().startswith("DISTINCT "):
+            col = inner[9:].strip()  # 9 == len("DISTINCT ")
+            if "." not in col:
+                return f"{fn}(DISTINCT {dataset}.{col})"
+            return m.group(0)
+        if "." not in inner and inner != "*":
+            return f"{fn}({dataset}.{inner})"
+        return m.group(0)
+
+    return re.sub(r"(\w+)\(([^)]+)\)", _prefix_match, sql)
+
+
+def _json_dumps(obj: Any) -> str:
+    """Encode ``obj`` as the JSON text stored in a custom_extensions ``data`` field."""
+    from json import dumps as _encode
+
+    return _encode(obj)
+
+
+def _extract_join_info(model: SemanticModel) -> list[dict]:
+    """Collect OSI relationship dicts from the joins found in a model.
+
+    The model's op tree is walked depth-first; every SemanticJoinOp yields
+    one relationship (the join itself first, then joins nested under its
+    left and right inputs).
+    """
+    from .ops import SemanticJoinOp
+
+    found: list[dict] = []
+
+    def _unwrap(table):
+        # Objects exposing .op() are unwrapped; anything else is used as-is
+        return table.op() if hasattr(table, "op") else table
+
+    def _walk_joins(node, parent_name: str | None = None):
+        if not isinstance(node, SemanticJoinOp):
+            # Plain tables end the walk; other nodes are followed via .table
+            if not isinstance(node, SemanticTableOp) and hasattr(node, "table"):
+                _walk_joins(_unwrap(node.table))
+            return
+
+        left_name = _get_model_name(node.left)
+        right_name = _get_model_name(node.right)
+        left_cols, right_cols = _extract_join_columns(node)
+        if not (left_cols and right_cols):
+            # Join columns could not be extracted: emit explicit placeholders
+            left_cols, right_cols = ["unknown"], ["unknown"]
+
+        rel: dict[str, Any] = {
+            "name": f"{left_name}_{right_name}",
+            "from": left_name,
+            "to": right_name,
+            "from_columns": left_cols,
+            "to_columns": right_cols,
+        }
+        if hasattr(node, "cardinality"):
+            # Cardinality is carried as a COMMON custom extension
+            payload = _json_dumps({"cardinality": node.cardinality})
+            rel["custom_extensions"] = [{"vendor_name": "COMMON", "data": payload}]
+
+        found.append(rel)
+        _walk_joins(node.left, left_name)
+        _walk_joins(node.right, right_name)
+
+    _walk_joins(model.op())
+    return found
+
+
+def _get_model_name(node) -> str:
+    """Resolve a display name for an op node, falling back to "unnamed"."""
+    while not isinstance(node, SemanticTableOp):
+        label = getattr(node, "name", None)
+        if label:
+            return label
+        if not hasattr(node, "table"):
+            return "unnamed"
+        node = node.table.op() if hasattr(node.table, "op") else node.table
+    return node.name or "unnamed"
+
+
+def _extract_join_columns(join_op) -> tuple[list[str], list[str]]:
+    """Return (left_cols, right_cols); predicates are opaque lambdas, so both are empty."""
+    no_columns: tuple[list[str], list[str]] = ([], [])
+    return no_columns
+
+
+def to_osi(
+    models: dict[str, SemanticModel] | SemanticModel,
+    name: str = "semantic_model",
+    description: str | None = None,
+    ai_context: str | dict | None = None,
+) -> dict[str, Any]:
+    """Convert BSL SemanticModel(s) to an OSI-compliant dict.
+
+    Each model becomes one OSI dataset (dimensions -> fields, entity
+    dimensions -> primary_key).  Measures and calculated measures of all
+    models are gathered into a single metrics list, and joins into a
+    relationships list de-duplicated by relationship name.
+
+    Args:
+        models: A single SemanticModel or dict of name -> SemanticModel
+        name: Name for the OSI semantic model
+        description: Optional description
+        ai_context: Optional AI context
+
+    Returns:
+        Dict that can be serialized to OSI YAML via yaml.dump()
+
+    Notes:
+        A dataset's ``source`` falls back to the model name when the table
+        name cannot be determined.  Calculated measures are exported with
+        their own name as the expression text.
+
+    Example:
+        >>> from boring_semantic_layer import from_yaml
+        >>> from boring_semantic_layer.osi import to_osi
+        >>> models = from_yaml("flights.yml")
+        >>> osi = to_osi(models, name="flights_analytics")
+        >>> import yaml
+        >>> print(yaml.dump(osi, sort_keys=False))
+    """
+    if isinstance(models, (SemanticModel, SemanticTable)):
+        # A lone model is keyed by its op name, or "model" when unnamed
+        models = {models.op().name or "model": models}
+
+    datasets: list[dict] = []
+    metrics_out: list[dict] = []
+    relationships_out: list[dict] = []
+    seen_relationship_names: set[str] = set()
+
+    for model_name, model in models.items():
+        op = model.op()
+
+        # Source table name; any lookup failure falls back to the model name
+        try:
+            untagged = op.to_untagged()
+            if hasattr(untagged, "get_name"):
+                source = untagged.get_name()
+            elif hasattr(untagged, "op") and hasattr(untagged.op(), "name"):
+                source = untagged.op().name or model_name
+            else:
+                source = model_name
+        except Exception:
+            source = model_name
+        dataset: dict[str, Any] = {"name": model_name, "source": source}
+
+        # Entity dimensions form the primary key
+        dims = op.get_dimensions()
+        pk_cols = [
+            _expr_to_sql_string(dim.expr) or dim_name
+            for dim_name, dim in dims.items()
+            if dim.is_entity
+        ]
+        if pk_cols:
+            dataset["primary_key"] = pk_cols
+        if op.description:
+            dataset["description"] = op.description
+
+        # Every dimension is exported as a field
+        fields = [_dimension_to_osi_field(dim_name, dim) for dim_name, dim in dims.items()]
+        if fields:
+            dataset["fields"] = fields
+        datasets.append(dataset)
+
+        # Plain measures: converted with this dataset's name as column qualifier
+        metrics_out.extend(
+            _measure_to_osi_metric(meas_name, meas, model_name)
+            for meas_name, meas in op.get_measures().items()
+        )
+
+        # Calculated measures: exported by name only and tagged as BSL-specific
+        for cm_name, cm_fn in op.get_calculated_measures().items():
+            metric: dict[str, Any] = {
+                "name": cm_name,
+                "expression": _make_osi_expression(cm_name),
+            }
+            if isinstance(cm_fn, Measure) and cm_fn.description:
+                metric["description"] = cm_fn.description
+            marker = _json_dumps({"bsl_type": "calculated_measure"})
+            metric["custom_extensions"] = [
+                {"vendor_name": "COMMON", "data": marker}
+            ]
+            metrics_out.append(metric)
+
+        # Joins: keep only the first relationship seen under each name
+        for rel in _extract_join_info(model):
+            if rel["name"] in seen_relationship_names:
+                continue
+            seen_relationship_names.add(rel["name"])
+            relationships_out.append(rel)
+
+    # Assemble the document; optional sections are omitted when empty
+    semantic_model: dict[str, Any] = {"name": name, "datasets": datasets}
+    optional_sections = (
+        ("description", description),
+        ("ai_context", ai_context),
+        ("relationships", relationships_out),
+        ("metrics", metrics_out),
+    )
+    for key, value in optional_sections:
+        if value:
+            semantic_model[key] = value
+
+    return {"version": OSI_VERSION, "semantic_model": [semantic_model]}
+
+
+def to_osi_yaml(
+    models: dict[str, SemanticModel] | SemanticModel,
+    name: str = "semantic_model",
+    description: str | None = None,
+    ai_context: str | dict | None = None,
+) -> str:
+    """Render BSL models as OSI YAML text.
+
+    Builds the OSI dict with to_osi() and dumps it in block style, keeping key order.
+    """
+    from yaml import dump as _dump_yaml
+
+    document = to_osi(models, name=name, description=description, ai_context=ai_context)
+    return _dump_yaml(document, default_flow_style=False, sort_keys=False)
+
+
+# ---------------------------------------------------------------------------
+# Import: OSI -> BSL
+# ---------------------------------------------------------------------------
+
+
+def _parse_osi_expression(expr_obj: dict, prefer_dialect: str = "ANSI_SQL") -> str:
+    """Pick the SQL text out of an OSI expression object.
+
+    The entry whose ``dialect`` equals ``prefer_dialect`` wins; otherwise the
+    first listed dialect is used.
+
+    Raises:
+        ValueError: if the expression lists no dialects.
+    """
+    candidates = expr_obj.get("dialects", [])
+    if not candidates:
+        raise ValueError("OSI expression has no dialects")
+
+    preferred = (c for c in candidates if c.get("dialect") == prefer_dialect)
+    chosen = next(preferred, candidates[0])
+    return chosen["expression"]
+
+
+def _osi_field_to_dimension(field: dict) -> tuple[str, Dimension]:
+    """Convert an OSI field dict to a (name, BSL Dimension) tuple.
+
+    BSL-only flags are restored from "COMMON" custom_extensions.  Extensions
+    whose ``data`` is missing, not valid JSON, or not a JSON object are
+    skipped instead of aborting the import.
+    """
+    import json
+
+    name, sql_expr = field["name"], _parse_osi_expression(field["expression"])
+    kwargs: dict[str, Any] = {
+        "expr": _sql_to_deferred(sql_expr),
+        "description": field.get("description"),
+    }
+
+    # ``or {}`` / ``or []`` also cover keys that are present but null in YAML
+    if (field.get("dimension") or {}).get("is_time"):
+        kwargs["is_time_dimension"] = True
+    if "ai_context" in field:
+        kwargs["ai_context"] = field["ai_context"]
+
+    for ext in field.get("custom_extensions") or []:
+        if ext.get("vendor_name") != "COMMON":
+            continue
+        try:
+            data = json.loads(ext["data"])
+        except (json.JSONDecodeError, KeyError, TypeError):
+            continue  # TypeError: data is not a str/bytes JSON document
+        if not isinstance(data, dict):
+            continue
+        for flag in ("is_entity", "is_event_timestamp"):
+            if data.get(flag):
+                kwargs[flag] = True
+        if data.get("smallest_time_grain"):
+            kwargs["smallest_time_grain"] = data["smallest_time_grain"]
+        if data.get("derived_dimensions"):
+            kwargs["derived_dimensions"] = tuple(data["derived_dimensions"])
+
+    return name, Dimension(**kwargs)
+
+
+def _osi_metric_to_measure(metric: dict) -> tuple[str, Measure]:
+    """Build a (name, BSL Measure) pair from an OSI metric dict.
+
+    OSI metric expressions may qualify columns with the dataset name, whereas
+    a BSL measure is scoped to its own model, so the qualifier is stripped
+    (``SUM(flights.distance)`` -> ``SUM(distance)``) before the SQL is
+    turned into a deferred expression.
+    """
+    name = metric["name"]
+    local_sql = _strip_dataset_prefix(_parse_osi_expression(metric["expression"]))
+
+    measure_kwargs: dict[str, Any] = dict(
+        expr=_sql_to_deferred(local_sql),
+        description=metric.get("description"),
+    )
+    if "ai_context" in metric:
+        measure_kwargs["ai_context"] = metric["ai_context"]
+
+    return name, Measure(**measure_kwargs)
+
+
+def _strip_dataset_prefix(sql: str) -> str:
+    """Remove dataset qualifiers from column references inside function calls.
+
+    SUM(flights.distance) -> SUM(distance)
+    COUNT(DISTINCT customers.id) -> COUNT(DISTINCT id)
+    SUM(orders.price * orders.qty) -> SUM(price * qty)
+
+    Only ``identifier.`` directly followed by a column name (or ``*``) is
+    dropped, so decimal literals such as ``1.5`` are preserved.
+    """
+    qualifier = re.compile(r'\b[A-Za-z_]\w*\.(?=[A-Za-z_"*])')
+
+    def _strip_match(m: re.Match) -> str:
+        fn, inner = m.group(1), m.group(2).strip()
+        stripped = qualifier.sub("", inner)
+        if stripped == inner:
+            return m.group(0)  # nothing qualified: keep the original text
+        parts = stripped.split(None, 1)
+        if len(parts) == 2 and parts[0].upper() == "DISTINCT":
+            return f"{fn}(DISTINCT {parts[1]})"
+        return f"{fn}({stripped})"
+
+    return re.sub(r"(\w+)\(([^)]+)\)", _strip_match, sql)
+
+
+def from_osi(
+    osi_config: dict[str, Any],
+    tables: Mapping[str, Any] | None = None,
+) -> dict[str, SemanticModel]:
+    """Convert an OSI YAML dict to BSL SemanticModel instances.
+
+    Args:
+        osi_config: Parsed OSI YAML dict (as returned by yaml.safe_load)
+        tables: Optional mapping of dataset (or source) names to ibis tables.
+                Datasets without an entry get a string-typed placeholder table
+                (metadata-only, useful for inspection but not query execution).
+
+    Returns:
+        Dict mapping model names to SemanticModel instances
+
+    Raises:
+        ValueError: If ``osi_config`` has no ``semantic_model`` entries.
+
+    Example:
+        >>> import yaml
+        >>> from boring_semantic_layer.osi import from_osi
+        >>> with open("model.osi.yaml") as f:
+        ...     osi = yaml.safe_load(f)
+        >>> models = from_osi(osi, tables={"flights": con.table("flights")})
+    """
+    tables = dict(tables) if tables else {}
+
+    semantic_models = osi_config.get("semantic_model", [])
+    if not semantic_models:
+        raise ValueError("No semantic_model found in OSI config")
+
+    def _references(sql: str, dataset: str) -> bool:
+        # Whole-identifier match: "orders" must not claim a metric written
+        # against "customer_orders.amount".
+        return re.search(rf"(?<!\w){re.escape(dataset)}\.", sql) is not None
+
+    def _join_on(pairs):
+        # AND together one equality per key pair (supports composite keys)
+        def _cond(left, right):
+            cond = None
+            for lc, rc in pairs:
+                eq = getattr(left, lc) == getattr(right, rc)
+                cond = eq if cond is None else cond & eq
+            return cond
+        return _cond
+
+    result: dict[str, SemanticModel] = {}
+
+    for sm in semantic_models:
+        datasets = sm.get("datasets", [])
+        metrics = sm.get("metrics", [])
+        relationships = sm.get("relationships", [])
+        dataset_names = {ds["name"] for ds in datasets}
+
+        for ds in datasets:
+            ds_name = ds["name"]
+
+            # Backing table: by dataset name, then by source name, else a
+            # placeholder built from the fields (dataset skipped if none)
+            if ds_name in tables:
+                table = tables[ds_name]
+            elif ds.get("source") and ds["source"] in tables:
+                table = tables[ds["source"]]
+            else:
+                table = _create_placeholder_table(ds)
+                if table is None:
+                    continue
+
+            model = to_semantic_table(table, name=ds_name, description=ds.get("description"))
+
+            dimensions = dict(_osi_field_to_dimension(f) for f in ds.get("fields", []))
+            if dimensions:
+                model = model.with_dimensions(**dimensions)
+
+            # A metric goes to each dataset it references, or to every
+            # dataset when it references none (e.g. COUNT(*))
+            ds_measures: dict[str, Measure] = {}
+            for metric in metrics:
+                sql_expr = _parse_osi_expression(metric["expression"])
+                if _references(sql_expr, ds_name) or not any(
+                    _references(sql_expr, other) for other in dataset_names if other != ds_name
+                ):
+                    meas_name, meas = _osi_metric_to_measure(metric)
+                    ds_measures[meas_name] = meas
+            if ds_measures:
+                model = model.with_measures(**ds_measures)
+
+            result[ds_name] = model
+
+        # Apply relationships as joins (only when tables were provided)
+        if not tables:
+            continue
+        for rel in relationships or []:
+            from_ds, to_ds = rel.get("from", ""), rel.get("to", "")
+            key_pairs = list(zip(rel.get("from_columns", []), rel.get("to_columns", [])))
+            if from_ds not in result or to_ds not in result or not key_pairs:
+                continue
+            if key_pairs[0][0] == "unknown":
+                continue  # placeholder columns: join keys are not known
+            result[from_ds] = result[from_ds].join_one(result[to_ds], on=_join_on(key_pairs))
+
+    return result
+
+
+def from_osi_yaml(
+    yaml_path: str,
+    tables: Mapping[str, Any] | None = None,
+) -> dict[str, SemanticModel]:
+    """Read an OSI YAML file and build BSL models from it.
+
+    The file is parsed with read_yaml_file() and handed to from_osi() with ``tables``.
+    """
+    from .utils import read_yaml_file as _load
+
+    parsed = _load(yaml_path)
+    return from_osi(parsed, tables=tables)
+
+
+def _create_placeholder_table(dataset: dict) -> Any:
+    """Build an ibis table standing in for a dataset with no real table.
+
+    One string-typed column is declared per OSI field name, which is enough
+    for metadata inspection.  Returns None when the dataset has no fields
+    or the table cannot be constructed.
+    """
+    import ibis
+
+    field_defs = dataset.get("fields", [])
+    if not field_defs:
+        return None
+    # Column types are not derived from the fields, so "string" is a stand-in
+    columns = dict.fromkeys((f["name"] for f in field_defs), "string")
+    try:
+        return ibis.table(columns, name=dataset["name"])
+    except Exception:
+        return None
diff --git a/src/boring_semantic_layer/tests/test_osi.py b/src/boring_semantic_layer/tests/test_osi.py
new file mode 100644
index 00000000..f1d52e46
--- /dev/null
+++ b/src/boring_semantic_layer/tests/test_osi.py
@@ -0,0 +1,551 @@
+"""Tests for OSI (Open Semantic Interchange) converter."""
+
+import json
+
+import ibis
+import pytest
+
+from boring_semantic_layer import (
+    Dimension,
+    Measure,
+    to_semantic_table,
+)
+from boring_semantic_layer.osi import (
+    OSI_VERSION,
+    _deferred_to_sql,
+    _ibis_string_to_sql,
+    _sql_to_deferred,
+    _strip_dataset_prefix,
+    from_osi,
+    to_osi,
+    to_osi_yaml,
+)
+
+
+# ---------------------------------------------------------------------------
+# Expression conversion tests
+# ---------------------------------------------------------------------------
+
+
+class TestIbisStringToSql:  # string-form expressions ("_.col.agg()") -> SQL text
+    def test_simple_column(self):  # bare column: the "_." prefix is dropped
+        assert _ibis_string_to_sql("_.column_name") == "column_name"
+
+    def test_count(self):  # count with no column is expected as COUNT(*)
+        assert _ibis_string_to_sql("_.count()") == "COUNT(*)"
+
+    def test_sum(self):
+        assert _ibis_string_to_sql("_.amount.sum()") == "SUM(amount)"
+
+    def test_mean(self):  # mean() is expected under its SQL name, AVG
+        assert _ibis_string_to_sql("_.amount.mean()") == "AVG(amount)"
+
+    def test_max(self):
+        assert _ibis_string_to_sql("_.amount.max()") == "MAX(amount)"
+
+    def test_min(self):
+        assert _ibis_string_to_sql("_.amount.min()") == "MIN(amount)"
+
+    def test_nunique(self):  # nunique() is expected as COUNT(DISTINCT ...)
+        assert _ibis_string_to_sql("_.customer_id.nunique()") == "COUNT(DISTINCT customer_id)"
+
+
+class TestDeferredToSql:  # ibis Deferred objects -> SQL text
+    def test_simple_column(self):
+        from ibis import _
+
+        assert _deferred_to_sql(_.column_name) == "column_name"
+
+    def test_count(self):  # count with no column is expected as COUNT(*)
+        from ibis import _
+
+        assert _deferred_to_sql(_.count()) == "COUNT(*)"
+
+    def test_sum(self):
+        from ibis import _
+
+        result = _deferred_to_sql(_.amount.sum())
+        assert result == "SUM(amount)"
+
+    def test_mean(self):  # mean() is expected under its SQL name, AVG
+        from ibis import _
+
+        result = _deferred_to_sql(_.amount.mean())
+        assert result == "AVG(amount)"
+
+
+class TestSqlToDeferred:  # SQL text -> deferred expression, checked via str() only
+    def test_simple_column(self):
+        d = _sql_to_deferred("column_name")
+        # Only the string form is inspected: it must mention the column name
+        assert "column_name" in str(d)
+
+    def test_count_star(self):
+        d = _sql_to_deferred("COUNT(*)")
+        assert "count" in str(d).lower()
+
+    def test_sum(self):  # loose check: passes if either the aggregate or the column shows up
+        d = _sql_to_deferred("SUM(amount)")
+        assert "sum" in str(d).lower() or "amount" in str(d).lower()
+
+    def test_avg(self):  # loose check: "mean" (the ibis spelling of AVG) or the column name
+        d = _sql_to_deferred("AVG(price)")
+        assert "mean" in str(d).lower() or "price" in str(d).lower()
+
+
+class TestStripDatasetPrefix:  # "dataset.column" inside aggregates -> "column"
+    def test_sum_with_prefix(self):
+        assert _strip_dataset_prefix("SUM(flights.distance)") == "SUM(distance)"
+
+    def test_count_star(self):  # nothing to strip: returned unchanged
+        assert _strip_dataset_prefix("COUNT(*)") == "COUNT(*)"
+
+    def test_count_distinct_with_prefix(self):  # DISTINCT keyword must survive
+        assert (
+            _strip_dataset_prefix("COUNT(DISTINCT customers.id)")
+            == "COUNT(DISTINCT id)"
+        )
+
+    def test_no_prefix(self):  # unqualified column: returned unchanged
+        assert _strip_dataset_prefix("SUM(distance)") == "SUM(distance)"
+
+
+# ---------------------------------------------------------------------------
+# Export tests: BSL -> OSI
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def simple_model():
+    """Orders model: 3 dimensions (entity key, plain, time) and 3 measures (count, sum, mean)."""
+    table = ibis.table(
+        {"order_id": "int64", "customer_id": "int64", "amount": "float64", "created_at": "timestamp"},
+        name="orders",
+    )
+    model = to_semantic_table(table, name="orders", description="Order transactions")
+    model = model.with_dimensions(
+        order_id=Dimension(expr=ibis._.order_id, description="Order ID", is_entity=True),
+        customer_id=Dimension(expr=ibis._.customer_id, description="Customer ID"),
+        created_at=Dimension(
+            expr=ibis._.created_at,
+            description="Order creation timestamp",
+            is_time_dimension=True,
+            smallest_time_grain="TIME_GRAIN_DAY",
+        ),
+    )
+    model = model.with_measures(
+        order_count=Measure(expr=ibis._.count(), description="Total orders"),
+        total_amount=Measure(expr=ibis._.amount.sum(), description="Total order amount"),
+        avg_amount=Measure(expr=ibis._.amount.mean(), description="Average order amount"),
+    )
+    return model
+
+
+@pytest.fixture
+def model_with_ai_context():
+    """Products model whose dimensions and measure carry ai_context (dict and plain-string forms)."""
+    table = ibis.table(
+        {"product_id": "int64", "name": "string", "price": "float64"},
+        name="products",
+    )
+    model = to_semantic_table(table, name="products", description="Product catalog")
+    model = model.with_dimensions(
+        product_id=Dimension(
+            expr=ibis._.product_id,
+            description="Product identifier",
+            is_entity=True,
+            ai_context={"synonyms": ["SKU", "item ID"]},  # dict-valued ai_context
+        ),
+        name=Dimension(
+            expr=ibis._.name,
+            description="Product name",
+            ai_context="Product display name shown to customers",  # string-valued ai_context
+        ),
+    )
+    model = model.with_measures(
+        avg_price=Measure(
+            expr=ibis._.price.mean(),
+            description="Average product price",
+            ai_context={"synonyms": ["mean price", "price average"]},
+        ),
+    )
+    return model
+
+
+class TestToOsi:  # export path: BSL model(s) -> OSI dict
+    def test_basic_structure(self, simple_model):  # version + one semantic_model entry
+        osi = to_osi(simple_model, name="test_model")
+        assert osi["version"] == OSI_VERSION
+        assert len(osi["semantic_model"]) == 1
+        sm = osi["semantic_model"][0]
+        assert sm["name"] == "test_model"
+        assert "datasets" in sm
+
+    def test_dataset_fields(self, simple_model):  # every dimension shows up as a field
+        osi = to_osi(simple_model)
+        ds = osi["semantic_model"][0]["datasets"][0]
+        assert ds["name"] == "orders"
+        assert ds["description"] == "Order transactions"
+        assert "fields" in ds
+        field_names = {f["name"] for f in ds["fields"]}
+        assert "order_id" in field_names
+        assert "customer_id" in field_names
+        assert "created_at" in field_names
+
+    def test_primary_key_from_entity(self, simple_model):  # is_entity dims -> primary_key
+        osi = to_osi(simple_model)
+        ds = osi["semantic_model"][0]["datasets"][0]
+        assert "primary_key" in ds
+        assert "order_id" in ds["primary_key"]
+
+    def test_time_dimension(self, simple_model):
+        osi = to_osi(simple_model)
+        ds = osi["semantic_model"][0]["datasets"][0]
+        created_at = next(f for f in ds["fields"] if f["name"] == "created_at")
+        assert created_at["dimension"]["is_time"] is True
+
+    def test_non_time_dimension(self, simple_model):  # is_time is emitted explicitly as False
+        osi = to_osi(simple_model)
+        ds = osi["semantic_model"][0]["datasets"][0]
+        customer = next(f for f in ds["fields"] if f["name"] == "customer_id")
+        assert customer["dimension"]["is_time"] is False
+
+    def test_metrics(self, simple_model):  # measures land in the model-level metrics list
+        osi = to_osi(simple_model)
+        sm = osi["semantic_model"][0]
+        assert "metrics" in sm
+        metric_names = {m["name"] for m in sm["metrics"]}
+        assert "order_count" in metric_names
+        assert "total_amount" in metric_names
+        assert "avg_amount" in metric_names
+
+    def test_metric_expressions(self, simple_model):  # expression is a dialects list, ANSI_SQL first
+        osi = to_osi(simple_model)
+        sm = osi["semantic_model"][0]
+        count_metric = next(m for m in sm["metrics"] if m["name"] == "order_count")
+        expr = count_metric["expression"]
+        assert "dialects" in expr
+        assert len(expr["dialects"]) >= 1
+        assert expr["dialects"][0]["dialect"] == "ANSI_SQL"
+
+    def test_field_expression_format(self, simple_model):  # field expressions are not dataset-qualified
+        osi = to_osi(simple_model)
+        ds = osi["semantic_model"][0]["datasets"][0]
+        order_id_field = next(f for f in ds["fields"] if f["name"] == "order_id")
+        expr = order_id_field["expression"]
+        assert "dialects" in expr
+        assert expr["dialects"][0]["dialect"] == "ANSI_SQL"
+        assert expr["dialects"][0]["expression"] == "order_id"
+
+    def test_custom_extensions_for_bsl_metadata(self, simple_model):
+        osi = to_osi(simple_model)
+        ds = osi["semantic_model"][0]["datasets"][0]
+        order_id_field = next(f for f in ds["fields"] if f["name"] == "order_id")
+        assert "custom_extensions" in order_id_field
+        ext = order_id_field["custom_extensions"][0]
+        assert ext["vendor_name"] == "COMMON"
+        data = json.loads(ext["data"])  # extension payload is a JSON string
+        assert data["is_entity"] is True
+
+    def test_time_grain_in_custom_extensions(self, simple_model):
+        osi = to_osi(simple_model)
+        ds = osi["semantic_model"][0]["datasets"][0]
+        created_at_field = next(f for f in ds["fields"] if f["name"] == "created_at")
+        ext = created_at_field["custom_extensions"][0]
+        data = json.loads(ext["data"])
+        assert data["smallest_time_grain"] == "TIME_GRAIN_DAY"
+
+    def test_ai_context_on_dimensions(self, model_with_ai_context):
+        osi = to_osi(model_with_ai_context)
+        ds = osi["semantic_model"][0]["datasets"][0]
+        product_field = next(f for f in ds["fields"] if f["name"] == "product_id")
+        assert "ai_context" in product_field
+        assert product_field["ai_context"]["synonyms"] == ["SKU", "item ID"]
+
+    def test_ai_context_on_metrics(self, model_with_ai_context):
+        osi = to_osi(model_with_ai_context)
+        sm = osi["semantic_model"][0]
+        avg_price = next(m for m in sm["metrics"] if m["name"] == "avg_price")
+        assert "ai_context" in avg_price
+        assert avg_price["ai_context"]["synonyms"] == ["mean price", "price average"]
+
+    def test_with_description_and_ai_context(self, simple_model):  # model-level metadata arguments
+        osi = to_osi(
+            simple_model,
+            name="my_model",
+            description="A test model",
+            ai_context={"instructions": "Use for order analysis"},
+        )
+        sm = osi["semantic_model"][0]
+        assert sm["description"] == "A test model"
+        assert sm["ai_context"]["instructions"] == "Use for order analysis"
+
+    def test_multiple_models(self):  # a dict of models yields one dataset per model
+        orders = ibis.table({"id": "int64", "amount": "float64"}, name="orders")
+        customers = ibis.table({"id": "int64", "name": "string"}, name="customers")
+
+        m1 = to_semantic_table(orders, name="orders")
+        m1 = m1.with_dimensions(id=Dimension(expr=ibis._.id))
+        m1 = m1.with_measures(total=Measure(expr=ibis._.amount.sum()))
+
+        m2 = to_semantic_table(customers, name="customers")
+        m2 = m2.with_dimensions(id=Dimension(expr=ibis._.id))
+
+        osi = to_osi({"orders": m1, "customers": m2}, name="ecommerce")
+        assert len(osi["semantic_model"][0]["datasets"]) == 2
+
+
+class TestToOsiYaml:  # smoke test of the YAML wrapper
+    def test_yaml_output(self, simple_model):  # only checks that the main keys appear in the text
+        yaml_str = to_osi_yaml(simple_model, name="test")
+        assert "version:" in yaml_str
+        assert "semantic_model:" in yaml_str
+        assert "datasets:" in yaml_str
+
+
+# ---------------------------------------------------------------------------
+# Import tests: OSI -> BSL
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def osi_config():
+    """Minimal OSI document: one "orders" dataset (3 fields) plus two model-level metrics."""
+    return {
+        "version": "0.1.1",
+        "semantic_model": [
+            {
+                "name": "test_model",
+                "description": "Test semantic model",
+                "datasets": [
+                    {
+                        "name": "orders",
+                        "source": "orders_table",  # deliberately differs from the dataset name
+                        "primary_key": ["order_id"],
+                        "description": "Order transactions",
+                        "fields": [
+                            {
+                                "name": "order_id",
+                                "expression": {
+                                    "dialects": [
+                                        {"dialect": "ANSI_SQL", "expression": "order_id"}
+                                    ]
+                                },
+                                "dimension": {"is_time": False},
+                                "description": "Order identifier",
+                                "custom_extensions": [
+                                    {
+                                        "vendor_name": "COMMON",
+                                        "data": json.dumps({"is_entity": True}),  # payload is a JSON string
+                                    }
+                                ],
+                            },
+                            {
+                                "name": "created_at",
+                                "expression": {
+                                    "dialects": [
+                                        {"dialect": "ANSI_SQL", "expression": "created_at"}
+                                    ]
+                                },
+                                "dimension": {"is_time": True},
+                                "description": "Creation timestamp",
+                            },
+                            {
+                                "name": "amount",
+                                "expression": {
+                                    "dialects": [
+                                        {"dialect": "ANSI_SQL", "expression": "amount"}
+                                    ]
+                                },
+                                "dimension": {"is_time": False},
+                            },
+                        ],
+                    },
+                ],
+                "metrics": [
+                    {
+                        "name": "total_amount",
+                        "expression": {
+                            "dialects": [
+                                {"dialect": "ANSI_SQL", "expression": "SUM(orders.amount)"}
+                            ]
+                        },
+                        "description": "Total order amount",
+                    },
+                    {
+                        "name": "order_count",
+                        "expression": {
+                            "dialects": [
+                                {"dialect": "ANSI_SQL", "expression": "COUNT(*)"}
+                            ]
+                        },
+                        "description": "Number of orders",
+                    },
+                ],
+            }
+        ],
+    }
+
+
+class TestFromOsi:
+    def test_basic_import(self, osi_config):
+        models = from_osi(osi_config)
+        assert "orders" in models
+
+    def test_model_description(self, osi_config):
+        models = from_osi(osi_config)
+        op = models["orders"].op()
+        assert op.description == "Order transactions"
+
+    def test_dimensions_imported(self, osi_config):
+        models = from_osi(osi_config)
+        dims = models["orders"].op().get_dimensions()
+        assert "order_id" in dims
+        assert "created_at" in dims
+        assert "amount" in dims
+
+    def test_dimension_descriptions(self, osi_config):
+        models = from_osi(osi_config)
+        dims = models["orders"].op().get_dimensions()
+        assert dims["order_id"].description == "Order identifier"
+        assert dims["created_at"].description == "Creation timestamp"
+
+    def test_time_dimension_flag(self, osi_config):
+        models = from_osi(osi_config)
+        dims = models["orders"].op().get_dimensions()
+        assert dims["created_at"].is_time_dimension is True
+        assert dims["order_id"].is_time_dimension is False
+
+    def test_entity_from_custom_extensions(self, osi_config):
+        models = from_osi(osi_config)
+        dims = models["orders"].op().get_dimensions()
+        assert dims["order_id"].is_entity is True
+
+    def test_measures_imported(self, osi_config):
+        models = from_osi(osi_config)
+        measures = models["orders"].op().get_measures()
+        assert "total_amount" in measures
+        assert "order_count" in measures
+
+    def test_measure_descriptions(self, osi_config):
+        models = from_osi(osi_config)
+        measures = models["orders"].op().get_measures()
+        assert measures["total_amount"].description == "Total order amount"
+        assert measures["order_count"].description == "Number of orders"
+
+    def test_with_real_table(self, osi_config):
+        """Test import with a real DuckDB table backing."""
+        con = ibis.duckdb.connect()
+        con.raw_sql(
+            "CREATE TABLE orders_table (order_id INT, created_at TIMESTAMP, amount DOUBLE)"
+        )
+        table = con.table("orders_table")
+        models = from_osi(osi_config, tables={"orders": table})
+        assert "orders" in models
+
+    def test_ai_context_preserved(self):
+        config = {
+            "version": "0.1.1",
+            "semantic_model": [
+                {
+                    "name": "test",
+                    "datasets": [
+                        {
+                            "name": "items",
+                            "source": "items",
+                            "fields": [
+                                {
+                                    "name": "item_id",
+                                    "expression": {
+                                        "dialects": [
+                                            {"dialect": "ANSI_SQL", "expression": "item_id"}
+                                        ]
+                                    },
+                                    "ai_context": {"synonyms": ["SKU", "product_id"]},
+                                }
+                            ],
+                        }
+                    ],
+                }
+            ],
+        }
+        models = from_osi(config)
+        dims = models["items"].op().get_dimensions()
+        assert dims["item_id"].ai_context == {"synonyms": ["SKU", "product_id"]}
+
+
+# ---------------------------------------------------------------------------
+# Round-trip tests
+# ---------------------------------------------------------------------------
+
+
+class TestRoundTrip:
+    def test_bsl_to_osi_to_bsl(self, simple_model):
+        """BSL -> OSI -> BSL preserves key semantics."""
+        # Export
+        osi = to_osi(simple_model, name="round_trip_test")
+
+        # Import back
+        models = from_osi(osi)
+        assert "orders" in models
+
+        # Check dimensions preserved
+        orig_dims = simple_model.op().get_dimensions()
+        new_dims = models["orders"].op().get_dimensions()
+        assert set(orig_dims.keys()) == set(new_dims.keys())
+
+        # Check descriptions preserved
+        for name in orig_dims:
+            assert orig_dims[name].description == new_dims[name].description
+
+        # Check time dimension flag preserved
+        assert new_dims["created_at"].is_time_dimension is True
+        assert new_dims["order_id"].is_time_dimension is False
+
+        # Check entity flag preserved (via custom_extensions round-trip)
+        assert new_dims["order_id"].is_entity is True
+
+    def test_bsl_to_osi_to_bsl_measures(self, simple_model):
+        """BSL -> OSI -> BSL preserves measure semantics."""
+        osi = to_osi(simple_model)
+        models = from_osi(osi)
+
+        orig_measures = simple_model.op().get_measures()
+        new_measures = models["orders"].op().get_measures()
+
+        # All measures should be present
+        assert set(orig_measures.keys()) == set(new_measures.keys())
+
+        # Descriptions preserved
+        for name in orig_measures:
+            assert orig_measures[name].description == new_measures[name].description
+
+    def test_osi_to_bsl_to_osi(self, osi_config):
+        """OSI -> BSL -> OSI preserves key structure."""
+        # Import
+        models = from_osi(osi_config)
+
+        # Export back
+        osi_out = to_osi(models, name="round_trip")
+
+        # Check structure
+        assert osi_out["version"] == OSI_VERSION
+        ds_out = osi_out["semantic_model"][0]["datasets"][0]
+        assert ds_out["name"] == "orders"
+        assert ds_out["description"] == "Order transactions"
+
+        # Field count preserved
+        ds_in = osi_config["semantic_model"][0]["datasets"][0]
+        assert len(ds_out["fields"]) == len(ds_in["fields"])
+
+    def test_ai_context_round_trip(self, model_with_ai_context):
+        """ai_context survives BSL -> OSI -> BSL round-trip."""
+        osi = to_osi(model_with_ai_context)
+        models = from_osi(osi)
+
+        dims = models["products"].op().get_dimensions()
+        assert dims["product_id"].ai_context == {"synonyms": ["SKU", "item ID"]}
+        assert dims["name"].ai_context == "Product display name shown to customers"
+
+        measures = models["products"].op().get_measures()
+        assert measures["avg_price"].ai_context == {"synonyms": ["mean price", "price average"]}
diff --git a/src/boring_semantic_layer/yaml.py b/src/boring_semantic_layer/yaml.py
index 71af2bfc..25f92d46 100644
--- a/src/boring_semantic_layer/yaml.py
+++ b/src/boring_semantic_layer/yaml.py
@@ -30,6 +30,8 @@ def _parse_expression_config(name: str, config: str | dict, metric_type: str):
             extra_kwargs["is_time_dimension"] = config.get("is_time_dimension", False)
             extra_kwargs["smallest_time_grain"] = config.get("smallest_time_grain")
             extra_kwargs["derived_dimensions"] = tuple(config.get("derived_dimensions") or ())
+        if "ai_context" in config:
+            extra_kwargs["ai_context"] = config["ai_context"]
         return config["expr"], config.get("description"), extra_kwargs
     else:
         raise ValueError(f"Invalid {metric_type} format for '{name}'. Must be a string or dict")

From 387a6d5b4a76e11947c49caff72280c241af1787 Mon Sep 17 00:00:00 2001
From: boringdata <boringdata@users.noreply.github.com>
Date: Thu, 2 Apr 2026 07:29:53 +0000
Subject: [PATCH 2/4] refactor: make from_yaml/from_config natively parse OSI
 YAML
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of a separate from_osi() conversion layer, from_config() now
auto-detects OSI format (version + semantic_model keys) and parses it
directly. This means from_yaml("model.osi.yaml") just works — BSL
natively speaks OSI.

- OSI parsing logic moved from osi.py into yaml.py
- osi.py slimmed to export (to_osi/to_osi_yaml) + expression helpers
- from_osi/from_osi_yaml kept in osi.py as thin aliases that delegate
  to from_config/from_yaml
- Removed from_osi/from_osi_yaml from top-level __init__.py exports
- Tests updated to use from_config for OSI import (the native path)
- Added format detection tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/boring_semantic_layer/__init__.py       |   4 -
 src/boring_semantic_layer/osi.py            | 514 ++++----------------
 src/boring_semantic_layer/tests/test_osi.py | 299 ++++--------
 src/boring_semantic_layer/yaml.py           | 380 ++++++++++++---
 4 files changed, 492 insertions(+), 705 deletions(-)

diff --git a/src/boring_semantic_layer/__init__.py b/src/boring_semantic_layer/__init__.py
index 828cebc3..608d05bd 100644
--- a/src/boring_semantic_layer/__init__.py
+++ b/src/boring_semantic_layer/__init__.py
@@ -44,8 +44,6 @@
     from_yaml,
 )
 from .osi import (
-    from_osi,
-    from_osi_yaml,
     to_osi,
     to_osi_yaml,
 )
@@ -65,8 +63,6 @@
     "from_yaml",
     "to_osi",
     "to_osi_yaml",
-    "from_osi",
-    "from_osi_yaml",
     "MCPSemanticModel",
     "LangGraphBackend",
     "options",
diff --git a/src/boring_semantic_layer/osi.py b/src/boring_semantic_layer/osi.py
index d2802f77..c7b734ab 100644
--- a/src/boring_semantic_layer/osi.py
+++ b/src/boring_semantic_layer/osi.py
@@ -1,8 +1,10 @@
 """
-OSI (Open Semantic Interchange) converter for Boring Semantic Layer.
+OSI (Open Semantic Interchange) support for Boring Semantic Layer.
 
-Provides bidirectional conversion between BSL's semantic model format
-and the OSI v0.1.1 YAML specification.
+Export: ``to_osi`` / ``to_osi_yaml`` convert BSL models to OSI v0.1.1 YAML.
+Import: ``from_yaml`` / ``from_config`` natively detect and parse OSI YAML
+        (no separate import step needed).  ``from_osi`` / ``from_osi_yaml``
+        are kept as convenience aliases.
 
 See: https://github.com/open-semantic-interchange/OSI
 """
@@ -13,71 +15,46 @@
 from collections.abc import Mapping
 from typing import Any
 
-from ibis import _
 from ibis.common.deferred import Deferred
 
-from .api import to_semantic_table
 from .expr import SemanticModel, SemanticTable
 from .ops import Dimension, Measure, SemanticTableOp, _is_deferred
-from .utils import expr_to_ibis_string, safe_eval
+from .utils import expr_to_ibis_string
 
 OSI_VERSION = "0.1.1"
-BSL_VENDOR = "BSL"
 
 
 # ---------------------------------------------------------------------------
-# Expression helpers
+# Expression helpers  (BSL -> SQL for OSI export)
 # ---------------------------------------------------------------------------
 
 
 def _deferred_to_sql(expr: Deferred) -> str:
-    """Convert an Ibis Deferred expression to a simple SQL-like string.
-
-    Handles common patterns:
-      _.column_name           -> "column_name"
-      _.column.sum()          -> "SUM(column)"
-      _.column.mean()         -> "AVG(column)"
-      _.column.max()          -> "MAX(column)"
-      _.column.min()          -> "MIN(column)"
-      _.column.nunique()      -> "COUNT(DISTINCT column)"
-      _.count()               -> "COUNT(*)"
-    """
-    s = str(expr)  # e.g. "_.column_name" or "_.column.sum()"
-    return _ibis_string_to_sql(s)
+    """Convert an Ibis Deferred expression to a simple SQL-like string."""
+    return _ibis_string_to_sql(str(expr))
 
 
 def _ibis_string_to_sql(s: str) -> str:
     """Convert an Ibis deferred string repr to SQL expression."""
     s = s.strip()
 
-    # _.count() -> COUNT(*)
     if s == "_.count()":
         return "COUNT(*)"
 
-    # _.col.agg() patterns
-    agg_map = {
-        "sum": "SUM",
-        "mean": "AVG",
-        "max": "MAX",
-        "min": "MIN",
-    }
+    agg_map = {"sum": "SUM", "mean": "AVG", "max": "MAX", "min": "MIN"}
     for ibis_fn, sql_fn in agg_map.items():
-        pattern = rf"^_\.(.+)\.{ibis_fn}\(\)$"
-        m = re.match(pattern, s)
+        m = re.match(rf"^_\.(.+)\.{ibis_fn}\(\)$", s)
         if m:
             return f"{sql_fn}({m.group(1)})"
 
-    # _.col.nunique() -> COUNT(DISTINCT col)
     m = re.match(r"^_\.(.+)\.nunique\(\)$", s)
     if m:
         return f"COUNT(DISTINCT {m.group(1)})"
 
-    # Simple column reference: _.col -> col
     m = re.match(r"^_\.(\w+)$", s)
     if m:
         return m.group(1)
 
-    # Fallback: strip leading "_." and return as-is
     if s.startswith("_."):
         return s[2:]
     return s
@@ -88,7 +65,6 @@ def _expr_to_sql_string(expr: Any) -> str | None:
     if _is_deferred(expr):
         return _deferred_to_sql(expr)
 
-    # Try the ibis string extraction utility
     from returns.result import Success
 
     result = expr_to_ibis_string(expr)
@@ -100,52 +76,17 @@ def _expr_to_sql_string(expr: Any) -> str | None:
     return None
 
 
-def _sql_to_deferred(sql: str) -> Deferred:
-    """Convert a simple SQL expression back to an Ibis Deferred.
-
-    Handles:
-      "column_name"                -> _.column_name
-      "SUM(column)"                -> _.column.sum()
-      "AVG(column)"                -> _.column.mean()
-      "COUNT(*)"                   -> _.count()
-      "COUNT(DISTINCT column)"     -> _.column.nunique()
-    """
-    sql = sql.strip()
-
-    if sql == "COUNT(*)":
-        return safe_eval("_.count()", context={"_": _}).unwrap()
-
-    # COUNT(DISTINCT col)
-    m = re.match(r"^COUNT\(DISTINCT\s+(\w+)\)$", sql, re.IGNORECASE)
-    if m:
-        return safe_eval(f"_.{m.group(1)}.nunique()", context={"_": _}).unwrap()
-
-    # AGG(col) patterns
-    sql_to_ibis = {"SUM": "sum", "AVG": "mean", "MAX": "max", "MIN": "min"}
-    for sql_fn, ibis_fn in sql_to_ibis.items():
-        m = re.match(rf"^{sql_fn}\((\w+)\)$", sql, re.IGNORECASE)
-        if m:
-            return safe_eval(f"_.{m.group(1)}.{ibis_fn}()", context={"_": _}).unwrap()
-
-    # Simple column reference
-    if re.match(r"^\w+$", sql):
-        return safe_eval(f"_.{sql}", context={"_": _}).unwrap()
-
-    # Fallback: try eval as-is with underscore prefix
-    try:
-        return safe_eval(f"_.{sql}", context={"_": _}).unwrap()
-    except Exception:
-        # Last resort: return as deferred column access
-        return safe_eval(f"_.{sql.split('.')[0] if '.' in sql else sql}", context={"_": _}).unwrap()
-
-
 # ---------------------------------------------------------------------------
-# Export: BSL -> OSI
+# Export helpers
 # ---------------------------------------------------------------------------
 
 
+def _json_dumps(obj: Any) -> str:
+    import json
+    return json.dumps(obj)
+
+
 def _make_osi_expression(sql_expr: str, dialect: str = "ANSI_SQL") -> dict:
-    """Create an OSI expression object."""
     return {"dialects": [{"dialect": dialect, "expression": sql_expr}]}
 
 
@@ -157,18 +98,13 @@ def _dimension_to_osi_field(name: str, dim: Dimension) -> dict:
         "expression": _make_osi_expression(sql or name),
     }
 
-    if dim.is_time_dimension or dim.is_event_timestamp:
-        field["dimension"] = {"is_time": True}
-    else:
-        field["dimension"] = {"is_time": False}
+    field["dimension"] = {"is_time": bool(dim.is_time_dimension or dim.is_event_timestamp)}
 
     if dim.description:
         field["description"] = dim.description
-
     if dim.ai_context:
         field["ai_context"] = dim.ai_context
 
-    # Store BSL-specific metadata in custom_extensions
     bsl_data: dict[str, Any] = {}
     if dim.is_entity:
         bsl_data["is_entity"] = True
@@ -187,45 +123,13 @@ def _dimension_to_osi_field(name: str, dim: Dimension) -> dict:
     return field
 
 
-def _measure_to_osi_metric(
-    name: str, measure: Measure, dataset_name: str | None = None
-) -> dict:
-    """Convert a BSL Measure to an OSI metric dict."""
-    sql = _expr_to_sql_string(measure.expr)
-    if sql and dataset_name:
-        # Prefix column references in aggregate functions with dataset name
-        # e.g. SUM(amount) -> SUM(dataset.amount)
-        sql = _prefix_columns_in_sql(sql, dataset_name)
-
-    metric: dict[str, Any] = {
-        "name": name,
-        "expression": _make_osi_expression(sql or name),
-    }
-
-    if measure.description:
-        metric["description"] = measure.description
-
-    if measure.ai_context:
-        metric["ai_context"] = measure.ai_context
-
-    return metric
-
-
 def _prefix_columns_in_sql(sql: str, dataset: str) -> str:
-    """Add dataset prefix to bare column references inside aggregate functions.
-
-    SUM(amount) -> SUM(dataset.amount)
-    COUNT(*) stays as COUNT(*)
-    COUNT(DISTINCT col) -> COUNT(DISTINCT dataset.col)
-    """
+    """``SUM(amount)`` -> ``SUM(dataset.amount)``"""
     if sql == "COUNT(*)":
         return sql
 
-    # Handle aggregate functions: FN(col) -> FN(dataset.col)
     def _prefix_match(m: re.Match) -> str:
-        fn = m.group(1)
-        inner = m.group(2).strip()
-        # Handle DISTINCT keyword
+        fn, inner = m.group(1), m.group(2).strip()
         if inner.upper().startswith("DISTINCT "):
             col = inner[9:].strip()
             if "." not in col:
@@ -238,78 +142,65 @@ def _prefix_match(m: re.Match) -> str:
     return re.sub(r"(\w+)\(([^)]+)\)", _prefix_match, sql)
 
 
-def _json_dumps(obj: Any) -> str:
-    """Serialize to JSON string for custom_extensions."""
-    import json
+def _measure_to_osi_metric(name: str, measure: Measure, dataset_name: str | None = None) -> dict:
+    sql = _expr_to_sql_string(measure.expr)
+    if sql and dataset_name:
+        sql = _prefix_columns_in_sql(sql, dataset_name)
 
-    return json.dumps(obj)
+    metric: dict[str, Any] = {
+        "name": name,
+        "expression": _make_osi_expression(sql or name),
+    }
+    if measure.description:
+        metric["description"] = measure.description
+    if measure.ai_context:
+        metric["ai_context"] = measure.ai_context
+    return metric
 
 
 def _extract_join_info(model: SemanticModel) -> list[dict]:
-    """Extract relationship info from a model's join chain.
+    """Extract relationship info from a model's join chain."""
+    from .ops import SemanticJoinOp
 
-    Returns list of OSI relationship dicts by inspecting the model's
-    SemanticJoinOp chain.
-    """
-    relationships = []
+    relationships: list[dict] = []
     op = model.op()
 
-    # Walk up the op tree looking for SemanticJoinOp nodes
-    from .ops import SemanticJoinOp
-
-    def _walk_joins(node, parent_name: str | None = None):
+    def _name(node) -> str:
+        if isinstance(node, SemanticTableOp):
+            return node.name or "unnamed"
+        if hasattr(node, "name") and node.name:
+            return node.name
+        if hasattr(node, "table"):
+            inner = node.table if not hasattr(node.table, "op") else node.table.op()
+            return _name(inner)
+        return "unnamed"
+
+    def _walk(node):
         if isinstance(node, SemanticJoinOp):
             rel: dict[str, Any] = {
-                "name": f"{_get_model_name(node.left)}_{_get_model_name(node.right)}",
-                "from": _get_model_name(node.left),
-                "to": _get_model_name(node.right),
+                "name": f"{_name(node.left)}_{_name(node.right)}",
+                "from": _name(node.left),
+                "to": _name(node.right),
+                "from_columns": ["unknown"],
+                "to_columns": ["unknown"],
             }
-            # Try to extract join columns from the predicate
-            left_cols, right_cols = _extract_join_columns(node)
-            if left_cols and right_cols:
-                rel["from_columns"] = left_cols
-                rel["to_columns"] = right_cols
-            else:
-                rel["from_columns"] = ["unknown"]
-                rel["to_columns"] = ["unknown"]
-
-            # Store cardinality in custom_extensions
             if hasattr(node, "cardinality"):
                 rel["custom_extensions"] = [
-                    {
-                        "vendor_name": "COMMON",
-                        "data": _json_dumps({"cardinality": node.cardinality}),
-                    }
+                    {"vendor_name": "COMMON", "data": _json_dumps({"cardinality": node.cardinality})}
                 ]
-
             relationships.append(rel)
-            _walk_joins(node.left, _get_model_name(node.left))
-            _walk_joins(node.right, _get_model_name(node.right))
-        elif isinstance(node, SemanticTableOp):
-            pass  # Base case
+            _walk(node.left)
+            _walk(node.right)
         elif hasattr(node, "table"):
-            _walk_joins(node.table if not hasattr(node.table, "op") else node.table.op())
+            _walk(node.table if not hasattr(node.table, "op") else node.table.op())
 
-    _walk_joins(op)
+    _walk(op)
     return relationships
 
 
-def _get_model_name(node) -> str:
-    """Extract model name from an op node."""
-    if isinstance(node, SemanticTableOp):
-        return node.name or "unnamed"
-    if hasattr(node, "name") and node.name:
-        return node.name
-    if hasattr(node, "table"):
-        inner = node.table if not hasattr(node.table, "op") else node.table.op()
-        return _get_model_name(inner)
-    return "unnamed"
-
-
-def _extract_join_columns(join_op) -> tuple[list[str], list[str]]:
-    """Try to extract column names from a join predicate. Returns (left_cols, right_cols)."""
-    # This is best-effort; join predicates are lambdas and hard to introspect
-    return [], []
+# ---------------------------------------------------------------------------
+# Public: Export BSL -> OSI
+# ---------------------------------------------------------------------------
 
 
 def to_osi(
@@ -327,15 +218,14 @@ def to_osi(
         ai_context: Optional AI context
 
     Returns:
-        Dict that can be serialized to OSI YAML via yaml.dump()
+        Dict that can be serialized to OSI YAML via ``yaml.dump()``
+
+    Example::
 
-    Example:
         >>> from boring_semantic_layer import from_yaml
         >>> from boring_semantic_layer.osi import to_osi
         >>> models = from_yaml("flights.yml")
         >>> osi = to_osi(models, name="flights_analytics")
-        >>> import yaml
-        >>> print(yaml.dump(osi, sort_keys=False))
     """
     if isinstance(models, (SemanticModel, SemanticTable)):
         op = models.op()
@@ -350,10 +240,8 @@ def to_osi(
     for model_name, model in models.items():
         op = model.op()
 
-        # --- Dataset ---
         dataset: dict[str, Any] = {"name": model_name}
 
-        # Source: try to get table name
         try:
             source_table = op.to_untagged()
             if hasattr(source_table, "get_name"):
@@ -365,7 +253,6 @@ def to_osi(
         except Exception:
             dataset["source"] = model_name
 
-        # Primary key from entity dimensions
         pk_cols = []
         dims = op.get_dimensions()
         for dim_name, dim in dims.items():
@@ -375,27 +262,19 @@ def to_osi(
         if pk_cols:
             dataset["primary_key"] = pk_cols
 
-        # Description
         if op.description:
             dataset["description"] = op.description
 
-        # Fields from dimensions
-        fields = []
-        for dim_name, dim in dims.items():
-            fields.append(_dimension_to_osi_field(dim_name, dim))
+        fields = [_dimension_to_osi_field(n, d) for n, d in dims.items()]
         if fields:
             dataset["fields"] = fields
 
         datasets.append(dataset)
 
-        # --- Metrics from measures ---
-        measures = op.get_measures()
-        for meas_name, meas in measures.items():
+        for meas_name, meas in op.get_measures().items():
             all_metrics.append(_measure_to_osi_metric(meas_name, meas, model_name))
 
-        # --- Metrics from calculated measures ---
-        calc_measures = op.get_calculated_measures()
-        for cm_name, cm_fn in calc_measures.items():
+        for cm_name, cm_fn in op.get_calculated_measures().items():
             metric: dict[str, Any] = {
                 "name": cm_name,
                 "expression": _make_osi_expression(cm_name),
@@ -403,23 +282,16 @@ def to_osi(
             if isinstance(cm_fn, Measure) and cm_fn.description:
                 metric["description"] = cm_fn.description
             metric.setdefault("custom_extensions", []).append(
-                {
-                    "vendor_name": "COMMON",
-                    "data": _json_dumps({"bsl_type": "calculated_measure"}),
-                }
+                {"vendor_name": "COMMON", "data": _json_dumps({"bsl_type": "calculated_measure"})}
             )
             all_metrics.append(metric)
 
-        # --- Relationships from joins ---
-        rels = _extract_join_info(model)
-        for rel in rels:
+        for rel in _extract_join_info(model):
             if rel["name"] not in seen_relationship_names:
                 all_relationships.append(rel)
                 seen_relationship_names.add(rel["name"])
 
-    # Build the OSI document
     semantic_model: dict[str, Any] = {"name": name, "datasets": datasets}
-
     if description:
         semantic_model["description"] = description
     if ai_context:
@@ -438,228 +310,32 @@ def to_osi_yaml(
     description: str | None = None,
     ai_context: str | dict | None = None,
 ) -> str:
-    """Convert BSL models to an OSI YAML string.
-
-    Convenience wrapper around to_osi() that returns a formatted YAML string.
-    """
+    """Convert BSL models to an OSI YAML string."""
     import yaml
-
-    osi_dict = to_osi(models, name=name, description=description, ai_context=ai_context)
-    return yaml.dump(osi_dict, sort_keys=False, default_flow_style=False)
+    return yaml.dump(
+        to_osi(models, name=name, description=description, ai_context=ai_context),
+        sort_keys=False,
+        default_flow_style=False,
+    )
 
 
 # ---------------------------------------------------------------------------
-# Import: OSI -> BSL
+# Convenience aliases — import delegates to from_config (which auto-detects)
 # ---------------------------------------------------------------------------
 
 
-def _parse_osi_expression(expr_obj: dict, prefer_dialect: str = "ANSI_SQL") -> str:
-    """Extract the SQL expression string from an OSI expression object.
-
-    Prefers the specified dialect, falls back to the first available.
-    """
-    dialects = expr_obj.get("dialects", [])
-    if not dialects:
-        raise ValueError("OSI expression has no dialects")
-
-    # Try preferred dialect first
-    for d in dialects:
-        if d.get("dialect") == prefer_dialect:
-            return d["expression"]
-
-    # Fallback to first
-    return dialects[0]["expression"]
-
-
-def _osi_field_to_dimension(field: dict) -> tuple[str, Dimension]:
-    """Convert an OSI field dict to a (name, BSL Dimension) tuple."""
-    name = field["name"]
-    sql_expr = _parse_osi_expression(field["expression"])
-    deferred = _sql_to_deferred(sql_expr)
-
-    kwargs: dict[str, Any] = {
-        "expr": deferred,
-        "description": field.get("description"),
-    }
-
-    # Dimension metadata
-    dim_meta = field.get("dimension", {})
-    if dim_meta.get("is_time"):
-        kwargs["is_time_dimension"] = True
-
-    # AI context
-    if "ai_context" in field:
-        kwargs["ai_context"] = field["ai_context"]
-
-    # BSL-specific from custom_extensions
-    import json
-
-    for ext in field.get("custom_extensions", []):
-        if ext.get("vendor_name") == "COMMON":
-            try:
-                data = json.loads(ext["data"])
-                if data.get("is_entity"):
-                    kwargs["is_entity"] = True
-                if data.get("is_event_timestamp"):
-                    kwargs["is_event_timestamp"] = True
-                if data.get("smallest_time_grain"):
-                    kwargs["smallest_time_grain"] = data["smallest_time_grain"]
-                if data.get("derived_dimensions"):
-                    kwargs["derived_dimensions"] = tuple(data["derived_dimensions"])
-            except (json.JSONDecodeError, KeyError):
-                pass
-
-    return name, Dimension(**kwargs)
-
-
-def _osi_metric_to_measure(metric: dict) -> tuple[str, Measure]:
-    """Convert an OSI metric dict to a (name, BSL Measure) tuple."""
-    name = metric["name"]
-    sql_expr = _parse_osi_expression(metric["expression"])
-
-    # Strip dataset prefix from column refs for BSL (which scopes per-model)
-    # e.g., "SUM(flights.distance)" -> "SUM(distance)" via deferred
-    sql_expr = _strip_dataset_prefix(sql_expr)
-    deferred = _sql_to_deferred(sql_expr)
-
-    kwargs: dict[str, Any] = {
-        "expr": deferred,
-        "description": metric.get("description"),
-    }
-
-    if "ai_context" in metric:
-        kwargs["ai_context"] = metric["ai_context"]
-
-    return name, Measure(**kwargs)
-
-
-def _strip_dataset_prefix(sql: str) -> str:
-    """Remove dataset.column prefixes from SQL, keeping just column name.
-
-    SUM(flights.distance) -> SUM(distance)
-    COUNT(DISTINCT customers.id) -> COUNT(DISTINCT id)
-    """
-
-    def _strip_match(m: re.Match) -> str:
-        fn = m.group(1)
-        inner = m.group(2).strip()
-        # Handle DISTINCT
-        if inner.upper().startswith("DISTINCT "):
-            rest = inner[9:].strip()
-            if "." in rest:
-                col = rest.split(".")[-1]
-                return f"{fn}(DISTINCT {col})"
-            return m.group(0)
-        if "." in inner and inner != "*":
-            col = inner.split(".")[-1]
-            return f"{fn}({col})"
-        return m.group(0)
-
-    return re.sub(r"(\w+)\(([^)]+)\)", _strip_match, sql)
-
-
 def from_osi(
     osi_config: dict[str, Any],
     tables: Mapping[str, Any] | None = None,
 ) -> dict[str, SemanticModel]:
-    """Convert an OSI YAML dict to BSL SemanticModel instances.
-
-    Args:
-        osi_config: Parsed OSI YAML dict (as returned by yaml.safe_load)
-        tables: Optional mapping of dataset names to ibis table expressions.
-                If not provided, models are created without backing tables
-                (metadata-only, useful for inspection but not query execution).
+    """Parse an OSI config dict into BSL models.
 
-    Returns:
-        Dict mapping model names to SemanticModel instances
-
-    Example:
-        >>> import yaml
-        >>> from boring_semantic_layer.osi import from_osi
-        >>> with open("model.osi.yaml") as f:
-        ...     osi = yaml.safe_load(f)
-        >>> models = from_osi(osi, tables={"flights": con.table("flights")})
+    This is a convenience alias for ``from_config(osi_config, tables=tables)``.
+    You can also call ``from_config`` or ``from_yaml`` directly — they
+    auto-detect OSI format.
     """
-    tables = dict(tables) if tables else {}
-
-    semantic_models = osi_config.get("semantic_model", [])
-    if not semantic_models:
-        raise ValueError("No semantic_model found in OSI config")
-
-    result: dict[str, SemanticModel] = {}
-
-    for sm in semantic_models:
-        datasets = sm.get("datasets", [])
-        metrics = sm.get("metrics", [])
-        relationships = sm.get("relationships", [])
-
-        # Build a mapping of dataset_name -> metrics that reference it
-        # (by checking column prefixes in metric expressions)
-        dataset_names = {ds["name"] for ds in datasets}
-
-        for ds in datasets:
-            ds_name = ds["name"]
-
-            # Get or create table
-            if ds_name in tables:
-                table = tables[ds_name]
-            elif ds.get("source") and ds["source"] in tables:
-                table = tables[ds["source"]]
-            else:
-                # Create a dummy table from field schema for metadata-only use
-                table = _create_placeholder_table(ds)
-                if table is None:
-                    continue
-
-            # Create semantic model
-            model = to_semantic_table(table, name=ds_name, description=ds.get("description"))
-
-            # Parse fields into dimensions
-            dimensions: dict[str, Dimension] = {}
-            for field in ds.get("fields", []):
-                dim_name, dim = _osi_field_to_dimension(field)
-                dimensions[dim_name] = dim
-
-            if dimensions:
-                model = model.with_dimensions(**dimensions)
-
-            # Find metrics that belong to this dataset
-            ds_measures: dict[str, Measure] = {}
-            for metric in metrics:
-                sql_expr = _parse_osi_expression(metric["expression"])
-                # Check if metric references this dataset (or has no prefix)
-                if f"{ds_name}." in sql_expr or not any(
-                    f"{other}." in sql_expr for other in dataset_names if other != ds_name
-                ):
-                    meas_name, meas = _osi_metric_to_measure(metric)
-                    ds_measures[meas_name] = meas
-
-            if ds_measures:
-                model = model.with_measures(**ds_measures)
-
-            result[ds_name] = model
-
-        # Apply relationships as joins (if tables are provided)
-        if tables and relationships:
-            for rel in relationships:
-                from_ds = rel.get("from", "")
-                to_ds = rel.get("to", "")
-                if from_ds in result and to_ds in result:
-                    from_cols = rel.get("from_columns", [])
-                    to_cols = rel.get("to_columns", [])
-                    if from_cols and to_cols and from_cols[0] != "unknown":
-                        left_col = from_cols[0]
-                        right_col = to_cols[0]
-
-                        def make_join_cond(lc, rc):
-                            return lambda left, right: getattr(left, lc) == getattr(right, rc)
-
-                        result[from_ds] = result[from_ds].join_one(
-                            result[to_ds],
-                            on=make_join_cond(left_col, right_col),
-                        )
-
-    return result
+    from .yaml import from_config
+    return from_config(osi_config, tables=tables)
 
 
 def from_osi_yaml(
@@ -668,29 +344,7 @@ def from_osi_yaml(
 ) -> dict[str, SemanticModel]:
     """Load BSL models from an OSI YAML file.
 
-    Convenience wrapper around from_osi() that reads and parses the YAML file.
-    """
-    from .utils import read_yaml_file
-
-    osi_config = read_yaml_file(yaml_path)
-    return from_osi(osi_config, tables=tables)
-
-
-def _create_placeholder_table(dataset: dict) -> Any:
-    """Create a placeholder ibis table from OSI field definitions.
-
-    This allows metadata inspection without a real database connection.
-    Returns None if no fields are defined.
+    This is a convenience alias for ``from_yaml(yaml_path, tables=tables)``.
     """
-    import ibis
-
-    fields = dataset.get("fields", [])
-    if not fields:
-        return None
-
-    # Create a simple schema from field names (all as string type as placeholder)
-    schema = {f["name"]: "string" for f in fields}
-    try:
-        return ibis.table(schema, name=dataset["name"])
-    except Exception:
-        return None
+    from .yaml import from_yaml
+    return from_yaml(yaml_path, tables=tables)
diff --git a/src/boring_semantic_layer/tests/test_osi.py b/src/boring_semantic_layer/tests/test_osi.py
index f1d52e46..164a9101 100644
--- a/src/boring_semantic_layer/tests/test_osi.py
+++ b/src/boring_semantic_layer/tests/test_osi.py
@@ -1,4 +1,9 @@
-"""Tests for OSI (Open Semantic Interchange) converter."""
+"""Tests for OSI (Open Semantic Interchange) support.
+
+Import tests use ``from_config`` (the native entry point) to verify that OSI
+format is auto-detected and parsed correctly — no separate ``from_osi`` call
+needed.  Export tests use ``to_osi`` / ``to_osi_yaml``.
+"""
 
 import json
 
@@ -8,18 +13,21 @@
 from boring_semantic_layer import (
     Dimension,
     Measure,
+    from_config,
     to_semantic_table,
 )
 from boring_semantic_layer.osi import (
     OSI_VERSION,
     _deferred_to_sql,
     _ibis_string_to_sql,
-    _sql_to_deferred,
-    _strip_dataset_prefix,
-    from_osi,
     to_osi,
     to_osi_yaml,
 )
+from boring_semantic_layer.yaml import (
+    _is_osi_config,
+    _sql_to_deferred,
+    _strip_dataset_prefix,
+)
 
 
 # ---------------------------------------------------------------------------
@@ -52,32 +60,21 @@ def test_nunique(self):
 
 class TestDeferredToSql:
     def test_simple_column(self):
-        from ibis import _
-
-        assert _deferred_to_sql(_.column_name) == "column_name"
+        assert _deferred_to_sql(ibis._.column_name) == "column_name"
 
     def test_count(self):
-        from ibis import _
-
-        assert _deferred_to_sql(_.count()) == "COUNT(*)"
+        assert _deferred_to_sql(ibis._.count()) == "COUNT(*)"
 
     def test_sum(self):
-        from ibis import _
-
-        result = _deferred_to_sql(_.amount.sum())
-        assert result == "SUM(amount)"
+        assert _deferred_to_sql(ibis._.amount.sum()) == "SUM(amount)"
 
     def test_mean(self):
-        from ibis import _
-
-        result = _deferred_to_sql(_.amount.mean())
-        assert result == "AVG(amount)"
+        assert _deferred_to_sql(ibis._.amount.mean()) == "AVG(amount)"
 
 
 class TestSqlToDeferred:
     def test_simple_column(self):
         d = _sql_to_deferred("column_name")
-        # Verify it's a Deferred by checking str representation
         assert "column_name" in str(d)
 
     def test_count_star(self):
@@ -101,15 +98,29 @@ def test_count_star(self):
         assert _strip_dataset_prefix("COUNT(*)") == "COUNT(*)"
 
     def test_count_distinct_with_prefix(self):
-        assert (
-            _strip_dataset_prefix("COUNT(DISTINCT customers.id)")
-            == "COUNT(DISTINCT id)"
-        )
+        assert _strip_dataset_prefix("COUNT(DISTINCT customers.id)") == "COUNT(DISTINCT id)"
 
     def test_no_prefix(self):
         assert _strip_dataset_prefix("SUM(distance)") == "SUM(distance)"
 
 
+# ---------------------------------------------------------------------------
+# Format detection
+# ---------------------------------------------------------------------------
+
+
+class TestFormatDetection:
+    def test_detects_osi(self):
+        assert _is_osi_config({"version": "0.1.1", "semantic_model": []})
+
+    def test_rejects_bsl(self):
+        assert not _is_osi_config({"flights": {"table": "flights_tbl"}})
+
+    def test_rejects_partial(self):
+        assert not _is_osi_config({"version": "0.1.1"})
+        assert not _is_osi_config({"semantic_model": []})
+
+
 # ---------------------------------------------------------------------------
 # Export tests: BSL -> OSI
 # ---------------------------------------------------------------------------
@@ -186,16 +197,12 @@ def test_dataset_fields(self, simple_model):
         ds = osi["semantic_model"][0]["datasets"][0]
         assert ds["name"] == "orders"
         assert ds["description"] == "Order transactions"
-        assert "fields" in ds
         field_names = {f["name"] for f in ds["fields"]}
-        assert "order_id" in field_names
-        assert "customer_id" in field_names
-        assert "created_at" in field_names
+        assert {"order_id", "customer_id", "created_at"} <= field_names
 
     def test_primary_key_from_entity(self, simple_model):
         osi = to_osi(simple_model)
         ds = osi["semantic_model"][0]["datasets"][0]
-        assert "primary_key" in ds
         assert "order_id" in ds["primary_key"]
 
     def test_time_dimension(self, simple_model):
@@ -212,99 +219,71 @@ def test_non_time_dimension(self, simple_model):
 
     def test_metrics(self, simple_model):
         osi = to_osi(simple_model)
-        sm = osi["semantic_model"][0]
-        assert "metrics" in sm
-        metric_names = {m["name"] for m in sm["metrics"]}
-        assert "order_count" in metric_names
-        assert "total_amount" in metric_names
-        assert "avg_amount" in metric_names
+        metric_names = {m["name"] for m in osi["semantic_model"][0]["metrics"]}
+        assert {"order_count", "total_amount", "avg_amount"} <= metric_names
 
     def test_metric_expressions(self, simple_model):
         osi = to_osi(simple_model)
-        sm = osi["semantic_model"][0]
-        count_metric = next(m for m in sm["metrics"] if m["name"] == "order_count")
-        expr = count_metric["expression"]
-        assert "dialects" in expr
-        assert len(expr["dialects"]) >= 1
-        assert expr["dialects"][0]["dialect"] == "ANSI_SQL"
+        count_metric = next(m for m in osi["semantic_model"][0]["metrics"] if m["name"] == "order_count")
+        assert count_metric["expression"]["dialects"][0]["dialect"] == "ANSI_SQL"
 
     def test_field_expression_format(self, simple_model):
         osi = to_osi(simple_model)
         ds = osi["semantic_model"][0]["datasets"][0]
-        order_id_field = next(f for f in ds["fields"] if f["name"] == "order_id")
-        expr = order_id_field["expression"]
-        assert "dialects" in expr
-        assert expr["dialects"][0]["dialect"] == "ANSI_SQL"
-        assert expr["dialects"][0]["expression"] == "order_id"
+        f = next(f for f in ds["fields"] if f["name"] == "order_id")
+        assert f["expression"]["dialects"][0] == {"dialect": "ANSI_SQL", "expression": "order_id"}
 
     def test_custom_extensions_for_bsl_metadata(self, simple_model):
         osi = to_osi(simple_model)
         ds = osi["semantic_model"][0]["datasets"][0]
-        order_id_field = next(f for f in ds["fields"] if f["name"] == "order_id")
-        assert "custom_extensions" in order_id_field
-        ext = order_id_field["custom_extensions"][0]
-        assert ext["vendor_name"] == "COMMON"
-        data = json.loads(ext["data"])
+        f = next(f for f in ds["fields"] if f["name"] == "order_id")
+        data = json.loads(f["custom_extensions"][0]["data"])
         assert data["is_entity"] is True
 
     def test_time_grain_in_custom_extensions(self, simple_model):
         osi = to_osi(simple_model)
         ds = osi["semantic_model"][0]["datasets"][0]
-        created_at_field = next(f for f in ds["fields"] if f["name"] == "created_at")
-        ext = created_at_field["custom_extensions"][0]
-        data = json.loads(ext["data"])
+        f = next(f for f in ds["fields"] if f["name"] == "created_at")
+        data = json.loads(f["custom_extensions"][0]["data"])
         assert data["smallest_time_grain"] == "TIME_GRAIN_DAY"
 
     def test_ai_context_on_dimensions(self, model_with_ai_context):
         osi = to_osi(model_with_ai_context)
         ds = osi["semantic_model"][0]["datasets"][0]
-        product_field = next(f for f in ds["fields"] if f["name"] == "product_id")
-        assert "ai_context" in product_field
-        assert product_field["ai_context"]["synonyms"] == ["SKU", "item ID"]
+        f = next(f for f in ds["fields"] if f["name"] == "product_id")
+        assert f["ai_context"]["synonyms"] == ["SKU", "item ID"]
 
     def test_ai_context_on_metrics(self, model_with_ai_context):
         osi = to_osi(model_with_ai_context)
-        sm = osi["semantic_model"][0]
-        avg_price = next(m for m in sm["metrics"] if m["name"] == "avg_price")
-        assert "ai_context" in avg_price
-        assert avg_price["ai_context"]["synonyms"] == ["mean price", "price average"]
+        m = next(m for m in osi["semantic_model"][0]["metrics"] if m["name"] == "avg_price")
+        assert m["ai_context"]["synonyms"] == ["mean price", "price average"]
 
     def test_with_description_and_ai_context(self, simple_model):
-        osi = to_osi(
-            simple_model,
-            name="my_model",
-            description="A test model",
-            ai_context={"instructions": "Use for order analysis"},
-        )
+        osi = to_osi(simple_model, name="my_model", description="A test", ai_context={"instructions": "test"})
         sm = osi["semantic_model"][0]
-        assert sm["description"] == "A test model"
-        assert sm["ai_context"]["instructions"] == "Use for order analysis"
+        assert sm["description"] == "A test"
+        assert sm["ai_context"]["instructions"] == "test"
 
     def test_multiple_models(self):
-        orders = ibis.table({"id": "int64", "amount": "float64"}, name="orders")
-        customers = ibis.table({"id": "int64", "name": "string"}, name="customers")
-
-        m1 = to_semantic_table(orders, name="orders")
+        m1 = to_semantic_table(ibis.table({"id": "int64", "amount": "float64"}, name="o"), name="orders")
         m1 = m1.with_dimensions(id=Dimension(expr=ibis._.id))
         m1 = m1.with_measures(total=Measure(expr=ibis._.amount.sum()))
-
-        m2 = to_semantic_table(customers, name="customers")
+        m2 = to_semantic_table(ibis.table({"id": "int64", "name": "string"}, name="c"), name="customers")
         m2 = m2.with_dimensions(id=Dimension(expr=ibis._.id))
-
         osi = to_osi({"orders": m1, "customers": m2}, name="ecommerce")
         assert len(osi["semantic_model"][0]["datasets"]) == 2
 
 
 class TestToOsiYaml:
     def test_yaml_output(self, simple_model):
-        yaml_str = to_osi_yaml(simple_model, name="test")
-        assert "version:" in yaml_str
-        assert "semantic_model:" in yaml_str
-        assert "datasets:" in yaml_str
+        s = to_osi_yaml(simple_model, name="test")
+        assert "version:" in s
+        assert "semantic_model:" in s
+        assert "datasets:" in s
 
 
 # ---------------------------------------------------------------------------
-# Import tests: OSI -> BSL
+# Import tests: OSI parsed natively via from_config
 # ---------------------------------------------------------------------------
 
 
@@ -316,7 +295,6 @@ def osi_config():
         "semantic_model": [
             {
                 "name": "test_model",
-                "description": "Test semantic model",
                 "datasets": [
                     {
                         "name": "orders",
@@ -326,37 +304,20 @@ def osi_config():
                         "fields": [
                             {
                                 "name": "order_id",
-                                "expression": {
-                                    "dialects": [
-                                        {"dialect": "ANSI_SQL", "expression": "order_id"}
-                                    ]
-                                },
+                                "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "order_id"}]},
                                 "dimension": {"is_time": False},
                                 "description": "Order identifier",
-                                "custom_extensions": [
-                                    {
-                                        "vendor_name": "COMMON",
-                                        "data": json.dumps({"is_entity": True}),
-                                    }
-                                ],
+                                "custom_extensions": [{"vendor_name": "COMMON", "data": json.dumps({"is_entity": True})}],
                             },
                             {
                                 "name": "created_at",
-                                "expression": {
-                                    "dialects": [
-                                        {"dialect": "ANSI_SQL", "expression": "created_at"}
-                                    ]
-                                },
+                                "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "created_at"}]},
                                 "dimension": {"is_time": True},
                                 "description": "Creation timestamp",
                             },
                             {
                                 "name": "amount",
-                                "expression": {
-                                    "dialects": [
-                                        {"dialect": "ANSI_SQL", "expression": "amount"}
-                                    ]
-                                },
+                                "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "amount"}]},
                                 "dimension": {"is_time": False},
                             },
                         ],
@@ -365,20 +326,12 @@ def osi_config():
                 "metrics": [
                     {
                         "name": "total_amount",
-                        "expression": {
-                            "dialects": [
-                                {"dialect": "ANSI_SQL", "expression": "SUM(orders.amount)"}
-                            ]
-                        },
+                        "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "SUM(orders.amount)"}]},
                         "description": "Total order amount",
                     },
                     {
                         "name": "order_count",
-                        "expression": {
-                            "dialects": [
-                                {"dialect": "ANSI_SQL", "expression": "COUNT(*)"}
-                            ]
-                        },
+                        "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "COUNT(*)"}]},
                         "description": "Number of orders",
                     },
                 ],
@@ -387,90 +340,66 @@ def osi_config():
     }
 
 
-class TestFromOsi:
+class TestFromConfig_OSI:
+    """Test that from_config auto-detects and parses OSI format."""
+
     def test_basic_import(self, osi_config):
-        models = from_osi(osi_config)
+        models = from_config(osi_config)
         assert "orders" in models
 
     def test_model_description(self, osi_config):
-        models = from_osi(osi_config)
-        op = models["orders"].op()
-        assert op.description == "Order transactions"
+        models = from_config(osi_config)
+        assert models["orders"].op().description == "Order transactions"
 
     def test_dimensions_imported(self, osi_config):
-        models = from_osi(osi_config)
-        dims = models["orders"].op().get_dimensions()
-        assert "order_id" in dims
-        assert "created_at" in dims
-        assert "amount" in dims
+        dims = from_config(osi_config)["orders"].op().get_dimensions()
+        assert {"order_id", "created_at", "amount"} == set(dims.keys())
 
     def test_dimension_descriptions(self, osi_config):
-        models = from_osi(osi_config)
-        dims = models["orders"].op().get_dimensions()
+        dims = from_config(osi_config)["orders"].op().get_dimensions()
         assert dims["order_id"].description == "Order identifier"
         assert dims["created_at"].description == "Creation timestamp"
 
     def test_time_dimension_flag(self, osi_config):
-        models = from_osi(osi_config)
-        dims = models["orders"].op().get_dimensions()
+        dims = from_config(osi_config)["orders"].op().get_dimensions()
         assert dims["created_at"].is_time_dimension is True
         assert dims["order_id"].is_time_dimension is False
 
     def test_entity_from_custom_extensions(self, osi_config):
-        models = from_osi(osi_config)
-        dims = models["orders"].op().get_dimensions()
+        dims = from_config(osi_config)["orders"].op().get_dimensions()
         assert dims["order_id"].is_entity is True
 
     def test_measures_imported(self, osi_config):
-        models = from_osi(osi_config)
-        measures = models["orders"].op().get_measures()
-        assert "total_amount" in measures
-        assert "order_count" in measures
+        measures = from_config(osi_config)["orders"].op().get_measures()
+        assert {"total_amount", "order_count"} == set(measures.keys())
 
     def test_measure_descriptions(self, osi_config):
-        models = from_osi(osi_config)
-        measures = models["orders"].op().get_measures()
+        measures = from_config(osi_config)["orders"].op().get_measures()
         assert measures["total_amount"].description == "Total order amount"
-        assert measures["order_count"].description == "Number of orders"
 
     def test_with_real_table(self, osi_config):
-        """Test import with a real DuckDB table backing."""
         con = ibis.duckdb.connect()
-        con.raw_sql(
-            "CREATE TABLE orders_table (order_id INT, created_at TIMESTAMP, amount DOUBLE)"
-        )
-        table = con.table("orders_table")
-        models = from_osi(osi_config, tables={"orders": table})
+        con.raw_sql("CREATE TABLE orders_table (order_id INT, created_at TIMESTAMP, amount DOUBLE)")
+        models = from_config(osi_config, tables={"orders": con.table("orders_table")})
         assert "orders" in models
 
     def test_ai_context_preserved(self):
         config = {
             "version": "0.1.1",
-            "semantic_model": [
-                {
-                    "name": "test",
-                    "datasets": [
-                        {
-                            "name": "items",
-                            "source": "items",
-                            "fields": [
-                                {
-                                    "name": "item_id",
-                                    "expression": {
-                                        "dialects": [
-                                            {"dialect": "ANSI_SQL", "expression": "item_id"}
-                                        ]
-                                    },
-                                    "ai_context": {"synonyms": ["SKU", "product_id"]},
-                                }
-                            ],
-                        }
-                    ],
-                }
-            ],
+            "semantic_model": [{
+                "name": "test",
+                "datasets": [{
+                    "name": "items",
+                    "source": "items",
+                    "fields": [{
+                        "name": "item_id",
+                        "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "item_id"}]},
+                        "ai_context": {"synonyms": ["SKU", "product_id"]},
+                    }],
+                }],
+            }],
         }
-        models = from_osi(config)
-        dims = models["items"].op().get_dimensions()
+        dims = from_config(config)["items"].op().get_dimensions()
         assert dims["item_id"].ai_context == {"synonyms": ["SKU", "product_id"]}
 
 
@@ -482,66 +411,44 @@ def test_ai_context_preserved(self):
 class TestRoundTrip:
     def test_bsl_to_osi_to_bsl(self, simple_model):
         """BSL -> OSI -> BSL preserves key semantics."""
-        # Export
         osi = to_osi(simple_model, name="round_trip_test")
-
-        # Import back
-        models = from_osi(osi)
+        models = from_config(osi)  # auto-detects OSI
         assert "orders" in models
 
-        # Check dimensions preserved
         orig_dims = simple_model.op().get_dimensions()
         new_dims = models["orders"].op().get_dimensions()
         assert set(orig_dims.keys()) == set(new_dims.keys())
 
-        # Check descriptions preserved
         for name in orig_dims:
             assert orig_dims[name].description == new_dims[name].description
 
-        # Check time dimension flag preserved
         assert new_dims["created_at"].is_time_dimension is True
-        assert new_dims["order_id"].is_time_dimension is False
-
-        # Check entity flag preserved (via custom_extensions round-trip)
         assert new_dims["order_id"].is_entity is True
 
     def test_bsl_to_osi_to_bsl_measures(self, simple_model):
-        """BSL -> OSI -> BSL preserves measure semantics."""
         osi = to_osi(simple_model)
-        models = from_osi(osi)
-
-        orig_measures = simple_model.op().get_measures()
-        new_measures = models["orders"].op().get_measures()
+        models = from_config(osi)
 
-        # All measures should be present
-        assert set(orig_measures.keys()) == set(new_measures.keys())
-
-        # Descriptions preserved
-        for name in orig_measures:
-            assert orig_measures[name].description == new_measures[name].description
+        orig = simple_model.op().get_measures()
+        new = models["orders"].op().get_measures()
+        assert set(orig.keys()) == set(new.keys())
+        for name in orig:
+            assert orig[name].description == new[name].description
 
     def test_osi_to_bsl_to_osi(self, osi_config):
-        """OSI -> BSL -> OSI preserves key structure."""
-        # Import
-        models = from_osi(osi_config)
-
-        # Export back
+        models = from_config(osi_config)
         osi_out = to_osi(models, name="round_trip")
 
-        # Check structure
         assert osi_out["version"] == OSI_VERSION
         ds_out = osi_out["semantic_model"][0]["datasets"][0]
         assert ds_out["name"] == "orders"
         assert ds_out["description"] == "Order transactions"
-
-        # Field count preserved
         ds_in = osi_config["semantic_model"][0]["datasets"][0]
         assert len(ds_out["fields"]) == len(ds_in["fields"])
 
     def test_ai_context_round_trip(self, model_with_ai_context):
-        """ai_context survives BSL -> OSI -> BSL round-trip."""
         osi = to_osi(model_with_ai_context)
-        models = from_osi(osi)
+        models = from_config(osi)
 
         dims = models["products"].op().get_dimensions()
         assert dims["product_id"].ai_context == {"synonyms": ["SKU", "item ID"]}
diff --git a/src/boring_semantic_layer/yaml.py b/src/boring_semantic_layer/yaml.py
index 25f92d46..6488baf4 100644
--- a/src/boring_semantic_layer/yaml.py
+++ b/src/boring_semantic_layer/yaml.py
@@ -1,7 +1,13 @@
 """
 YAML loader for Boring Semantic Layer models using the semantic API.
+
+Supports both BSL native YAML format and OSI (Open Semantic Interchange)
+v0.1.1 format. The format is auto-detected based on the presence of
+``version`` and ``semantic_model`` keys.
 """
 
+import json
+import re
 from collections.abc import Mapping
 from typing import Any
 
@@ -14,6 +20,276 @@
 from .utils import read_yaml_file, safe_eval
 
 
+# ---------------------------------------------------------------------------
+# Format detection
+# ---------------------------------------------------------------------------
+
+
+def _is_osi_config(config: Mapping[str, Any]) -> bool:
+    """Tell whether *config* carries both top-level keys that mark an OSI document."""
+    return all(marker in config for marker in ("semantic_model", "version"))
+
+
+# ---------------------------------------------------------------------------
+# OSI expression helpers (SQL <-> Ibis Deferred)
+# ---------------------------------------------------------------------------
+
+
+def _sql_to_deferred(sql: str):
+    """Convert a simple SQL expression to an Ibis Deferred (keywords are case-insensitive).
+
+    Handles:
+      "column_name"                -> _.column_name
+      "SUM/AVG/MAX/MIN(column)"    -> _.column.sum() / .mean() / .max() / .min()
+      "COUNT(column)"              -> _.column.count()
+      "COUNT(*)"                   -> _.count()
+      "COUNT(DISTINCT column)"     -> _.column.nunique()
+    """
+    sql = sql.strip()
+
+    if re.match(r"^COUNT\(\s*\*\s*\)$", sql, re.IGNORECASE):  # also ``count(*)`` / ``COUNT( * )``
+        return safe_eval("_.count()", context={"_": _}).unwrap()
+
+    # COUNT(DISTINCT col)
+    m = re.match(r"^COUNT\(DISTINCT\s+(\w+)\)$", sql, re.IGNORECASE)
+    if m:
+        return safe_eval(f"_.{m.group(1)}.nunique()", context={"_": _}).unwrap()
+
+    # AGG(col) patterns; COUNT(col) is the non-null count of that column
+    sql_to_ibis = {"SUM": "sum", "AVG": "mean", "MAX": "max", "MIN": "min", "COUNT": "count"}
+    m = re.match(r"^(\w+)\((\w+)\)$", sql)
+    ibis_fn = sql_to_ibis.get(m.group(1).upper()) if m else None
+    if ibis_fn:
+        return safe_eval(f"_.{m.group(2)}.{ibis_fn}()", context={"_": _}).unwrap()
+
+    # Simple column reference
+    if re.match(r"^\w+$", sql):
+        return safe_eval(f"_.{sql}", context={"_": _}).unwrap()
+
+    # Fallback (best effort): evaluate the text as an attribute chain on ``_``.
+    # NOTE(review): ``sql`` comes straight from the OSI document, so this relies on
+    # safe_eval to reject anything unsafe — confirm.
+    try:
+        return safe_eval(f"_.{sql}", context={"_": _}).unwrap()
+    except Exception:  # last resort: keep only the part before the first dot
+        return safe_eval(f"_.{sql.split('.')[0]}", context={"_": _}).unwrap()
+
+
+def _parse_osi_expression(expr_obj: dict, prefer_dialect: str = "ANSI_SQL") -> str:
+    """Return the SQL text for *prefer_dialect*, else for the first dialect listed."""
+    candidates = expr_obj.get("dialects", [])
+    if not candidates:
+        raise ValueError("OSI expression has no dialects")
+    chosen = next(
+        (entry for entry in candidates if entry.get("dialect") == prefer_dialect), candidates[0]
+    )
+    return chosen["expression"]
+
+
+def _strip_dataset_prefix(sql: str) -> str:
+    """Remove ``dataset.`` qualifiers from column references inside SQL function calls.
+
+    ``SUM(flights.distance)`` -> ``SUM(distance)``
+    """
+    # An identifier followed by a dot and another identifier, e.g. ``orders.`` in
+    # ``orders.amount``; requiring a letter/underscore on both sides leaves numeric
+    # literals such as ``1.5`` untouched.
+    qualifier = re.compile(r"\b[A-Za-z_]\w*\.(?=[A-Za-z_])")
+
+    def _strip_match(m: re.Match) -> str:
+        fn = m.group(1)
+        inner = m.group(2).strip()
+        unqualified = qualifier.sub("", inner)
+        if unqualified == inner:
+            return m.group(0)  # nothing to strip: keep the call exactly as written
+        return f"{fn}({unqualified})"
+
+    return re.sub(r"(\w+)\(([^)]+)\)", _strip_match, sql)
+
+
+# ---------------------------------------------------------------------------
+# OSI field / metric -> BSL Dimension / Measure
+# ---------------------------------------------------------------------------
+
+
+def _osi_field_to_dimension(field: dict) -> tuple[str, Dimension]:
+    """Convert an OSI field dict to a ``(name, Dimension)`` pair.
+
+    BSL-only flags come from the ``COMMON`` custom extension; malformed ``data`` is ignored.
+    """
+    name = field["name"]
+    sql_expr = _parse_osi_expression(field["expression"])
+    deferred = _sql_to_deferred(sql_expr)
+
+    kwargs: dict[str, Any] = {"expr": deferred, "description": field.get("description")}
+
+    dim_meta = field.get("dimension") or {}  # ``dimension: null`` in YAML yields None
+    if dim_meta.get("is_time"):
+        kwargs["is_time_dimension"] = True
+
+    if "ai_context" in field:
+        kwargs["ai_context"] = field["ai_context"]
+
+    # Recover BSL-specific metadata stored in custom_extensions
+    for ext in field.get("custom_extensions") or []:
+        if ext.get("vendor_name") == "COMMON":
+            try:
+                data = json.loads(ext["data"])
+                if data.get("is_entity"):
+                    kwargs["is_entity"] = True
+                if data.get("is_event_timestamp"):
+                    kwargs["is_event_timestamp"] = True
+                if data.get("smallest_time_grain"):
+                    kwargs["smallest_time_grain"] = data["smallest_time_grain"]
+                if data.get("derived_dimensions"):
+                    kwargs["derived_dimensions"] = tuple(data["derived_dimensions"])
+            except (json.JSONDecodeError, KeyError, TypeError, AttributeError):
+                pass  # best effort: non-string or non-object payloads are skipped too
+
+    return name, Dimension(**kwargs)
+
+
+def _osi_metric_to_measure(metric: dict) -> tuple[str, Measure]:
+    """Build the ``(name, Measure)`` pair described by an OSI metric dict.
+
+    The dataset qualifier is stripped from the SQL before it is translated.
+    """
+    measure_name = metric["name"]
+    raw_sql = _parse_osi_expression(metric["expression"])
+    expr = _sql_to_deferred(_strip_dataset_prefix(raw_sql))
+
+    optional: dict[str, Any] = {}
+    if "ai_context" in metric:
+        optional["ai_context"] = metric["ai_context"]
+
+    measure = Measure(expr=expr, description=metric.get("description"), **optional)
+    return measure_name, measure
+
+
+# ---------------------------------------------------------------------------
+# OSI config -> BSL models  (called from from_config when OSI is detected)
+# ---------------------------------------------------------------------------
+
+
+def _create_placeholder_table(dataset: dict):
+    """Create a placeholder ibis table (every field declared as string) from OSI fields, or None."""
+    import ibis  # local import, only executed when a placeholder is requested
+
+    fields = dataset.get("fields", [])
+    if not fields:
+        return None  # no fields declared: nothing to build a schema from
+    schema = {f["name"]: "string" for f in fields}  # every column is declared as string
+    try:
+        return ibis.table(schema, name=dataset["name"])
+    except Exception:
+        return None  # any failure while building the table is reported as None
+
+
+def _from_osi_config(
+    config: Mapping[str, Any],
+    tables: Mapping[str, Any] | None = None,
+    profile: str | None = None,
+    profile_path: str | None = None,
+) -> dict[str, SemanticModel]:
+    """Parse an OSI config dict into BSL SemanticModel instances.
+
+    This is an internal entry-point invoked by :func:`from_config` when it
+    detects OSI format.  Users should call ``from_config`` / ``from_yaml``
+    directly — those work for *both* BSL and OSI files.
+
+    Datasets with neither a backing table nor any fields are skipped, and
+    relationships are turned into ``join_one`` joins only when tables exist.
+
+    Raises:
+        ValueError: if ``semantic_model`` is missing or empty.
+    """
+    tables = dict(tables) if tables else {}
+
+    # Load tables from profile if not provided
+    if not tables:
+        profile_config = profile or config.get("profile")
+        if profile_config or profile_path:
+            connection = get_connection(
+                profile_config or profile_path,
+                profile_file=profile_path if profile_config else None,
+            )
+            tables = {name: connection.table(name) for name in connection.list_tables()}
+
+    semantic_models = config.get("semantic_model", [])
+    if not semantic_models:
+        raise ValueError("No semantic_model found in OSI config")
+
+    result: dict[str, SemanticModel] = {}
+
+    for sm in semantic_models:
+        datasets = sm.get("datasets") or []  # ``or []`` also covers keys present but null
+        metrics = sm.get("metrics") or []
+        relationships = sm.get("relationships") or []
+        dataset_names = {ds["name"] for ds in datasets}
+
+        for ds in datasets:
+            ds_name = ds["name"]
+
+            # Resolve backing table
+            if ds_name in tables:
+                table = tables[ds_name]
+            elif ds.get("source") and ds["source"] in tables:
+                table = tables[ds["source"]]
+            else:
+                table = _create_placeholder_table(ds)
+                if table is None:
+                    continue
+
+            model = to_semantic_table(table, name=ds_name, description=ds.get("description"))
+
+            # Fields -> Dimensions
+            dimensions = dict(_osi_field_to_dimension(field) for field in ds.get("fields") or [])
+            if dimensions:
+                model = model.with_dimensions(**dimensions)
+
+            # Metrics -> Measures (assign to the dataset they reference)
+            ds_measures: dict[str, Measure] = {}
+            for metric in metrics:
+                sql_expr = _parse_osi_expression(metric["expression"])
+                # Match ``<dataset>.`` on a word boundary so that dataset ``orders`` does
+                # not claim ``big_orders.amount``; unqualified metrics go to every dataset.
+                referenced = {n for n in dataset_names if re.search(rf"\b{re.escape(n)}\.", sql_expr)}
+                if ds_name in referenced or not referenced:
+                    meas_name, meas = _osi_metric_to_measure(metric)
+                    ds_measures[meas_name] = meas
+            if ds_measures:
+                model = model.with_measures(**ds_measures)
+
+            result[ds_name] = model
+
+        # Relationships -> Joins
+        if tables and relationships:
+            for rel in relationships:
+                from_ds = rel.get("from", "")
+                to_ds = rel.get("to", "")
+                if from_ds in result and to_ds in result:
+                    from_cols = rel.get("from_columns", [])
+                    to_cols = rel.get("to_columns", [])
+                    if from_cols and to_cols and from_cols[0] != "unknown":  # first column pair only
+
+                        def _make_join_cond(lc, rc):
+                            return lambda left, right: getattr(left, lc) == getattr(
+                                right, rc
+                            )
+
+                        result[from_ds] = result[from_ds].join_one(
+                            result[to_ds],
+                            on=_make_join_cond(from_cols[0], to_cols[0]),
+                        )
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# BSL native YAML helpers
+# ---------------------------------------------------------------------------
+
+
 def _parse_expression_config(name: str, config: str | dict, metric_type: str):
     """Extract expression string, description, and extra kwargs from config."""
     if isinstance(config, str):
@@ -322,6 +598,11 @@ def _load_table_for_yaml_model(
     return tables, tables[table_name]
 
 
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
 def from_config(
     config: Mapping[str, Any],
     tables: Mapping[str, Any] | None = None,
@@ -331,48 +612,27 @@ def from_config(
     """
     Load semantic tables from a configuration dictionary.
 
-    This is useful when you have already loaded your configuration through
-    custom logic (e.g., Kedro catalog, external config management) and want
-    to construct SemanticTable objects without going through YAML file loading.
+    Accepts **both** BSL native YAML format and OSI (Open Semantic Interchange)
+    v0.1.1 format.  The format is auto-detected: if the dict contains
+    ``version`` and ``semantic_model`` keys it is treated as OSI, otherwise
+    as BSL native.
 
     Args:
-        config: Configuration dictionary with model definitions
+        config: Configuration dictionary (BSL or OSI format)
         tables: Optional mapping of table names to ibis table expressions
         profile: Optional profile name to load tables from
         profile_path: Optional path to profile file
 
     Returns:
         Dict mapping model names to SemanticModel instances
-
-    Example config format:
-        {
-            "flights": {
-                "table": "flights_tbl",
-                "description": "Flight data model",
-                "database": ["analytics", "prod"],  # optional: catalog.schema
-                "dimensions": {
-                    "origin": {"expr": "_.origin", "description": "Origin airport"},
-                    "destination": "_.destination",
-                },
-                "measures": {
-                    "flight_count": "_.count()",
-                    "avg_distance": "_.distance.mean()",
-                },
-            }
-        }
-
-    The optional 'database' field can be a string or list for multi-part identifiers
-    (e.g., ["catalog", "schema"] for catalog.schema.table). This is passed to
-    ibis connection.table() and is useful for loading tables from different
-    databases/schemas under the same connection.
-
-    Example usage with pre-loaded tables:
-        >>> import ibis
-        >>> con = ibis.duckdb.connect()
-        >>> flights_tbl = con.table("flights")
-        >>> config = {"flights": {"table": "flights_tbl", "dimensions": {...}}}
-        >>> models = from_config(config, tables={"flights_tbl": flights_tbl})
     """
+    # ---- Auto-detect OSI format ----
+    if _is_osi_config(config):
+        return _from_osi_config(
+            config, tables=tables, profile=profile, profile_path=profile_path
+        )
+
+    # ---- BSL native format ----
     tables = _load_tables_from_references(dict(tables) if tables else {})
 
     # Load tables from profile if not provided
@@ -453,12 +713,13 @@ def from_yaml(
     profile_path: str | None = None,
 ) -> dict[str, SemanticModel]:
     """
-    Load semantic tables from a YAML file with optional profile-based table loading.
+    Load semantic tables from a YAML file.
 
-    This is a convenience wrapper around from_config() that loads the YAML file first.
+    Accepts **both** BSL native YAML format and OSI (Open Semantic Interchange)
+    v0.1.1 format.  The format is auto-detected.
 
     Args:
-        yaml_path: Path to the YAML configuration file
+        yaml_path: Path to the YAML configuration file (BSL or OSI format)
         tables: Optional mapping of table names to ibis table expressions
         profile: Optional profile name to load tables from
         profile_path: Optional path to profile file
@@ -466,45 +727,14 @@ def from_yaml(
     Returns:
         Dict mapping model names to SemanticModel instances
 
-    Example YAML format:
-        flights:
-          table: flights_tbl
-          description: "Flight data model"
-          database:  # optional: for loading from specific database/schema
-            - analytics
-            - prod
-          dimensions:
-            origin:
-              expr: _.origin
-              description: "Origin airport code"
-              is_entity: true
-            destination: _.destination
-            carrier: _.carrier
-            arr_time:
-              expr: _.arr_time
-              description: "Arrival time"
-              is_event_timestamp: true
-              is_time_dimension: true
-              smallest_time_grain: "TIME_GRAIN_DAY"
-          measures:
-            flight_count: _.count()
-            avg_distance: _.distance.mean()
-            total_distance:
-              expr: _.distance.sum()
-              description: "Total distance flown"
-          calculated_measures:
-            avg_per_flight:
-              expr: _.total_distance / _.flight_count
-              description: "Average distance per flight"
-            pct_of_total:
-              expr: _.total_distance / _.all(_.total_distance) * 100
-              description: "Percentage of total distance"
-          joins:
-            carriers:
-              model: carriers
-              type: one
-              left_on: carrier
-              right_on: code
+    Examples:
+        Load a BSL native YAML file::
+
+            models = from_yaml("flights.yml")
+
+        Load an OSI YAML file::
+
+            models = from_yaml("flights_osi.yaml", tables=tables)
     """
     yaml_configs = read_yaml_file(yaml_path)
     return from_config(yaml_configs, tables=tables, profile=profile, profile_path=profile_path)

From 52638f0b8ff0b1b0f988a17f18bac1c91fb885ec Mon Sep 17 00:00:00 2001
From: boringdata <boringdata@users.noreply.github.com>
Date: Thu, 2 Apr 2026 10:25:08 +0000
Subject: [PATCH 3/4] =?UTF-8?q?feat:=20close=20OSI=20import=20gaps=20?=
 =?UTF-8?q?=E2=80=94=20primary=5Fkey,=20ai=5Fcontext,=20label,=20multi-col?=
 =?UTF-8?q?umn=20joins?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four fixes to make OSI import near-lossless:

1. primary_key -> is_entity: fields matching dataset.primary_key are
   automatically marked is_entity=True on import (no custom_extensions
   needed for standard OSI files)

2. Dataset-level ai_context: added to SemanticTableOp, threaded through
   to_semantic_table/SemanticModel/with_dimensions/with_measures. Dict
   values are stored as a JSON string internally for ibis hashability
   (plain strings are kept as-is) and deserialized via get_ai_context().
   Round-trips through to_osi export.

3. label on Dimension: new optional field, parsed on OSI import, emitted
   on export. Supports the OSI field.label categorization concept.

4. Multi-column relationship joins: all from_columns/to_columns pairs
   are now used to build compound join predicates, not just the first.

All 56 OSI tests pass. Export validates against official OSI JSON schema.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/boring_semantic_layer/api.py            |  8 +-
 src/boring_semantic_layer/expr.py           |  9 +++
 src/boring_semantic_layer/ops.py            | 20 +++++
 src/boring_semantic_layer/osi.py            |  5 ++
 src/boring_semantic_layer/tests/test_osi.py | 88 +++++++++++++++++++++
 src/boring_semantic_layer/yaml.py           | 59 +++++++++++---
 6 files changed, 176 insertions(+), 13 deletions(-)

diff --git a/src/boring_semantic_layer/api.py b/src/boring_semantic_layer/api.py
index b3564c20..6bfb2003 100644
--- a/src/boring_semantic_layer/api.py
+++ b/src/boring_semantic_layer/api.py
@@ -18,7 +18,10 @@
 
 
 def to_semantic_table(
-    ibis_table: ir.Table, name: str | None = None, description: str | None = None
+    ibis_table: ir.Table,
+    name: str | None = None,
+    description: str | None = None,
+    ai_context: str | dict | None = None,
 ) -> SemanticModel:
     """Create a SemanticModel from an Ibis table.
 
@@ -26,6 +29,8 @@ def to_semantic_table(
         ibis_table: An Ibis table expression (can be regular ibis or xorq vendored ibis)
         name: Optional name for the semantic table
         description: Optional description for the semantic table
+        ai_context: Optional AI context (string or structured object with
+            instructions, synonyms, examples) per the OSI specification
 
     Returns:
         A new SemanticModel wrapping the table
@@ -41,6 +46,7 @@ def to_semantic_table(
         calc_measures=None,
         name=name,
         description=description,
+        ai_context=ai_context,
     )
 
 
diff --git a/src/boring_semantic_layer/expr.py b/src/boring_semantic_layer/expr.py
index b08f9984..8ff5a4d0 100644
--- a/src/boring_semantic_layer/expr.py
+++ b/src/boring_semantic_layer/expr.py
@@ -433,6 +433,7 @@ def __init__(
         calc_measures: Mapping[str, Any] | None = None,
         name: str | None = None,
         description: str | None = None,
+        ai_context: str | dict | None = None,
         _source_join: Any | None = None,
     ) -> None:
         # Keep tables in regular ibis - only convert to xorq at execution time if needed
@@ -452,6 +453,11 @@ def __init__(
 
         derived_name = name or _derive_name(table)
 
+        # Serialize dict ai_context to JSON string for ibis hashability
+        import json as _json
+
+        _ai_ctx = _json.dumps(ai_context, sort_keys=True) if isinstance(ai_context, dict) else ai_context
+
         op = SemanticTableOp(
             table=table,
             dimensions=dims,
@@ -459,6 +465,7 @@ def __init__(
             calc_measures=calc_meas,
             name=derived_name,
             description=description,
+            ai_context=_ai_ctx,
             _source_join=_source_join,
         )
 
@@ -525,6 +532,7 @@ def with_dimensions(self, **dims) -> SemanticModel:
             calc_measures=self.get_calculated_measures(),
             name=self.name,
             description=self.description,
+            ai_context=self.op().ai_context,
         )
 
     def with_measures(self, **meas) -> SemanticModel:
@@ -548,6 +556,7 @@ def with_measures(self, **meas) -> SemanticModel:
             calc_measures=new_calc_meas,
             name=self.name,
             description=self.description,
+            ai_context=self.op().ai_context,
         )
 
     def join_one(
diff --git a/src/boring_semantic_layer/ops.py b/src/boring_semantic_layer/ops.py
index 55469dbd..73e62c1e 100644
--- a/src/boring_semantic_layer/ops.py
+++ b/src/boring_semantic_layer/ops.py
@@ -960,6 +960,7 @@ class Dimension:
     smallest_time_grain: str | None = None
     derived_dimensions: tuple[str, ...] = ()
     ai_context: str | dict | None = None
+    label: str | None = None
 
     def __call__(self, table: ir.Table, _dims: dict | None = None) -> ir.Value:
         try:
@@ -999,6 +1000,8 @@ def to_json(self) -> Mapping[str, Any]:
             base["derived_dimensions"] = list(self.derived_dimensions)
         if self.ai_context:
             base["ai_context"] = self.ai_context
+        if self.label:
+            base["label"] = self.label
         return base
 
     def __hash__(self) -> int:
@@ -1059,10 +1062,25 @@ class SemanticTableOp(Relation):
     calc_measures: FrozenDict[str, Any]
     name: str | None = None
     description: str | None = None
+    ai_context: str | None = None  # JSON string when dict; plain string otherwise
     _source_join: Any = field(
         default=None, repr=False
     )  # Track if this wraps a join (SemanticJoinOp) for optimization
 
+    def get_ai_context(self) -> str | dict | None:
+        """Return ai_context, deserializing JSON-encoded dicts."""
+        val = self.ai_context
+        if val is None:
+            return None
+        try:
+            import json
+            parsed = json.loads(val)
+            if isinstance(parsed, dict):
+                return parsed
+        except (json.JSONDecodeError, TypeError, ValueError):
+            pass
+        return val
+
     def __init__(
         self,
         table: ir.Table,
@@ -1071,6 +1089,7 @@ def __init__(
         calc_measures: dict[str, Any] | FrozenDict[str, Any],
         name: str | None = None,
         description: str | None = None,
+        ai_context: str | dict | None = None,
         _source_join: Any = None,
     ) -> None:
         # Accept both regular ibis and xorq tables without conversion
@@ -1086,6 +1105,7 @@ def __init__(
             else calc_measures,
             name=name,
             description=description,
+            ai_context=ai_context,
             _source_join=_source_join,
         )
 
diff --git a/src/boring_semantic_layer/osi.py b/src/boring_semantic_layer/osi.py
index c7b734ab..c5c82f00 100644
--- a/src/boring_semantic_layer/osi.py
+++ b/src/boring_semantic_layer/osi.py
@@ -102,6 +102,8 @@ def _dimension_to_osi_field(name: str, dim: Dimension) -> dict:
 
     if dim.description:
         field["description"] = dim.description
+    if dim.label:
+        field["label"] = dim.label
     if dim.ai_context:
         field["ai_context"] = dim.ai_context
 
@@ -264,6 +266,9 @@ def to_osi(
 
         if op.description:
             dataset["description"] = op.description
+        ds_ai_ctx = op.get_ai_context()
+        if ds_ai_ctx:
+            dataset["ai_context"] = ds_ai_ctx
 
         fields = [_dimension_to_osi_field(n, d) for n, d in dims.items()]
         if fields:
diff --git a/src/boring_semantic_layer/tests/test_osi.py b/src/boring_semantic_layer/tests/test_osi.py
index 164a9101..51ee8fed 100644
--- a/src/boring_semantic_layer/tests/test_osi.py
+++ b/src/boring_semantic_layer/tests/test_osi.py
@@ -403,6 +403,94 @@ def test_ai_context_preserved(self):
         assert dims["item_id"].ai_context == {"synonyms": ["SKU", "product_id"]}
 
 
+    def test_primary_key_sets_is_entity(self):
+        """primary_key field names should become is_entity=True dimensions."""
+        config = {
+            "version": "0.1.1",
+            "semantic_model": [{
+                "name": "test",
+                "datasets": [{
+                    "name": "users",
+                    "source": "users",
+                    "primary_key": ["user_id"],
+                    "fields": [
+                        {
+                            "name": "user_id",
+                            "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "user_id"}]},
+                        },
+                        {
+                            "name": "email",
+                            "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "email"}]},
+                        },
+                    ],
+                }],
+            }],
+        }
+        dims = from_config(config)["users"].op().get_dimensions()
+        assert dims["user_id"].is_entity is True
+        assert dims["email"].is_entity is False
+
+    def test_dataset_ai_context(self):
+        """Dataset-level ai_context should be stored on the model."""
+        config = {
+            "version": "0.1.1",
+            "semantic_model": [{
+                "name": "test",
+                "datasets": [{
+                    "name": "products",
+                    "source": "products",
+                    "ai_context": {"synonyms": ["items", "SKUs"]},
+                    "fields": [{
+                        "name": "id",
+                        "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "id"}]},
+                    }],
+                }],
+            }],
+        }
+        op = from_config(config)["products"].op()
+        assert op.get_ai_context() == {"synonyms": ["items", "SKUs"]}
+
+    def test_field_label(self):
+        """Field label should be stored on the Dimension."""
+        config = {
+            "version": "0.1.1",
+            "semantic_model": [{
+                "name": "test",
+                "datasets": [{
+                    "name": "events",
+                    "source": "events",
+                    "fields": [{
+                        "name": "status",
+                        "expression": {"dialects": [{"dialect": "ANSI_SQL", "expression": "status"}]},
+                        "label": "filter",
+                    }],
+                }],
+            }],
+        }
+        dims = from_config(config)["events"].op().get_dimensions()
+        assert dims["status"].label == "filter"
+
+    def test_label_round_trip(self):
+        """label survives BSL -> OSI -> BSL."""
+        import ibis
+        t = ibis.table({"status": "string"}, name="events")
+        m = to_semantic_table(t, name="events")
+        m = m.with_dimensions(status=Dimension(expr=ibis._.status, label="filter"))
+        osi = to_osi(m)
+        models = from_config(osi)
+        assert models["events"].op().get_dimensions()["status"].label == "filter"
+
+    def test_dataset_ai_context_round_trip(self):
+        """Dataset ai_context survives BSL -> OSI -> BSL."""
+        import ibis
+        t = ibis.table({"id": "int64"}, name="items")
+        m = to_semantic_table(t, name="items", ai_context={"synonyms": ["products"]})
+        m = m.with_dimensions(id=Dimension(expr=ibis._.id))
+        osi = to_osi(m)
+        op = from_config(osi)["items"].op()
+        assert op.get_ai_context() == {"synonyms": ["products"]}
+
+
 # ---------------------------------------------------------------------------
 # Round-trip tests
 # ---------------------------------------------------------------------------
diff --git a/src/boring_semantic_layer/yaml.py b/src/boring_semantic_layer/yaml.py
index 6488baf4..a11945c3 100644
--- a/src/boring_semantic_layer/yaml.py
+++ b/src/boring_semantic_layer/yaml.py
@@ -112,8 +112,17 @@ def _strip_match(m: re.Match) -> str:
 # ---------------------------------------------------------------------------
 
 
-def _osi_field_to_dimension(field: dict) -> tuple[str, Dimension]:
-    """Convert an OSI field dict to a ``(name, Dimension)`` pair."""
+def _osi_field_to_dimension(
+    field: dict, primary_key_cols: set[str] | None = None
+) -> tuple[str, Dimension]:
+    """Convert an OSI field dict to a ``(name, Dimension)`` pair.
+
+    Args:
+        field: OSI field definition.
+        primary_key_cols: Column names from the dataset's ``primary_key``.
+            If the field's expression or name matches one of these, the
+            dimension is marked ``is_entity=True``.
+    """
     name = field["name"]
     sql_expr = _parse_osi_expression(field["expression"])
     deferred = _sql_to_deferred(sql_expr)
@@ -130,6 +139,13 @@ def _osi_field_to_dimension(field: dict) -> tuple[str, Dimension]:
     if "ai_context" in field:
         kwargs["ai_context"] = field["ai_context"]
 
+    if field.get("label"):
+        kwargs["label"] = field["label"]
+
+    # Mark as entity if the field appears in the dataset's primary_key
+    if primary_key_cols and (sql_expr in primary_key_cols or name in primary_key_cols):
+        kwargs["is_entity"] = True
+
     # Recover BSL-specific metadata stored in custom_extensions
     for ext in field.get("custom_extensions", []):
         if ext.get("vendor_name") == "COMMON":
@@ -235,13 +251,19 @@ def _from_osi_config(
                     continue
 
             model = to_semantic_table(
-                table, name=ds_name, description=ds.get("description")
+                table,
+                name=ds_name,
+                description=ds.get("description"),
+                ai_context=ds.get("ai_context"),
             )
 
+            # primary_key column names for is_entity detection
+            pk_cols = set(ds.get("primary_key") or [])
+
             # Fields -> Dimensions
             dimensions: dict[str, Dimension] = {}
             for field in ds.get("fields", []):
-                dim_name, dim = _osi_field_to_dimension(field)
+                dim_name, dim = _osi_field_to_dimension(field, pk_cols)
                 dimensions[dim_name] = dim
             if dimensions:
                 model = model.with_dimensions(**dimensions)
@@ -262,7 +284,7 @@ def _from_osi_config(
 
             result[ds_name] = model
 
-        # Relationships -> Joins
+        # Relationships -> Joins (supports multi-column keys)
         if tables and relationships:
             for rel in relationships:
                 from_ds = rel.get("from", "")
@@ -270,16 +292,29 @@ def _from_osi_config(
                 if from_ds in result and to_ds in result:
                     from_cols = rel.get("from_columns", [])
                     to_cols = rel.get("to_columns", [])
-                    if from_cols and to_cols and from_cols[0] != "unknown":
-
-                        def _make_join_cond(lc, rc):
-                            return lambda left, right: getattr(left, lc) == getattr(
-                                right, rc
-                            )
+                    if (
+                        from_cols
+                        and to_cols
+                        and len(from_cols) == len(to_cols)
+                        and from_cols[0] != "unknown"
+                    ):
+
+                        def _make_join_cond(lcols, rcols):
+                            def cond(left, right):
+                                pairs = [
+                                    getattr(left, lc) == getattr(right, rc)
+                                    for lc, rc in zip(lcols, rcols)
+                                ]
+                                result = pairs[0]
+                                for p in pairs[1:]:
+                                    result = result & p
+                                return result
+
+                            return cond
 
                         result[from_ds] = result[from_ds].join_one(
                             result[to_ds],
-                            on=_make_join_cond(from_cols[0], to_cols[0]),
+                            on=_make_join_cond(from_cols, to_cols),
                         )
 
     return result

From a57744bea194e8858d65cd8f2396d7860d1774b2 Mon Sep 17 00:00:00 2001
From: boringdata <boringdata@users.noreply.github.com>
Date: Thu, 2 Apr 2026 18:51:31 +0000
Subject: [PATCH 4/4] fix: address all 6 Codex review findings

P1 fixes:
- Calculated measures: extract formula from closure/original_expr
  instead of emitting the metric name as a self-reference
- Join key export: introspect lambda predicates by evaluating them
  against the join's left/right table expressions and walking the
  Equals expression tree to extract column names; ["unknown"] is now
  only a fallback when extraction fails
- Unqualified metrics (COUNT(*)): only assign to the first dataset
  instead of duplicating across all datasets in multi-dataset imports

P2 fixes:
- Relationship cardinality: read from custom_extensions and use
  join_many() when cardinality is "many" instead of always join_one()
- Expression fallback: return None for non-trivial Ibis expressions
  instead of stripping the "_." prefix, which leaked Ibis method syntax
  into the export as invalid SQL

P3 fixes:
- BSL YAML measure ai_context: pass extra_kwargs["ai_context"] through
  to Measure() constructor instead of silently dropping it

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/boring_semantic_layer/osi.py  | 91 +++++++++++++++++++++++++++++--
 src/boring_semantic_layer/yaml.py | 72 ++++++++++++++++++------
 2 files changed, 139 insertions(+), 24 deletions(-)

diff --git a/src/boring_semantic_layer/osi.py b/src/boring_semantic_layer/osi.py
index c5c82f00..b4c1b3c2 100644
--- a/src/boring_semantic_layer/osi.py
+++ b/src/boring_semantic_layer/osi.py
@@ -55,9 +55,10 @@ def _ibis_string_to_sql(s: str) -> str:
     if m:
         return m.group(1)
 
-    if s.startswith("_."):
-        return s[2:]
-    return s
+    # For non-trivial expressions (concat, case, etc.) that don't match
+    # known patterns, return None to signal the caller that we can't produce
+    # valid SQL. Stripping "_." would leak Ibis method syntax.
+    return None
 
 
 def _expr_to_sql_string(expr: Any) -> str | None:
@@ -160,6 +161,43 @@ def _measure_to_osi_metric(name: str, measure: Measure, dataset_name: str | None
     return metric
 
 
+def _walk_predicate_for_columns(
+    op_node, left_cols: set[str], right_cols: set[str]
+) -> tuple[list[str], list[str]]:
+    """Extract (from_columns, to_columns) from an ibis Equals expression tree."""
+    from_list: list[str] = []
+    to_list: list[str] = []
+
+    def _extract_eq(node):
+        """Collect column pairs from Equals nodes, recursing through And nodes."""
+        cls_name = type(node).__name__
+        if cls_name == "Equals":
+            args = node.args
+            names = []
+            for arg in args:
+                if hasattr(arg, "name") and isinstance(arg.name, str):
+                    names.append(arg.name)
+            if len(names) == 2:
+                a, b = names
+                if a in left_cols and b in right_cols:
+                    from_list.append(a)
+                    to_list.append(b)
+                elif b in left_cols and a in right_cols:
+                    from_list.append(b)
+                    to_list.append(a)
+        elif cls_name == "And":
+            for arg in node.args:
+                _extract_eq(arg)
+        # Recurse into args that are ops
+        if not from_list:
+            for arg in getattr(node, "args", []):
+                if hasattr(arg, "args"):
+                    _extract_eq(arg)
+
+    _extract_eq(op_node)
+    return from_list, to_list
+
+
 def _extract_join_info(model: SemanticModel) -> list[dict]:
     """Extract relationship info from a model's join chain."""
     from .ops import SemanticJoinOp
@@ -177,14 +215,37 @@ def _name(node) -> str:
             return _name(inner)
         return "unnamed"
 
+    def _extract_join_cols(node) -> tuple[list[str], list[str]]:
+        """Try to extract join column names from a SemanticJoinOp predicate."""
+        if node.on is None:
+            return [], []
+        try:
+            # Get table expressions for the left/right join inputs
+            import ibis
+            left_tbl = node.left.to_expr() if hasattr(node.left, "to_expr") else None
+            right_tbl = node.right.to_expr() if hasattr(node.right, "to_expr") else None
+            if left_tbl is None or right_tbl is None:
+                return [], []
+            # Evaluate the predicate to get an ibis expression
+            pred = node.on(left_tbl, right_tbl)
+            # Walk the expression tree to find Equals(field, field) patterns
+            op_node = pred.op()
+            from_cols, to_cols = _walk_predicate_for_columns(
+                op_node, set(left_tbl.columns), set(right_tbl.columns)
+            )
+            return from_cols, to_cols
+        except Exception:
+            return [], []
+
     def _walk(node):
         if isinstance(node, SemanticJoinOp):
+            from_cols, to_cols = _extract_join_cols(node)
             rel: dict[str, Any] = {
                 "name": f"{_name(node.left)}_{_name(node.right)}",
                 "from": _name(node.left),
                 "to": _name(node.right),
-                "from_columns": ["unknown"],
-                "to_columns": ["unknown"],
+                "from_columns": from_cols or ["unknown"],
+                "to_columns": to_cols or ["unknown"],
             }
             if hasattr(node, "cardinality"):
                 rel["custom_extensions"] = [
@@ -280,9 +341,27 @@ def to_osi(
             all_metrics.append(_measure_to_osi_metric(meas_name, meas, model_name))
 
         for cm_name, cm_fn in op.get_calculated_measures().items():
+            # Try to extract the formula from the calculated measure
+            cm_expr_str = None
+            if isinstance(cm_fn, Measure) and cm_fn.original_expr is not None:
+                cm_expr_str = _expr_to_sql_string(cm_fn.original_expr)
+            if cm_expr_str is None and isinstance(cm_fn, Measure):
+                cm_expr_str = _expr_to_sql_string(cm_fn.expr)
+            # Try inspecting closure for the source expression string
+            if cm_expr_str is None and callable(cm_fn):
+                import inspect
+                try:
+                    closure = inspect.getclosurevars(cm_fn)
+                    if "source" in closure.nonlocals:
+                        src = closure.nonlocals["source"]
+                        # Convert _.meas_a / _.meas_b style to readable form
+                        cm_expr_str = src.replace("_.", "")
+                except Exception:
+                    pass
+
             metric: dict[str, Any] = {
                 "name": cm_name,
-                "expression": _make_osi_expression(cm_name),
+                "expression": _make_osi_expression(cm_expr_str or cm_name),
             }
             if isinstance(cm_fn, Measure) and cm_fn.description:
                 metric["description"] = cm_fn.description
diff --git a/src/boring_semantic_layer/yaml.py b/src/boring_semantic_layer/yaml.py
index a11945c3..f2690d86 100644
--- a/src/boring_semantic_layer/yaml.py
+++ b/src/boring_semantic_layer/yaml.py
@@ -272,11 +272,8 @@ def _from_osi_config(
             ds_measures: dict[str, Measure] = {}
             for metric in metrics:
                 sql_expr = _parse_osi_expression(metric["expression"])
-                if f"{ds_name}." in sql_expr or not any(
-                    f"{other}." in sql_expr
-                    for other in dataset_names
-                    if other != ds_name
-                ):
+                # Explicitly references this dataset
+                if f"{ds_name}." in sql_expr:
                     meas_name, meas = _osi_metric_to_measure(metric)
                     ds_measures[meas_name] = meas
             if ds_measures:
@@ -284,7 +281,27 @@ def _from_osi_config(
 
             result[ds_name] = model
 
-        # Relationships -> Joins (supports multi-column keys)
+        # Second pass: assign unqualified metrics (no dataset. prefix) to the
+        # first dataset only, to avoid duplicating them across all datasets.
+        if datasets and metrics:
+            first_ds_name = datasets[0]["name"]
+            if first_ds_name in result:
+                unqualified: dict[str, Measure] = {}
+                for metric in metrics:
+                    sql_expr = _parse_osi_expression(metric["expression"])
+                    # Skip if it explicitly references any dataset
+                    if any(f"{dn}." in sql_expr for dn in dataset_names):
+                        continue
+                    meas_name, meas = _osi_metric_to_measure(metric)
+                    # Only add if not already present on this model
+                    if meas_name not in result[first_ds_name].op().get_measures():
+                        unqualified[meas_name] = meas
+                if unqualified:
+                    result[first_ds_name] = result[first_ds_name].with_measures(
+                        **unqualified
+                    )
+
+        # Relationships -> Joins (supports multi-column keys + cardinality)
         if tables and relationships:
             for rel in relationships:
                 from_ds = rel.get("from", "")
@@ -305,17 +322,33 @@ def cond(left, right):
                                     getattr(left, lc) == getattr(right, rc)
                                     for lc, rc in zip(lcols, rcols)
                                 ]
-                                result = pairs[0]
+                                res = pairs[0]
                                 for p in pairs[1:]:
-                                    result = result & p
-                                return result
+                                    res = res & p
+                                return res
 
                             return cond
 
-                        result[from_ds] = result[from_ds].join_one(
-                            result[to_ds],
-                            on=_make_join_cond(from_cols, to_cols),
-                        )
+                        # Detect cardinality from custom_extensions
+                        cardinality = "one"
+                        for ext in rel.get("custom_extensions", []):
+                            if ext.get("vendor_name") == "COMMON":
+                                try:
+                                    data = json.loads(ext["data"])
+                                    if data.get("cardinality") in ("one", "many"):
+                                        cardinality = data["cardinality"]
+                                except (json.JSONDecodeError, KeyError):
+                                    pass
+
+                        on_cond = _make_join_cond(from_cols, to_cols)
+                        if cardinality == "many":
+                            result[from_ds] = result[from_ds].join_many(
+                                result[to_ds], on=on_cond
+                            )
+                        else:
+                            result[from_ds] = result[from_ds].join_one(
+                                result[to_ds], on=on_cond
+                            )
 
     return result
 
@@ -368,11 +401,14 @@ def _parse_dimension_or_measure(
     expr_str, description, extra_kwargs = _parse_expression_config(name, config, metric_type)
     deferred = safe_eval(expr_str, context={"_": _}).unwrap()
     base_kwargs = {"expr": deferred, "description": description}
-    return (
-        Dimension(**base_kwargs, **extra_kwargs)
-        if metric_type == "dimension"
-        else Measure(**base_kwargs)
-    )
+    if metric_type == "dimension":
+        return Dimension(**base_kwargs, **extra_kwargs)
+    else:
+        # Pass through ai_context for measures too
+        meas_kwargs = base_kwargs
+        if "ai_context" in extra_kwargs:
+            meas_kwargs["ai_context"] = extra_kwargs["ai_context"]
+        return Measure(**meas_kwargs)
 
 
 def _parse_calc_measure(name: str, config: str | dict) -> Measure: