Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
390 changes: 357 additions & 33 deletions docs/Auto_Docs.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@ class BFDataFrame: # pragma: no cover - placeholder for runtime type hints
...


__lineage__ = {
"email_domain": [
{
"from_relation": "seed_users",
"from_column": "email",
"transformed": True,
"confidence": "annotated",
}
],
"latest_signup_date": [
{"from_relation": "seed_users", "from_column": "signup_date", "confidence": "annotated"}
],
}


def _get_bigframes() -> Any:
try:
import bigframes.pandas as bpd_mod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@
from fastflowtransform import engine_model


__lineage__ = {
"email_domain": [
{
"from_relation": "seed_users",
"from_column": "email",
"transformed": True,
"confidence": "annotated",
}
],
"latest_signup_date": [
{"from_relation": "seed_users", "from_column": "signup_date", "confidence": "annotated"}
],
}


@engine_model(
env_match={
"FF_ENGINE": "bigquery",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,21 @@
F = Any


__lineage__ = {
"email_domain": [
{
"from_relation": "seed_users",
"from_column": "email",
"transformed": True,
"confidence": "annotated",
}
],
"latest_signup_date": [
{"from_relation": "seed_users", "from_column": "signup_date", "confidence": "annotated"}
],
}


def _get_spark_utils() -> tuple[Any, Any]:
try:
from pyspark.sql import Window as _Window
Expand Down
15 changes: 15 additions & 0 deletions examples/basic_demo/models/engines/duckdb/mart_latest_signup.ff.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@
from fastflowtransform import engine_model


__lineage__ = {
"email_domain": [
{
"from_relation": "seed_users",
"from_column": "email",
"transformed": True,
"confidence": "annotated",
}
],
"latest_signup_date": [
{"from_relation": "seed_users", "from_column": "signup_date", "confidence": "annotated"}
],
}


@engine_model(
only="duckdb",
name="mart_latest_signup",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@
from fastflowtransform import engine_model


__lineage__ = {
"email_domain": [
{
"from_relation": "seed_users",
"from_column": "email",
"transformed": True,
"confidence": "annotated",
}
],
"latest_signup_date": [
{"from_relation": "seed_users", "from_column": "signup_date", "confidence": "annotated"}
],
}


@engine_model(
only="postgres",
name="mart_latest_signup",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@
WindowSpec = Any


__lineage__ = {
"email_domain": [
{
"from_relation": "seed_users",
"from_column": "email",
"transformed": True,
"confidence": "annotated",
}
],
"latest_signup_date": [
{"from_relation": "seed_users", "from_column": "signup_date", "confidence": "annotated"}
],
}


def _get_snowpark_utils() -> tuple[Any, Any]:
try:
from snowflake.snowpark import functions as _F
Expand Down
6 changes: 6 additions & 0 deletions examples/basic_demo/models/marts/mart_users_by_domain.ff.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
],
) }}

/* lineage:
user_count <- {{ ref('users_clean.ff') }}.user_id xform
first_signup <- {{ ref('users_clean.ff') }}.signup_date xform
last_signup <- {{ ref('users_clean.ff') }}.signup_date xform
*/

with base as (
select
email_domain,
Expand Down
3 changes: 3 additions & 0 deletions examples/basic_demo/models/staging/users_clean.ff.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
],
) }}

-- lineage: email_domain <- {{ source('crm','users') }}.email xform
-- lineage: signup_date <- {{ source('crm','users') }}.signup_date

with raw_users as (
select
cast(id as integer) as user_id,
Expand Down
5 changes: 5 additions & 0 deletions examples/basic_demo/project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ docs:
description: "Lowercased domain extracted from email."
mart_users_by_domain.ff:
description: "Aggregates signup counts per email domain."
lineage:
user_count:
transformed: true
from:
- { table: users_clean, column: user_id }

# Project-level variables accessible via {{ var('key') }} inside models.
# Example:
Expand Down
63 changes: 62 additions & 1 deletion src/fastflowtransform/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,63 @@ def _clean_lineage(lin: dict[str, list[dict[str, Any]]]) -> dict[str, list[dict[
return out


def _build_lineage_ref_map(models: list[ModelDoc], sources: list[SourceDoc]) -> dict[str, str]:
"""
Build a mapping of "friendly"/logical relation names -> fully qualified relation strings
that exist in the docs manifest (so the SPA can link them).

Examples it tries to cover:
- model name: "users_clean.ff" -> "<schema>.users_clean"
- model name without ".ff": "users_clean" -> "<schema>.users_clean"
- relation tail: "users_clean" -> "<schema>.users_clean"
- source key: "crm.users" -> "<schema>.seed_users"
- relation tail: "seed_users" -> "<schema>.seed_users"
"""
ref_map: dict[str, str] = {}

def add(alias: str | None, target_rel: str | None) -> None:
a = str(alias or "").strip()
t = str(target_rel or "").strip()
if not a or not t:
return
# preserve first writer to reduce accidental collisions
ref_map.setdefault(a, t)
ref_map.setdefault(a.lower(), t)
ref_map.setdefault(a.upper(), t)

def add_relation_tails(rel: str, target_rel: str) -> None:
rel_s = str(rel or "").strip()
if not rel_s:
return
# Add last identifier and last two identifiers (schema.table) as aliases
# (works for db.schema.table too)
parts = [p for p in rel_s.replace("`", "").replace('"', "").split(".") if p]
if not parts:
return
add(parts[-1], target_rel)
if len(parts) >= 2:
add(".".join(parts[-2:]), target_rel)

# Models
for m in models or []:
add(m.name, m.relation)
if m.name.endswith(".ff"):
add(m.name[:-3], m.relation) # "users_clean.ff" -> "users_clean"
else:
add(m.name + ".ff", m.relation) # "users_clean" -> "users_clean.ff" (best-effort)
add(m.relation, m.relation)
add_relation_tails(m.relation, m.relation)

# Sources
for s in sources or []:
add(f"{s.source_name}.{s.table_name}", s.relation)
add(s.table_name, s.relation)
add(s.relation, s.relation)
add_relation_tails(s.relation, s.relation)

return ref_map


def _infer_and_attach_lineage(
models: list[ModelDoc],
executor: Any | None,
Expand All @@ -840,6 +897,7 @@ def _infer_and_attach_lineage(
*,
with_schema: bool,
rendered_sql_by_model: dict[str, str] | None = None,
lineage_ref_map: dict[str, str] | None = None,
) -> None:
"""Best-effort Lineage ermitteln (SQL/Python) und auf Columns mappen."""
for m in models:
Expand Down Expand Up @@ -870,7 +928,7 @@ def _infer_and_attach_lineage(
elif m.kind == "python":
func = getattr(REGISTRY, "py_funcs", {}).get(m.name)
src = m.python_source or (inspect.getsource(func) if callable(func) else "")
inferred = infer_py_lineage(src)
inferred = infer_py_lineage(src, ref_map=lineage_ref_map)
_mark_lineage_confidence(inferred, "inferred")
except Exception:
inferred = {}
Expand Down Expand Up @@ -1076,13 +1134,16 @@ def render_site(
cols_by_table = _collect_columns(executor) if (executor and with_schema) else {}

_apply_descriptions_to_models(models, docs_meta, cols_by_table, with_schema=with_schema)
lineage_ref_map = _build_lineage_ref_map(models, sources)

_infer_and_attach_lineage(
models,
executor,
docs_meta,
cols_by_table,
with_schema=with_schema,
rendered_sql_by_model=(rendered_sql_by_model or None),
lineage_ref_map=lineage_ref_map,
)

used_by = _reverse_deps(nodes)
Expand Down
4 changes: 2 additions & 2 deletions src/fastflowtransform/templates/assets/spa.js
Original file line number Diff line number Diff line change
Expand Up @@ -2759,7 +2759,7 @@ function renderLineage(state, items) {
renderRelationColRef(state, it.from_relation, it.from_column),
" ",
renderConfPill(conf),
it.transformed ? el("span", { class: "pillSmall" }, "XFORM") : ""
it.transformed ? el("span", { class: "pillSmall" }, "TRANSFORMED") : ""
)
);
}
Expand Down Expand Up @@ -3037,7 +3037,7 @@ function buildColumnsCard(state, m, colFromRoute) {
renderRelationColRef(state, e.from_relation, e.from_column),
" ",
renderConfPill(e.confidence),
e.transformed ? el("span", { class: "pillSmall" }, "XFORM") : ""
e.transformed ? el("span", { class: "pillSmall" }, "TRANSFORMED") : ""
)
)
)
Expand Down