From 0c2776b05a409fa20639e8b0c9eb51fc53376a04 Mon Sep 17 00:00:00 2001 From: bosd <5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me> Date: Thu, 11 Jun 2026 11:10:36 +0200 Subject: [PATCH 1/2] feat(orm): --resolve-relation CLI flag + ORM-minimizing docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The resolve_relations engine hook (pre-resolve relations in Polars so Odoo skips name_search) was only reachable programmatically / via a transform script. Add a repeatable --resolve-relation flag (source_column:model:key_field:relation_field [:xmlid|dbid]) that parses into the resolve_relations spec list. Also document the ORM-minimizing optimizations (pre-resolve relations, --skip-unchanged anti-join, side-effect suppression, --auto-clean) in the performance-tuning guide — they were implemented but undocumented. Parser + CLI tests included. --- docs/guides/performance_tuning.md | 51 +++++++++++++++++ src/fluvo/__main__.py | 53 ++++++++++++++++++ tests/test_resolve_relations_cli.py | 86 +++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+) create mode 100644 tests/test_resolve_relations_cli.py diff --git a/docs/guides/performance_tuning.md b/docs/guides/performance_tuning.md index 7b011d3c..f14ccc5a 100644 --- a/docs/guides/performance_tuning.md +++ b/docs/guides/performance_tuning.md @@ -395,6 +395,57 @@ The choice of mappers can impact performance. **Recommendation**: If you need to map values based on data in Odoo, it is much more performant to first export the necessary mapping data from Odoo (e.g., using `fluvo export`) into a Python dictionary or a separate CSV file, and then use the much faster `mapper.map_val` or other in-memory lookups to do the translation. 
+--- + +## Minimizing Odoo's ORM Work + +For most imports the wall-clock time is dominated by what **Odoo's ORM does on the +server** — resolving each relational value with a `name_search`, recomputing fields, +firing mail/tracking side-effects — not by the client sending data. The biggest wins +come from doing that work on the client, in Polars, *before* `load()`. + +### Pre-resolve relations (`--resolve-relation`) + +When a column holds a natural key (a country name, a partner reference), Odoo would +normally run a `name_search` **per row** to turn it into a database id. Instead, fluvo +can resolve the whole column in one vectorized Polars join against a cached id-map of +the related model, and hand `load()` an already-resolved `field/id` column — so Odoo +performs **no `name_search`** for that field. + +```bash +# 'country' column holds res.country codes -> resolve into country_id, no name_search. +fluvo import --connection-file conf/connection.conf --file partners.csv \ + --model res.partner \ + --resolve-relation country:res.country:code:country_id +``` + +Format: `source_column:model:key_field:relation_field[:xmlid|dbid]` (repeatable). +`xmlid` (default) is portable; `dbid` is fastest (zero server resolution) but +database-specific. The id-map is cached to parquet and reused across runs. From a +transform script, the same is available as `Processor.resolve_relation(...)`. + +### Skip unchanged records (`--skip-unchanged`) + +On a re-import, fluvo can fetch the current field values, compare them to the incoming +rows with a **vectorized Polars anti-join**, and send only the rows that are new or +changed. Re-running an unchanged dataset then sends ~0 rows. 
+ +```bash +fluvo import --connection-file conf/connection.conf --file partners.csv \ + --model res.partner --skip-unchanged +``` + +### Suppress side-effects (default) and auto-clean + +By default fluvo imports with `tracking_disable`, `mail_create_nolog`, and +`mail_notrack` set, so Odoo skips chatter/tracking work (override any of them with +`--context '{"tracking_disable": false}'`). `--auto-clean` applies safe, type-aware +coercions (whitespace, null tokens, booleans) on the client before load; an +uncoercible value routes that **row** to the fail file rather than aborting the batch. + +> These optimizations are all opt-in (except the default side-effect suppression) and +> correctness-preserving: imported field values match a naive import; only the suppressed chatter/tracking entries differ. + --- ## Performance Strategy for Relational Data (Automatic Two-Pass Import) diff --git a/src/fluvo/__main__.py b/src/fluvo/__main__.py index 83fff4ee..65276099 100644 --- a/src/fluvo/__main__.py +++ b/src/fluvo/__main__.py @@ -30,6 +30,45 @@ from .writer import run_write +def _parse_resolve_relation_specs(specs: tuple[str, ...]) -> list[dict[str, Any]]: + """Parse --resolve-relation strings into resolve_relations specs. + + Each string is ``source_column:model:key_field:relation_field[:to]`` where + ``to`` is ``xmlid`` (default) or ``dbid``. + + Args: + specs: Raw --resolve-relation option values. + + Returns: + list[dict[str, Any]]: Spec dicts for run_import's resolve_relations. + + Raises: + click.BadParameter: If a spec string is malformed. + """ + parsed: list[dict[str, Any]] = [] + for raw in specs: + parts = [p.strip() for p in raw.split(":")] + if len(parts) not in (4, 5): + raise click.BadParameter( + f"--resolve-relation {raw!r}: expected " + "'source_column:model:key_field:relation_field[:xmlid|dbid]'." 
+ ) + spec: dict[str, Any] = { + "source_column": parts[0], + "model": parts[1], + "key_field": parts[2], + "relation_field": parts[3], + } + if len(parts) == 5: + if parts[4] not in ("xmlid", "dbid"): + raise click.BadParameter( + f"--resolve-relation {raw!r}: 'to' must be 'xmlid' or 'dbid'." + ) + spec["to"] = parts[4] + parsed.append(spec) + return parsed + + def _run_dry_run_validation(connection_file: str, **kwargs: Any) -> None: """Run dry-run validation mode without importing.""" from .lib.conf_lib import get_connection_from_config, get_connection_from_dict @@ -1107,6 +1146,15 @@ def vat_validate_cmd( help="Apply safe, type-aware coercions before load (strip whitespace, " "normalize null tokens, canonicalize booleans). Off by default.", ) +@click.option( + "--resolve-relation", + "resolve_relation_specs", + multiple=True, + help="Pre-resolve a relation column in Polars before load, so Odoo performs " + "no name_search for it. Format " + "'source_column:model:key_field:relation_field[:xmlid|dbid]'. Repeatable. 
" + "Example: --resolve-relation country:res.country:code:country_id", +) @click.option( "--defer-parent-store", is_flag=True, @@ -1393,6 +1441,11 @@ def import_cmd(connection_file: str, **kwargs: Any) -> None: # noqa: C901 context["fallback_values"] = fallback_values kwargs["context"] = context + resolve_relation_specs = kwargs.pop("resolve_relation_specs", ()) + if resolve_relation_specs: + kwargs["resolve_relations"] = _parse_resolve_relation_specs( + resolve_relation_specs + ) # Handle groupby option groupby = kwargs.get("groupby") diff --git a/tests/test_resolve_relations_cli.py b/tests/test_resolve_relations_cli.py new file mode 100644 index 00000000..bd88a7a7 --- /dev/null +++ b/tests/test_resolve_relations_cli.py @@ -0,0 +1,86 @@ +"""Tests for the --resolve-relation CLI flag and its parser.""" + +from unittest.mock import MagicMock, patch + +import click +import pytest +from click.testing import CliRunner + +from fluvo import __main__ +from fluvo.__main__ import _parse_resolve_relation_specs + + +def test_parse_four_part_spec() -> None: + """A 4-part spec parses to a dict without a 'to' key.""" + out = _parse_resolve_relation_specs(("country:res.country:code:country_id",)) + assert out == [ + { + "source_column": "country", + "model": "res.country", + "key_field": "code", + "relation_field": "country_id", + } + ] + + +def test_parse_five_part_spec_includes_to() -> None: + """A 5-part spec carries the 'to' target.""" + out = _parse_resolve_relation_specs(("c:res.country:code:country_id:dbid",)) + assert out[0]["to"] == "dbid" + + +def test_parse_multiple_specs() -> None: + """Several specs produce several dicts.""" + out = _parse_resolve_relation_specs( + ( + "country:res.country:code:country_id", + "parent:res.partner:ref:parent_id:xmlid", + ) + ) + assert len(out) == 2 + + +def test_parse_rejects_wrong_part_count() -> None: + """A spec with too few parts is rejected.""" + with pytest.raises(click.BadParameter): + 
_parse_resolve_relation_specs(("country:res.country:code",)) + + +def test_parse_rejects_invalid_to() -> None: + """An invalid 'to' value is rejected.""" + with pytest.raises(click.BadParameter): + _parse_resolve_relation_specs(("c:res.country:code:country_id:nope",)) + + +@patch("fluvo.__main__.run_import") +def test_cli_resolve_relation_flows_to_run_import(mock_run_import: MagicMock) -> None: + """--resolve-relation is parsed and passed to run_import as resolve_relations.""" + mock_run_import.return_value = {"x": 1} + runner = CliRunner() + with runner.isolated_filesystem(): + with open("conn.conf", "w") as f: + f.write("[Connection]") + result = runner.invoke( + __main__.cli, + [ + "import", + "--connection-file", + "conn.conf", + "--file", + "my.csv", + "--model", + "res.partner", + "--resolve-relation", + "country:res.country:code:country_id", + ], + ) + assert result.exit_code == 0 + call_kwargs = mock_run_import.call_args.kwargs + assert call_kwargs["resolve_relations"] == [ + { + "source_column": "country", + "model": "res.country", + "key_field": "code", + "relation_field": "country_id", + } + ] From 3b4987229ef5252164c871442b08585842c54de5 Mon Sep 17 00:00:00 2001 From: bosd <5e2fd43-d292-4c90-9d1f-74ff3436329a@anonaddy.me> Date: Thu, 11 Jun 2026 11:31:32 +0200 Subject: [PATCH 2/2] fix(orm): reject empty fields in --resolve-relation specs (review) Validate the 4 required parts are non-empty (e.g. 'country::code:country_id' was accepted before), with a test. 
--- src/fluvo/__main__.py | 5 +++-- tests/test_resolve_relations_cli.py | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/fluvo/__main__.py b/src/fluvo/__main__.py index 65276099..0c1bc7c2 100644 --- a/src/fluvo/__main__.py +++ b/src/fluvo/__main__.py @@ -48,10 +48,11 @@ def _parse_resolve_relation_specs(specs: tuple[str, ...]) -> list[dict[str, Any] parsed: list[dict[str, Any]] = [] for raw in specs: parts = [p.strip() for p in raw.split(":")] - if len(parts) not in (4, 5): + if len(parts) not in (4, 5) or not all(parts[:4]): raise click.BadParameter( f"--resolve-relation {raw!r}: expected " - "'source_column:model:key_field:relation_field[:xmlid|dbid]'." + "'source_column:model:key_field:relation_field[:xmlid|dbid]' " + "with non-empty fields." ) spec: dict[str, Any] = { "source_column": parts[0], diff --git a/tests/test_resolve_relations_cli.py b/tests/test_resolve_relations_cli.py index bd88a7a7..8a8730a6 100644 --- a/tests/test_resolve_relations_cli.py +++ b/tests/test_resolve_relations_cli.py @@ -46,6 +46,12 @@ def test_parse_rejects_wrong_part_count() -> None: _parse_resolve_relation_specs(("country:res.country:code",)) +def test_parse_rejects_empty_fields() -> None: + """A spec with an empty required field is rejected.""" + with pytest.raises(click.BadParameter): + _parse_resolve_relation_specs(("country::code:country_id",)) + + def test_parse_rejects_invalid_to() -> None: """An invalid 'to' value is rejected.""" with pytest.raises(click.BadParameter):