From 9e8f227d6e0298bf9efc99a4f1369a025eada386 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 17 Apr 2026 09:04:16 +0200 Subject: [PATCH] chore: replace custom sort postprocess with keep_model_order config datamodel-code-generator's built-in --keep-model-order already performs topological sort with class-name ordering, producing identical output to our custom alphabetizer. Enabling it in pyproject.toml lets us drop the sort_classes postprocess step and its tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- pyproject.toml | 1 + scripts/postprocess_generated_models.py | 90 ------ .../unit/test_postprocess_generated_models.py | 271 ------------------ 3 files changed, 1 insertion(+), 361 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e445331e..b41e0cbc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -229,6 +229,7 @@ aliases = "datamodel_codegen_aliases.json" formatters = ["ruff-check", "ruff-format"] custom_file_header = "# generated by datamodel-codegen" disable_timestamp = true +keep_model_order = true [tool.uv] # Minimal defense against supply-chain atatcks. diff --git a/scripts/postprocess_generated_models.py b/scripts/postprocess_generated_models.py index 6336c0ee..e32ca443 100644 --- a/scripts/postprocess_generated_models.py +++ b/scripts/postprocess_generated_models.py @@ -10,15 +10,11 @@ class alongside the canonical `ErrorType(StrEnum)`. This script removes the dupl rewires references to use `ErrorType`. - Missing @docs_group decorator: Adds `@docs_group('Models')` to all model classes for API reference documentation grouping, along with the required import. -- Class sorting: Sorts class definitions alphabetically (with topological ordering to respect inheritance - dependencies), so that regeneration from a reordered OpenAPI spec produces minimal diffs. """ from __future__ import annotations -import heapq import re -from collections import defaultdict from pathlib import Path MODELS_PATH = Path(__file__).resolve().parent.parent / 'src' / 'apify_client' / '_models.py' @@ -80,97 +76,11 @@ def add_docs_group_decorators(content: str) -> str: return '\n'.join(result) -def sort_classes(content: str) -> str: - """Sort class definitions alphabetically while respecting inheritance order. - - Uses topological sorting so that base classes always appear before their subclasses, with alphabetical ordering as - the tie-breaker. This makes the output deterministic regardless of the order in the OpenAPI spec, which keeps diffs - minimal across regenerations. - - Only the class statement's base-class expression creates an ordering constraint — field type annotations are lazy - strings thanks to `from __future__ import annotations` and don't require forward declaration. - """ - lines = content.split('\n') - - # Find where class blocks start (first @docs_group decorator). - header_end = 0 - for i, line in enumerate(lines): - if line == DOCS_GROUP_DECORATOR: - header_end = i - break - - # Strip trailing blank lines from the header; we re-add spacing later. - header_lines = lines[:header_end] - while header_lines and not header_lines[-1].strip(): - header_lines.pop() - header = '\n'.join(header_lines) - - # Split the remainder into class blocks. - # Each block starts with `@docs_group('Models')` on its own line. - rest = '\n'.join(lines[header_end:]) - decorator_escaped = re.escape(DOCS_GROUP_DECORATOR) - raw_blocks = re.split(rf'(?=^{decorator_escaped}$)', rest, flags=re.MULTILINE) - blocks = [b.strip() for b in raw_blocks if b.strip()] - - # Parse each block: extract class name and base-class dependencies. - class_blocks: dict[str, str] = {} - class_deps: dict[str, set[str]] = {} - - for block in blocks: - match = re.search(r'^class\s+(\w+)\(([^)]+)\):', block, re.MULTILINE) - if not match: - continue - class_name = match.group(1) - base_expr = match.group(2) - - # Collect all capitalized identifiers from the base-class expression. - referenced = set(re.findall(r'\b([A-Z]\w+)\b', base_expr)) - class_blocks[class_name] = block - class_deps[class_name] = referenced - - if len(class_blocks) != len(blocks): - # Some blocks didn't match the class regex — fall back to avoid data loss. - return content - - all_names = set(class_blocks) - - # Build the dependency graph (only in-file references matter). - in_degree: dict[str, int] = {} - reverse: dict[str, set[str]] = defaultdict(set) - - for name, refs in class_deps.items(): - local_deps = (refs & all_names) - {name} - in_degree[name] = len(local_deps) - for dep in local_deps: - reverse[dep].add(name) - - # Kahn's algorithm with a min-heap for alphabetical tie-breaking. - heap = sorted(name for name, degree in in_degree.items() if degree == 0) - heapq.heapify(heap) - - sorted_names: list[str] = [] - while heap: - name = heapq.heappop(heap) - sorted_names.append(name) - for dependent in reverse[name]: - in_degree[dependent] -= 1 - if in_degree[dependent] == 0: - heapq.heappush(heap, dependent) - - if len(sorted_names) != len(class_blocks): - # Cycle detected — fall back to the original order to avoid data loss. - return content - - sorted_blocks = [class_blocks[name] for name in sorted_names] - return header + '\n\n\n' + '\n\n\n'.join(sorted_blocks) + '\n' - - def main() -> None: content = MODELS_PATH.read_text() fixed = fix_discriminators(content) fixed = deduplicate_error_type_enum(fixed) fixed = add_docs_group_decorators(fixed) - fixed = sort_classes(fixed) if fixed != content: MODELS_PATH.write_text(fixed) diff --git a/tests/unit/test_postprocess_generated_models.py b/tests/unit/test_postprocess_generated_models.py index 99c275d6..d48c4121 100644 --- a/tests/unit/test_postprocess_generated_models.py +++ b/tests/unit/test_postprocess_generated_models.py @@ -1,30 +1,13 @@ from __future__ import annotations -import re import textwrap from scripts.postprocess_generated_models import ( add_docs_group_decorators, deduplicate_error_type_enum, fix_discriminators, - sort_classes, ) -# -- Helpers ------------------------------------------------------------------ - - -def _make_file(header: str, classes: list[str]) -> str: - """Build a fake models file with header and decorated class blocks.""" - parts = [header.rstrip()] - parts.extend(f"@docs_group('Models')\n{cls}" for cls in classes) - return '\n\n\n'.join(parts) + '\n' - - -def _extract_class_names(content: str) -> list[str]: - """Extract class names in order of appearance.""" - return re.findall(r'^class\s+(\w+)\(', content, re.MULTILINE) - - # -- fix_discriminators ------------------------------------------------------- @@ -246,254 +229,6 @@ def test_add_docs_group_decorators_no_classes() -> None: assert "@docs_group('Models')" not in result -# -- sort_classes ------------------------------------------------------------- - - -def test_sort_classes_alphabetically() -> None: - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Charlie(BaseModel):\n pass', - 'class Alpha(BaseModel):\n pass', - 'class Bravo(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - assert names == ['Alpha', 'Bravo', 'Charlie'] - - -def test_sort_classes_respects_inheritance_order() -> None: - """A child class must come after its parent, even if alphabetically first.""" - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Apple(Fruit):\n pass', - 'class Fruit(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - assert names.index('Fruit') < names.index('Apple') - - -def test_sort_classes_diamond_inheritance() -> None: - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Diamond(Left, Right):\n pass', - 'class Right(Base):\n pass', - 'class Left(Base):\n pass', - 'class Base(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - assert names.index('Base') < names.index('Left') - assert names.index('Base') < names.index('Right') - assert names.index('Left') < names.index('Diamond') - assert names.index('Right') < names.index('Diamond') - - -def test_sort_classes_alphabetical_tiebreaking() -> None: - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Zulu(BaseModel):\n pass', - 'class Mike(BaseModel):\n pass', - 'class Alpha(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - assert names == ['Alpha', 'Mike', 'Zulu'] - - -def test_sort_classes_already_sorted_is_stable() -> None: - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Alpha(BaseModel):\n pass', - 'class Bravo(BaseModel):\n pass', - 'class Charlie(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - assert result == content - - -def test_sort_classes_preserves_header() -> None: - header = 'from __future__ import annotations\n\nfrom pydantic import BaseModel\n\nX = 1\n' - content = _make_file( - header, - [ - 'class Bravo(BaseModel):\n pass', - 'class Alpha(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - assert result.startswith('from __future__ import annotations\n\nfrom pydantic import BaseModel\n\nX = 1') - - -def test_sort_classes_preserves_class_body() -> None: - body_a = 'class Alpha(BaseModel):\n name: str\n age: int = 0' - body_b = 'class Bravo(BaseModel):\n value: float' - content = _make_file('from pydantic import BaseModel\n', [body_b, body_a]) - result = sort_classes(content) - assert body_a in result - assert body_b in result - - -def test_sort_classes_chain_inheritance() -> None: - """Child -> Parent -> GrandParent must preserve order GrandParent, Parent, Child.""" - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Child(Parent):\n pass', - 'class Parent(GrandParent):\n pass', - 'class GrandParent(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - assert names == ['GrandParent', 'Parent', 'Child'] - - -def test_sort_classes_ignores_external_base_classes() -> None: - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Zeta(BaseModel):\n pass', - 'class Alpha(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - assert names == ['Alpha', 'Zeta'] - - -def test_sort_classes_self_reference_in_base_ignored() -> None: - """A class listing itself in the base expression should not deadlock.""" - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Foo(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - assert 'class Foo(BaseModel)' in result - - -def test_sort_classes_single_class() -> None: - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class Only(BaseModel):\n name: str', - ], - ) - result = sort_classes(content) - assert 'class Only(BaseModel)' in result - - -def test_sort_classes_generic_base_class() -> None: - """Classes with generic bases like RootModel[list[Foo]] should parse correctly.""" - content = _make_file( - 'from pydantic import BaseModel, RootModel\n', - [ - 'class FooList(RootModel[list[Foo]]):\n pass', - 'class Foo(BaseModel):\n name: str', - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - # Foo is referenced in FooList's base expression, so Foo must come first. - assert names.index('Foo') < names.index('FooList') - - -def test_sort_classes_multiple_independent_trees() -> None: - """Two separate inheritance trees should each be sorted correctly.""" - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class DogBreed(Dog):\n pass', - 'class Cat(BaseModel):\n pass', - 'class Dog(BaseModel):\n pass', - 'class Ant(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - assert names.index('Dog') < names.index('DogBreed') - # All independent classes should be in alphabetical order relative to each other. - independent = [n for n in names if n != 'DogBreed'] - assert independent == sorted(independent) - - -def test_sort_classes_fallback_on_cycle() -> None: - """If there's a cycle in inheritance, the original content is returned unchanged.""" - content = _make_file( - 'from pydantic import BaseModel\n', - [ - 'class A(B):\n pass', - 'class B(A):\n pass', - ], - ) - result = sort_classes(content) - assert result == content - - -def test_sort_classes_fallback_on_unparsable_block() -> None: - header = 'from pydantic import BaseModel' - # Insert a decorated block that has no valid class definition. - content = ( - f'{header}\n\n\n' - f"@docs_group('Models')\n" - f'# This is not a class\n' - f'some_var = 1\n\n\n' - f"@docs_group('Models')\n" - f'class Foo(BaseModel):\n' - f' pass\n' - ) - result = sort_classes(content) - assert result == content - - -def test_sort_classes_multiline_class_body_preserved() -> None: - """Ensure multi-line class bodies with various field types are fully preserved.""" - body = textwrap.dedent("""\ - class Config(BaseModel): - name: str - value: int = 0 - tags: list[str] = Field(default_factory=list) - - def validate_name(self) -> None: - pass""") - content = _make_file( - 'from pydantic import BaseModel, Field\n', - [ - 'class Zebra(BaseModel):\n pass', - body, - ], - ) - result = sort_classes(content) - names = _extract_class_names(result) - assert names == ['Config', 'Zebra'] - assert 'def validate_name(self) -> None:' in result - - -def test_sort_classes_strips_trailing_header_blanks() -> None: - """Trailing blank lines in the header should not cause extra spacing.""" - content = _make_file( - 'from pydantic import BaseModel\n\n\n\n', - [ - 'class Foo(BaseModel):\n pass', - ], - ) - result = sort_classes(content) - # Should not have more than 3 consecutive newlines. - assert '\n\n\n\n' not in result - - # -- Integration: full pipeline ----------------------------------------------- @@ -519,7 +254,6 @@ class Alpha(BaseModel): result = fix_discriminators(content) result = deduplicate_error_type_enum(result) result = add_docs_group_decorators(result) - result = sort_classes(result) # Discriminator fixed. assert "discriminator='pricing_model'" in result @@ -531,8 +265,3 @@ class Alpha(BaseModel): # Decorators added. assert "@docs_group('Models')" in result - - # Classes sorted (Alpha before ErrorResponse before Zebra). - names = _extract_class_names(result) - model_names = [n for n in names if n != 'ErrorType'] - assert model_names == sorted(model_names)