From 727661d709eb833020b702c109c534079e9594be Mon Sep 17 00:00:00 2001 From: p1c2u Date: Wed, 18 Feb 2026 16:26:48 +0000 Subject: [PATCH] Optimize getitem --- .github/workflows/python-bench-regression.yml | 8 ++- pathable/paths.py | 34 ++++++++-- tests/benchmarks/bench_lookup.py | 38 +++++++++++ tests/unit/test_paths.py | 68 +++++++++++++++++-- 4 files changed, 137 insertions(+), 11 deletions(-) diff --git a/.github/workflows/python-bench-regression.yml b/.github/workflows/python-bench-regression.yml index 527da4c..a073239 100644 --- a/.github/workflows/python-bench-regression.yml +++ b/.github/workflows/python-bench-regression.yml @@ -15,8 +15,8 @@ jobs: with: suffix: head quick: true - repeats: "3" - warmup_loops: "0" + repeats: "5" + warmup_loops: "1" artifact_name: pathable-bench-pr-head compare: @@ -38,7 +38,9 @@ jobs: id: baseline-cache uses: actions/cache/restore@v4 with: - path: reports + path: | + reports/bench-parse.baseline.json + reports/bench-lookup.baseline.json key: bench-baseline-py3.12-${{ github.event.pull_request.base.sha }} restore-keys: | bench-baseline-py3.12- diff --git a/pathable/paths.py b/pathable/paths.py index ee1ca2c..39b61ce 100644 --- a/pathable/paths.py +++ b/pathable/paths.py @@ -429,10 +429,36 @@ def __iter__(self: TAccessorPath) -> Iterator[TAccessorPath]: def __getitem__(self: TAccessorPath, key: K) -> V | TAccessorPath: """Access a child path's value.""" - path = self // key - if path.is_traversable(): - return path - return cast(V, path.read_value()) + path: TAccessorPath | None = None + + # Fast path: if accessor supports direct traversal helpers, resolve the + # child once and classify it without repeating full-path lookups. + try: + parent = self.accessor[self.parts] + child = self.accessor._get_subnode(parent, key) + except NotImplementedError: + # Compatibility path for accessors that only implement keys/read. + path = self // key + if path.is_traversable(): + return path + return cast(V, path.read_value()) + + try: + if self.accessor._is_traversable_node(child): + path = self._make_child_relpath(key) + return path + except NotImplementedError: + if path is None: + path = self // key + if path.is_traversable(): + return path + + try: + return cast(V, self.accessor._read_node(child)) + except NotImplementedError: + if path is None: + path = self // key + return cast(V, path.read_value()) def __contains__(self, key: K) -> bool: """Check if a key exists in the path. diff --git a/tests/benchmarks/bench_lookup.py b/tests/benchmarks/bench_lookup.py index 93e13b1..294f0b3 100644 --- a/tests/benchmarks/bench_lookup.py +++ b/tests/benchmarks/bench_lookup.py @@ -82,6 +82,44 @@ def main(argv: Iterable[str] | None = None) -> int: ) ) + # __getitem__ leaf read: should return value for non-traversable child. + leaf_parent = LookupPath.from_lookup( + {"root": {"branch": {"leaf": "value"}}}, "root", "branch" + ) + + def getitem_leaf(_p: LookupPath = leaf_parent) -> None: + _ = _p["leaf"] + + loops_getitem_leaf = 200_000 if not args.quick else 20_000 + results.append( + run_benchmark( + "lookup.getitem.leaf", + getitem_leaf, + loops=loops_getitem_leaf, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + + # __getitem__ branch read: should return child path for traversable child. + branch_parent = LookupPath.from_lookup( + {"root": {"branch": {"child": {"x": 1}}}}, "root", "branch" + ) + + def getitem_branch(_p: LookupPath = branch_parent) -> None: + _ = _p["child"] + + loops_getitem_branch = 200_000 if not args.quick else 20_000 + results.append( + run_benchmark( + "lookup.getitem.branch", + getitem_branch, + loops=loops_getitem_branch, + repeats=repeats, + warmup_loops=warmup_loops, + ) + ) + # Cache miss cost: disable cache and repeatedly read. deep_accessor = deep.accessor if not isinstance(deep_accessor, LookupAccessor): diff --git a/tests/unit/test_paths.py b/tests/unit/test_paths.py index 127477a..6d70196 100644 --- a/tests/unit/test_paths.py +++ b/tests/unit/test_paths.py @@ -38,12 +38,10 @@ def read(self, parts: Sequence[Hashable]) -> Mapping[Hashable, Any] | Any: return self._content -class MockTraversableAccessor( - NodeAccessor[Mapping[Hashable, Any], Hashable, Any] -): +class MockTraversableAccessor(NodeAccessor[Mapping[Any, Any], Hashable, Any]): """Mock accessor that implements _get_subnode for testing fast paths.""" - def __init__(self, node: Mapping[Hashable, Any]): + def __init__(self, node: Mapping[Any, Any]): super().__init__(node) def keys(self, parts: Sequence[Hashable]) -> Any: @@ -73,6 +71,57 @@ def _get_subnode(cls, node: Any, part: Hashable) -> Any: raise KeyError(part) from exc +class CountingTraversableAccessor( + NodeAccessor[Mapping[Any, Any], Hashable, Any] +): + """Accessor that counts traversal operations.""" + + def __init__(self, node: Mapping[Any, Any]): + super().__init__(node) + self.get_node_calls = 0 + + def keys(self, parts: Sequence[Hashable]) -> Any: + node = self._get_node(self.node, parts) + if isinstance(node, Mapping): + return list(node.keys()) + raise KeyError + + def len(self, parts: Sequence[Hashable]) -> int: + keys = self.keys(parts) + return len(keys) + + def read(self, parts: Sequence[Hashable]) -> Any: + return self._read_node(self._get_node(self.node, parts)) + + @classmethod + def _is_traversable_node(cls, node: Mapping[Any, Any] | Any) -> bool: + return isinstance(node, Mapping) + + @classmethod + def _read_node(cls, node: Any) -> Any: + return node + + @classmethod + def _get_subnode(cls, node: Any, part: Hashable) -> Any: + if not isinstance(node, Mapping): + raise KeyError(part) + try: + return node[part] + except KeyError as exc: + raise KeyError(part) from exc + + @classmethod + def _get_node(cls, node: Any, parts: Sequence[Hashable]) -> Any: + current = node + for part in parts: + current = cls._get_subnode(current, part) + return current + + def __getitem__(self, parts: Sequence[Hashable]) -> Any: + self.get_node_calls += 1 + return super().__getitem__(parts) + + class MockPart(str): """Mock resource for testing purposes.""" @@ -1024,6 +1073,17 @@ def test_invalid(self): p["test4"] +class TestAccessorPathGetItemPerformance: + def test_single_traversal_for_leaf_value(self): + accessor = CountingTraversableAccessor({"a": {"b": "value"}}) + p = AccessorPath(accessor, "a") + + result = p["b"] + + assert result == "value" + assert accessor.get_node_calls == 1 + + class TestLookupPathReadValue: @pytest.mark.parametrize( "resource,args,expected",