diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 856d88f..d6e5dc0 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,4 +1,4 @@ -# This file is autogenerated by maturin v1.9.6 +# This file is autogenerated by maturin v1.13.1 # To update, run # # maturin generate-ci github @@ -37,10 +37,10 @@ jobs: - runner: ubuntu-22.04 target: ppc64le steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 with: - python-version: 3.x + python-version: 3.13 - name: Build wheels uses: PyO3/maturin-action@v1 with: @@ -49,7 +49,7 @@ jobs: sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} manylinux: auto - name: Upload wheels - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: wheels-linux-${{ matrix.platform.target }} path: dist @@ -68,10 +68,10 @@ jobs: - runner: ubuntu-22.04 target: armv7 steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 with: - python-version: 3.x + python-version: 3.13 - name: Build wheels uses: PyO3/maturin-action@v1 with: @@ -80,7 +80,7 @@ jobs: sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} manylinux: musllinux_1_2 - name: Upload wheels - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: wheels-musllinux-${{ matrix.platform.target }} path: dist @@ -92,14 +92,19 @@ jobs: platform: - runner: windows-latest target: x64 + python_arch: x64 - runner: windows-latest target: x86 + python_arch: x86 + - runner: windows-11-arm + target: aarch64 + python_arch: arm64 steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 with: - python-version: 3.x - architecture: ${{ matrix.platform.target }} + python-version: 3.13 + architecture: ${{ matrix.platform.python_arch }} - name: Build wheels uses: PyO3/maturin-action@v1 with: 
@@ -107,7 +112,7 @@ jobs: args: --release --out dist --find-interpreter sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} - name: Upload wheels - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: wheels-windows-${{ matrix.platform.target }} path: dist @@ -117,15 +122,15 @@ jobs: strategy: matrix: platform: - - runner: macos-13 + - runner: macos-15-intel target: x86_64 - - runner: macos-14 + - runner: macos-latest target: aarch64 steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 with: - python-version: 3.x + python-version: 3.13 - name: Build wheels uses: PyO3/maturin-action@v1 with: @@ -133,7 +138,7 @@ jobs: args: --release --out dist --find-interpreter sccache: ${{ !startsWith(github.ref, 'refs/tags/') }} - name: Upload wheels - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: wheels-macos-${{ matrix.platform.target }} path: dist @@ -141,14 +146,14 @@ jobs: sdist: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Build sdist uses: PyO3/maturin-action@v1 with: command: sdist args: --out dist - name: Upload sdist - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: wheels-sdist path: dist @@ -166,14 +171,14 @@ jobs: # Used to generate artifact attestation attestations: write steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v7 - name: Generate artifact attestation - uses: actions/attest-build-provenance@v2 + uses: actions/attest-build-provenance@v3 with: subject-path: 'wheels-*/*' + - name: Install uv + if: ${{ startsWith(github.ref, 'refs/tags/') }} + uses: astral-sh/setup-uv@v7 - name: Publish to PyPI if: ${{ startsWith(github.ref, 'refs/tags/') }} - uses: PyO3/maturin-action@v1 - with: - command: upload - args: --non-interactive --skip-existing wheels-*/* + run: uv publish 'wheels-*/*' diff --git a/Cargo.lock 
b/Cargo.lock index 0969f9b..75dfb6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,118 +2,74 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - [[package]] name = "fast-walk" -version = "0.1.7" +version = "0.2.0" dependencies = [ - "fastset", "pyo3", ] -[[package]] -name = "fastset" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b536fec0cd9d1750df69c24c143cad5529de2d78e26a235d6d055de043f0bb6" -dependencies = [ - "nanorand", - "serde", -] - [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "indoc" -version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" -dependencies = [ - "rustversion", -] - [[package]] name = "libc" -version = "0.2.177" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" - -[[package]] -name = "memoffset" -version = "0.9.1" +version = "0.2.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" -dependencies = [ - "autocfg", -] - -[[package]] -name = "nanorand" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" +checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf" +checksum = "91fd8e38a3b50ed1167fb981cd6fd60147e091784c427b8f7183a7ee32c31c12" dependencies = [ - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb" +checksum = "e368e7ddfdeb98c9bca7f8383be1648fd84ab466bf2bc015e94008db6d35611e" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be" +checksum = "7f29e10af80b1f7ccaf7f69eace800a03ecd13e883acfacc1e5d0988605f651e" dependencies = [ "libc", "pyo3-build-config", @@ -121,9 +77,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.27.1" +version = 
"0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71" +checksum = "df6e520eff47c45997d2fc7dd8214b25dd1310918bbb2642156ef66a67f29813" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -133,9 +89,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.27.1" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b" +checksum = "c4cdc218d835738f81c2338f822078af45b4afdf8b2e33cbb5916f108b813acb" dependencies = [ "heck", "proc-macro2", @@ -146,54 +102,18 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.41" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] 
name = "syn" -version = "2.0.109" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f17c7e013e88258aa9543dcbe81aca68a667a9ac37cd69c9fbc07858bfe0e2f" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -202,18 +122,12 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.13.3" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] name = "unicode-ident" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" - -[[package]] -name = "unindent" -version = "0.2.4" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" diff --git a/Cargo.toml b/Cargo.toml index ef8e97e..ee9cab4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fast-walk" -version = "0.1.7" +version = "0.2.0" edition = "2024" license = "MIT" readme = "README.md" @@ -12,11 +12,10 @@ name = "fast_walk" crate-type = ["cdylib"] [profile.release] -debug = false +debug = "line-tables-only" # symbols for perf profiling; near-zero perf cost opt-level = 3 lto = true # Link-time optimization. -codegen-units = 1 # Slower compilation but faster code. +codegen-units = 1 # Slower compilation but faster code. [dependencies] -fastset = "0.5.2" -pyo3 = "0.27.1" +pyo3 = "0.28.3" diff --git a/README.md b/README.md index 6dc26f7..294f3c9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # fast-walk -A fast (almost) drop-in implementation of `ast.walk`. 
+A fast reimplementation of Python's `ast.walk`, written in Rust. ## Installation @@ -10,19 +10,50 @@ pip install fast-walk ## Usage +Two public entry points, depending on whether you care about traversal order: + ```python -from fast_walk import walk import ast +from fast_walk import walk_dfs, walk_unordered + +tree = ast.parse("def f(x): return x + 1") -code = """ -def hello( x,y, z ): - print( x+y+z ) -""" +# Strict depth-first pre-order. +for node in walk_dfs(tree): + ... -for node in walk(ast.parse(code)): - pass +# Implementation-defined order; same node set, faster. +# ast.walk itself makes no ordering guarantee, so this is a drop-in for most code. +for node in walk_unordered(tree): + ... ``` +### Which one to use + +- **`walk_unordered`** — default choice. Same set of nodes as `ast.walk` + but with better cache behavior (batched dict-metadata prefetching). + Roughly 25% faster than `walk_dfs` on real Python source. +- **`walk_dfs`** — pick this only if your code actually depends on + depth-first pre-order visitation. `ast.walk` does not document an order, + so most callers can safely use `walk_unordered`. + +## Performance + +Benchmark on CPython 3.13, walking the AST of `difflib.py` (~2000 lines, +~4300 unique AST nodes), best-of-N run pinned to a single CPU with the +`performance` governor: + +| implementation | min time | relative | +| -------------------------- | -------- | -------- | +| `ast.walk` (stdlib) | ~2.3 ms | 1× | +| pure-Python equivalent | ~1.0 ms | ~2× | +| `fast_walk.walk_dfs` | ~18 µs | ~130× | +| `fast_walk.walk_unordered` | ~13 µs | ~180× | + +Both `fast_walk` entry points are semantically equivalent to +`list(ast.walk(node))` — they return the same set of AST nodes. They +differ only in visit order. 
+ ## Development ### Prerequisites @@ -34,14 +65,23 @@ for node in walk(ast.parse(code)): ### Building from source ```bash -# Install maturin pip install maturin -# Build the package +# Iterative builds (debug profile): maturin develop -# Or build a release version -maturin build --release +# Optimized builds for benchmarking: +maturin develop --release +``` + +### Running the tests and benchmarks + +```bash +# Correctness + refcount leak tests: +pytest tests/test_refcount.py + +# Benchmarks (codspeed, walltime mode): +pytest tests/benchmarks.py --codspeed ``` ## License diff --git a/fast_walk.pyi b/fast_walk.pyi index 874edf9..8dd5e38 100644 --- a/fast_walk.pyi +++ b/fast_walk.pyi @@ -1,4 +1,37 @@ import ast +def walk_dfs(node: ast.AST) -> list[ast.AST]: + """Return every descendant of `node` (including `node` itself) in strict + depth-first pre-order. + + Semantically equivalent to ``list(ast.walk(node))`` but much faster. + Use :func:`walk_unordered` if traversal order doesn't matter — it's + faster still. + """ + +def walk_unordered(node: ast.AST) -> list[ast.AST]: + """Return every descendant of `node` (including `node` itself) in an + implementation-defined order. + + The set of returned nodes is identical to :func:`walk_dfs` and to + :func:`ast.walk`; only the visit order differs. Since :func:`ast.walk` + makes no ordering guarantee, this is a drop-in replacement wherever + the caller does not depend on DFS order. + + Uses batched stack draining with L1 prefetch hints to hide the + cache-miss latency of scattered ``PyDictKeysObject`` loads — roughly + 25% faster than :func:`walk_dfs` on real Python source. + """ + def walk(node: ast.AST) -> list[ast.AST]: - """Return a list of all AST nodes in the tree rooted at `node`.""" + """Deprecated. Use :func:`walk_dfs` for explicit depth-first order or + :func:`walk_unordered` for the faster order-agnostic variant. 
+ + Emits a :class:`DeprecationWarning` once per process on first call and + then delegates to :func:`walk_dfs`. + """ + +def _walk_count(node: ast.AST) -> int: + """Benchmarking-only. Traverse the AST and return the node count without + materializing a result list. + """ diff --git a/pyproject.toml b/pyproject.toml index 749a39b..b7e5870 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=1.9,<2.0"] +requires = ["maturin>=1.13,<2.0"] build-backend = "maturin" [project] @@ -34,6 +34,6 @@ cache-keys = [ [dependency-groups] dev = [ "py-spy>=0.4.1", - "pytest>=8.4.2", - "pytest-codspeed>=4.2.0", + "pytest>=9.0.3", + "pytest-codspeed>=4.4.0", ] diff --git a/src/lib.rs b/src/lib.rs index c58e51d..9e3bd3d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,32 +1,46 @@ +//! Fast reimplementation of `ast.walk`. +//! +//! The public API exposes two traversal variants: +//! +//! - [`walk_dfs`] — strict depth-first pre-order. +//! - [`walk_unordered`] — faster; same set of nodes, implementation-defined +//! order. Uses batched prefetching to hide cache-miss latency on the +//! scattered `PyDictKeysObject` loads that dominate the DFS profile. +//! +//! `ast.walk` makes no ordering guarantee, so `walk_unordered` is a drop-in +//! replacement wherever order doesn't matter. + mod pydict; -use std::cell::RefCell; +use std::cell::Cell; +use std::sync::atomic::{AtomicBool, Ordering}; -use pyo3::ffi::{self, Py_ssize_t, PyDictObject, PyObject, PyTypeObject}; +use pyo3::exceptions::PyDeprecationWarning; +use pyo3::ffi::{self, PyDictObject, PyListObject, PyObject, PyTypeObject}; use pyo3::types::{PyList, PyModule, PyType}; use pyo3::{PyTypeInfo, prelude::*}; +/// Reverse iterator over the values of a Python dict whose keys are all +/// strings — the layout used by instance `__dict__`s. Reads the +/// `PyDictKeysObject` entry table directly and skips null (deleted) slots. 
pub struct ReverseDictValuesIter { entries: *const pydict::PyDictUnicodeEntry, current: usize, } impl ReverseDictValuesIter { - /// Creates a new iterator over dictionary values - /// /// # Safety /// - /// The caller must ensure that: - /// - `obj` is a valid pointer to a `PyDictObject` - /// - The dictionary remains valid for the lifetime of the iterator - /// - The dictionary is not modified while iterating + /// - `obj` must be a valid pointer to a `PyDictObject` whose keys are + /// all unicode strings (i.e. a split/combined-unicode dict). + /// - The dictionary must outlive the iterator and must not be mutated + /// while iterating. pub unsafe fn new(obj: *mut PyDictObject) -> Self { unsafe { let dict = &*obj; let keys = &*dict.ma_keys.cast::<pydict::PyDictKeysObject>(); let entries = keys.unicode_entries(); let n = keys.dk_nentries as usize; - Self { entries, current: n, @@ -39,94 +53,211 @@ impl Iterator for ReverseDictValuesIter { type Item = *mut PyObject; fn next(&mut self) -> Option<Self::Item> { - // Skip null entries until we find a valid one or reach the end while self.current > 0 { self.current -= 1; let entry: &pydict::PyDictUnicodeEntry = &unsafe { *self.entries.add(self.current) }; - if !entry.me_value.is_null() { return Some(entry.me_value); } } - None } } +/// Return an object's instance `__dict__` pointer via `tp_dictoffset`, or +/// `None` if the type has no dict offset or the slot is null.
fn get_instance_dict_fast(obj: *mut PyObject) -> Option<*mut PyObject> { unsafe { - let dict_ptr = (*obj).ob_type.as_ref()?.tp_dictoffset; - - if dict_ptr != 0 { - let dict_ptr_addr = (obj as *mut u8).offset(dict_ptr) as *mut *mut ffi::PyObject; - let dict = *dict_ptr_addr; - - if !dict.is_null() { - return Some(dict); - } + let dict_offset = (*obj).ob_type.as_ref()?.tp_dictoffset; + if dict_offset == 0 { + return None; } - None + let dict_ptr_addr = (obj as *mut u8).offset(dict_offset) as *mut *mut ffi::PyObject; + let dict = *dict_ptr_addr; + if dict.is_null() { None } else { Some(dict) } } } -fn isinstance_of_ast( - obj: *mut PyObject, +/// Check whether `subtype` is a subclass of `ast.AST` within the first two +/// levels of the MRO. Every stdlib AST node is `Concrete -> ast.expr/stmt +/// -> ast.AST` or `Concrete -> ast.AST`, so two hops suffice. +/// +/// Performance notes baked in here: +/// - Early-exit on `first_supertype == PyBaseObject_Type`: primitives like +/// `str`, `NoneType`, `float`, `bytes` inherit directly from `object`, +/// AST subclasses never do. This skips the scattered second `tp_base` +/// load on ~13% of items in typical ASTs. +fn issubclass_of_ast( + subtype: *mut PyTypeObject, base_ast_and_expr_type: (*mut PyTypeObject, *mut PyTypeObject), ) -> bool { - let subtype = unsafe { ffi::Py_TYPE(obj) }; let first_supertype = unsafe { (*subtype).tp_base }; if first_supertype.is_null() { return false; } + let py_object_type = &raw mut ffi::PyBaseObject_Type; + if first_supertype == py_object_type { + return false; + } let (base_ast_type, base_expr_type) = base_ast_and_expr_type; if first_supertype == base_ast_type { return true; } - let second_supertype = unsafe { (*first_supertype).tp_base }; - second_supertype == base_ast_type || second_supertype == base_expr_type } -fn is_list(obj: *mut PyObject, py_list_type: *mut PyTypeObject) -> bool { - unsafe { ffi::Py_TYPE(obj) == py_list_type } +/// L1 prefetch hint. 
No-op on non-x86_64 targets — the Python extension +/// builds and runs identically without it, just without the cache-miss +/// hiding that benefits `walk_unordered`. +#[inline(always)] +unsafe fn prefetch_l1(ptr: *const u8) { + #[cfg(target_arch = "x86_64")] + unsafe { + std::arch::x86_64::_mm_prefetch(ptr as *const i8, std::arch::x86_64::_MM_HINT_T0); + } + #[cfg(not(target_arch = "x86_64"))] + { + let _ = ptr; + } } -fn get_length_of_list(obj: *mut PyObject) -> Py_ssize_t { - unsafe { ffi::PyList_GET_SIZE(obj) } +/// Resolve the `ma_keys` pointer of a node's instance dict. Used to +/// prefetch the `PyDictKeysObject` — the intermediate reads (object +/// header, type object, dict slot) are hot; only the final `ma_keys` +/// target typically misses cache. +#[inline(always)] +unsafe fn ma_keys_of(node: *mut PyObject) -> Option<*const u8> { + unsafe { + let type_ptr = (*node).ob_type; + if type_ptr.is_null() { + return None; + } + let dict_offset = (*type_ptr).tp_dictoffset; + if dict_offset == 0 { + return None; + } + let dict_ptr_addr = (node as *const u8).offset(dict_offset) as *const *mut ffi::PyObject; + let dict = *dict_ptr_addr; + if dict.is_null() { + return None; + } + let ma_keys = (*(dict as *const ffi::PyDictObject)).ma_keys; + Some(ma_keys as *const u8) + } } -fn get_item_of_list(obj: *mut PyObject, index: Py_ssize_t) -> *mut PyObject { - unsafe { ffi::PyList_GET_ITEM(obj, index) } +/// Per-node body shared by both traversals: enumerate the node's instance +/// dict, pushing AST children onto `stack` and descending into list +/// attributes (`body`, `args`, `decorator_list`, ...) to push any AST +/// items found there. +/// +/// The `int` fast-path skips ~57% of items in a real Python AST (lineno / +/// col_offset / end_lineno / end_col_offset appear on every node). +/// Expanding the fast-path to str/None/float/bytes was measured and +/// reverted — the extra compares cost more on the AST-hit items than +/// they save on the primitive items. 
+#[inline(always)] +unsafe fn process_node( + current_node: *mut PyObject, + base_ast_and_expr_type: (*mut PyTypeObject, *mut PyTypeObject), + py_list_type: *mut PyTypeObject, + py_long_type: *mut PyTypeObject, + stack: &mut Vec<*mut PyObject>, +) { + let Some(dict) = get_instance_dict_fast(current_node) else { + return; + }; + + for item_ptr in unsafe { ReverseDictValuesIter::new(dict.cast::<PyDictObject>()) } { + let item_type = unsafe { ffi::Py_TYPE(item_ptr) }; + if item_type == py_long_type { + continue; + } + if item_type == py_list_type { + let list = item_ptr as *mut PyListObject; + let length = unsafe { (*(list as *mut ffi::PyVarObject)).ob_size }; + let ob_item = unsafe { (*list).ob_item }; + for i in (0..length).rev() { + let child = unsafe { *ob_item.offset(i) }; + let child_type = unsafe { ffi::Py_TYPE(child) }; + if issubclass_of_ast(child_type, base_ast_and_expr_type) { + stack.push(child); + } + } + } else if issubclass_of_ast(item_type, base_ast_and_expr_type) { + stack.push(item_ptr); + } + } } -fn walk_node_iterative( +/// Strict depth-first pre-order traversal.
+fn walk_node_dfs( node: *mut PyObject, base_ast_and_expr_type: (*mut PyTypeObject, *mut PyTypeObject), py_list_type: *mut PyTypeObject, result_list: &mut Vec<*mut PyObject>, ) -> PyResult<()> { let mut stack = vec![node]; + let py_long_type = &raw mut ffi::PyLong_Type; while let Some(current_node) = stack.pop() { result_list.push(current_node); + unsafe { + process_node( + current_node, + base_ast_and_expr_type, + py_list_type, + py_long_type, + &mut stack, + ); + } + } - // Walk through child nodes - let Some(dict) = get_instance_dict_fast(current_node) else { - continue; - }; - - for item_ptr in unsafe { ReverseDictValuesIter::new(dict.cast::<PyDictObject>()) } { - if isinstance_of_ast(item_ptr, base_ast_and_expr_type) { - stack.push(item_ptr); - } else if is_list(item_ptr, py_list_type) { - let length = get_length_of_list(item_ptr); - for i in (0..length).rev() { - let item_ptr = get_item_of_list(item_ptr, i); - if isinstance_of_ast(item_ptr, base_ast_and_expr_type) { - stack.push(item_ptr); - } - } + Ok(()) +} + +/// Batched traversal with prefetching. +/// +/// Drains up to `BATCH` nodes from the stack, issues an L1 prefetch for +/// each node's `PyDictKeysObject` in a tight loop, then processes each +/// node in turn. Prefetches issued in parallel hide the latency of the +/// scattered dict-keys loads that dominate the DFS profile (~20% of +/// function time). Visits the same set of nodes as `walk_node_dfs` but +/// not in strict DFS order.
+fn walk_node_unordered( + node: *mut PyObject, + base_ast_and_expr_type: (*mut PyTypeObject, *mut PyTypeObject), + py_list_type: *mut PyTypeObject, + result_list: &mut Vec<*mut PyObject>, +) -> PyResult<()> { + const BATCH: usize = 4; + let mut stack = vec![node]; + let py_long_type = &raw mut ffi::PyLong_Type; + let mut batch: [*mut PyObject; BATCH] = [std::ptr::null_mut(); BATCH]; + + while !stack.is_empty() { + let take = stack.len().min(BATCH); + for slot in batch.iter_mut().take(take) { + *slot = stack.pop().unwrap(); + } + + for &node_ptr in batch.iter().take(take) { + if let Some(p) = unsafe { ma_keys_of(node_ptr) } { + unsafe { prefetch_l1(p) }; + } + } + + for &current in batch.iter().take(take) { + result_list.push(current); + unsafe { + process_node( + current, + base_ast_and_expr_type, + py_list_type, + py_long_type, + &mut stack, + ); } } } @@ -135,56 +266,134 @@ fn walk_node_iterative( } thread_local! { - static BASE_AST_TYPE_AND_EXPR: RefCell<Option<(*mut PyTypeObject, *mut PyTypeObject)>> = const { RefCell::new(None) }; + static BASE_AST_TYPE_AND_EXPR: Cell<Option<(*mut PyTypeObject, *mut PyTypeObject)>> = + const { Cell::new(None) }; } +/// Resolve `ast.AST` and `ast.expr` to their raw type pointers. Kept out +/// of the hot path so the importlib work doesn't inline into the walk.
#[inline(never)] -fn get_base_ast_type<'py>(py: Python<'py>) -> PyResult<(*mut PyTypeObject, *mut PyTypeObject)> { +fn get_base_ast_type(py: Python<'_>) -> PyResult<(*mut PyTypeObject, *mut PyTypeObject)> { let ast_module = py.import("ast")?; let ast_class = ast_module.getattr("AST")?.cast_into::<PyType>()?; let expr_class = ast_module.getattr("expr")?.cast_into::<PyType>()?; - Ok((ast_class.as_type_ptr(), expr_class.as_type_ptr())) } +#[inline(always)] +fn resolve_base_types(py: Python) -> PyResult<(*mut PyTypeObject, *mut PyTypeObject)> { + BASE_AST_TYPE_AND_EXPR.with(|cache| match cache.get() { + Some(v) => Ok(v), + None => { + let v = get_base_ast_type(py)?; + cache.set(Some(v)); + Ok(v) + } + }) +} + +/// Construct a Python list from a slice of borrowed pointers (a new strong +/// reference is taken for each), writing `ob_item` directly after `PyList_New`. +/// Avoids the per-item `Bound` allocation in `PyList::new(iter)`. +fn vec_into_pylist<'py>(py: Python<'py>, items: &[*mut PyObject]) -> PyResult<Bound<'py, PyAny>> { + let len = items.len() as ffi::Py_ssize_t; + unsafe { + let list_ptr = ffi::PyList_New(len); + if list_ptr.is_null() { + return Err(PyErr::fetch(py)); + } + let ob_item = (*(list_ptr as *mut ffi::PyListObject)).ob_item; + for (i, &ptr) in items.iter().enumerate() { + ffi::Py_INCREF(ptr); + *ob_item.add(i) = ptr; + } + Ok(Bound::from_owned_ptr(py, list_ptr)) + } +} + +/// Walk the AST rooted at `node` in strict depth-first pre-order and +/// return every descendant (including `node` itself) as a list. +/// +/// Semantically equivalent to `list(ast.walk(node))` but ~100× faster. +/// Use `walk_unordered` if traversal order doesn't matter — it's faster +/// still.
#[pyfunction] -fn walk<'py>(py: Python, node: Bound<'py, PyAny>) -> PyResult<Py<PyList>> { +fn walk_dfs<'py>(py: Python<'py>, node: Bound<'py, PyAny>) -> PyResult<Bound<'py, PyAny>> { let mut result_list = Vec::new(); + let base = resolve_base_types(py)?; + walk_node_dfs( + node.as_ptr(), + base, + PyList::type_object_raw(py), + &mut result_list, + )?; + vec_into_pylist(py, &result_list) +} - // Initialize if needed (separate step with mutable borrow) - BASE_AST_TYPE_AND_EXPR.with(|cache| { - if cache.borrow().is_none() { - *cache.borrow_mut() = Some(get_base_ast_type(py)?); - } - Ok::<(), PyErr>(()) - })?; - - // Now use immutable borrow for the actual work - BASE_AST_TYPE_AND_EXPR.with(|cache| { - let cache_ref = cache.borrow(); - let base_ast_and_expr_type = cache_ref.as_ref().unwrap(); - - walk_node_iterative( - node.as_ptr(), - *base_ast_and_expr_type, - PyList::type_object_raw(py), - &mut result_list, +/// Walk the AST rooted at `node` and return every descendant (including +/// `node` itself) as a list, in an implementation-defined order. +/// +/// The set of returned nodes is identical to `walk_dfs` and to +/// `ast.walk`; only the order differs. Use this whenever order is not +/// significant — batched prefetching makes it ~25% faster than +/// `walk_dfs`. +#[pyfunction] +fn walk_unordered<'py>(py: Python<'py>, node: Bound<'py, PyAny>) -> PyResult<Bound<'py, PyAny>> { + let mut result_list = Vec::new(); + let base = resolve_base_types(py)?; + walk_node_unordered( + node.as_ptr(), + base, + PyList::type_object_raw(py), + &mut result_list, + )?; + vec_into_pylist(py, &result_list) +} + +static DEPRECATED_WALK_WARNED: AtomicBool = AtomicBool::new(false); + +/// Deprecated. Use `walk_dfs` for explicit depth-first order or +/// `walk_unordered` for the faster order-agnostic variant. +/// +/// Emits a `DeprecationWarning` once per process on the first call, then +/// delegates to `walk_dfs`. The warning is gated behind an atomic flag +/// so repeated calls don't pay the cost of the `warnings.warn` machinery.
+#[pyfunction] +fn walk<'py>(py: Python<'py>, node: Bound<'py, PyAny>) -> PyResult<Bound<'py, PyAny>> { + if !DEPRECATED_WALK_WARNED.swap(true, Ordering::Relaxed) { + let category = py.get_type::<PyDeprecationWarning>(); + PyErr::warn( + py, + &category, + c"fast_walk.walk is deprecated; use walk_dfs for strict depth-first order or walk_unordered for the faster order-agnostic variant", + 1, )?; - Ok::<(), PyErr>(()) - })?; + } + walk_dfs(py, node) +} - Ok(PyList::new( - py, - result_list - .into_iter() - .map(|ptr| unsafe { Bound::from_borrowed_ptr(py, ptr) }), - )? - .into()) +/// Benchmarking-only. Traverse the AST and return the node count without +/// materializing a result list. Isolates traversal cost from list-build +/// cost for profiling deltas. +#[pyfunction] +fn _walk_count<'py>(py: Python, node: Bound<'py, PyAny>) -> PyResult<usize> { + let mut result_list = Vec::new(); + let base = resolve_base_types(py)?; + walk_node_dfs( + node.as_ptr(), + base, + PyList::type_object_raw(py), + &mut result_list, + )?; + Ok(result_list.len()) } #[pymodule] fn fast_walk(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(walk, m)?)?; + m.add_function(wrap_pyfunction!(walk_dfs, m)?)?; + m.add_function(wrap_pyfunction!(walk_unordered, m)?)?; + m.add_function(wrap_pyfunction!(_walk_count, m)?)?; Ok(()) } diff --git a/tests/benchmarks.py b/tests/benchmarks.py index c725530..7d1428c 100644 --- a/tests/benchmarks.py +++ b/tests/benchmarks.py @@ -1,14 +1,18 @@ from ast import AST, NodeVisitor as ASTNodeVisitor, parse from ast import walk as ast_walk import ast -from collections.abc import Callable, Iterable +from collections.abc import Callable from pathlib import Path from pytest_codspeed import BenchmarkFixture -from fast_walk import walk as fast_walk +from fast_walk import walk_dfs, walk_unordered, _walk_count import pytest +def ast_walk_list(node: AST) -> list[AST]: + return list(ast_walk(node)) + + def python_walk_helper(node: AST, fields: tuple[str, ...], nodes: list[AST]): nodes.append(node)
for field in fields: @@ -30,20 +34,37 @@ def python_walk(node: AST) -> list[AST]: @pytest.mark.parametrize( "algorithm", [ - ast_walk, - fast_walk, + ast_walk_list, + walk_dfs, + walk_unordered, python_walk, ], ) -def test_walk(benchmark: BenchmarkFixture, algorithm: Callable[[AST], Iterable[AST]]): +def test_walk(benchmark: BenchmarkFixture, algorithm: Callable[[AST], list[AST]]): import difflib source_code = Path(difflib.__file__).read_text() node = parse(source_code) def run(): - for _ in algorithm(node): - pass + algorithm(node) + + benchmark(run) + + +def test_walk_count(benchmark: BenchmarkFixture): + """Traversal cost only — no PyList build, no per-item bound wrapping. + + Delta vs `test_walk[walk_dfs]` is the ceiling for a PyList-build + optimization. If the delta is small, it isn't worth chasing. + """ + import difflib + + source_code = Path(difflib.__file__).read_text() + node = parse(source_code) + + def run(): + _walk_count(node) benchmark(run) diff --git a/tests/manual.py b/tests/manual.py index 52084ea..0acaa22 100644 --- a/tests/manual.py +++ b/tests/manual.py @@ -1,14 +1,14 @@ if __name__ == "__main__": from ast import parse from pathlib import Path - from fast_walk import walk + from fast_walk import walk_unordered import difflib source_code = Path(difflib.__file__).read_text() node = parse(source_code) def walk_benchmark(): - walk(node) + walk_unordered(node) for _ in range(1_000): walk_benchmark() diff --git a/tests/perf_driver.py b/tests/perf_driver.py new file mode 100644 index 0000000..f26ba2d --- /dev/null +++ b/tests/perf_driver.py @@ -0,0 +1,18 @@ +"""Driver for perf profiling: one parse, many walks. 
+ +Usage: + maturin develop --release + perf record -F 2999 --call-graph=dwarf -- \ + taskset -c 2 python tests/perf_driver.py + perf report --stdio --no-children | head -40 +""" + +import difflib +from ast import parse +from pathlib import Path +from fast_walk import walk_unordered + +node = parse(Path(difflib.__file__).read_text()) + +for _ in range(200_000): + walk_unordered(node) diff --git a/tests/test_coherency.py b/tests/test_coherency.py new file mode 100644 index 0000000..6f27fed --- /dev/null +++ b/tests/test_coherency.py @@ -0,0 +1,251 @@ +"""Coherency tests — `walk_dfs`, `walk_unordered`, and `walk` must agree with +each other and with the stdlib `ast.walk` across a range of inputs. + +Each helper produces the same SET of AST-node identities (multiset actually — +shared singletons like `ast.Load()` legitimately appear multiple times). +They may differ in visit order. These tests pin down what must be invariant +regardless of order. +""" + +from __future__ import annotations + +import ast +import textwrap +import warnings +from collections import Counter + +import pytest + +from fast_walk import walk_dfs, walk_unordered +import fast_walk + + +SOURCES: dict[str, str] = { + "empty": "", + "single_stmt": "x = 1", + "just_a_literal": "42\n", + "functions": textwrap.dedent(""" + def f(x, y, /, z, *args, kw=1, **kwargs): + return x + y * z + + async def g(): + async for i in it(): + async with ctx() as c: + yield i + """), + "classes": textwrap.dedent(""" + class A: + x: int = 0 + + def m(self, *a, **k) -> int: + return self.x + + class B(A, metaclass=type): + ... 
+ """), + "comprehensions": textwrap.dedent(""" + squares = [x * x for x in range(10) if x % 2] + d = {k: v for k, v in pairs if k} + g = (x async for x in aiter() if x) + s = {a + b for a in xs for b in ys} + """), + "control_flow": textwrap.dedent(""" + for i in range(10): + if i % 2 == 0: + continue + elif i > 7: + break + else: + pass + try: + risky() + except (ValueError, TypeError) as e: + handle(e) + except Exception: + raise + finally: + cleanup() + while cond: + do() + with open('x') as f, open('y') as g: + pass + match obj: + case [1, 2, *rest]: + pass + case {"k": v, **rest}: + pass + case Point(x=0, y=0): + pass + case _: + pass + """), + "deeply_nested": "a = " + "[" * 50 + "1" + "]" * 50, + "imports_and_decorators": textwrap.dedent(""" + import a, b.c + from d.e import f as g, h + from . import i + + @decorator + @deco2(arg) + def decorated(): + pass + + @dataclass + class Point: + x: int + y: int = 0 + """), + "fstrings_and_strings": textwrap.dedent(""" + s = f"hello {name!r:>10}" + b = b"bytes" + multiline = ''' + a + b + ''' + """), + "type_params_pep695": textwrap.dedent(""" + def generic[T, U: int, *Ts, **P](x: T) -> T: + return x + + class Container[T]: + def __init__(self, value: T) -> None: + self.value = value + + type Alias[T] = list[T] + """), +} + + +def _multiset(nodes) -> Counter[int]: + """Collapse a walk result to a Counter of id()s. + + AST walks legitimately contain the same node object multiple times + (shared singletons like `ast.Load()`), so equality of sets isn't + enough — we need equality of multisets. 
+ """ + return Counter(id(n) for n in nodes) + + +@pytest.fixture(params=sorted(SOURCES.keys())) +def tree(request) -> ast.AST: + return ast.parse(SOURCES[request.param]) + + +def test_walk_dfs_matches_ast_walk(tree: ast.AST): + """walk_dfs must produce the same multiset as ast.walk.""" + assert _multiset(walk_dfs(tree)) == _multiset(ast.walk(tree)) + + +def test_walk_unordered_matches_ast_walk(tree: ast.AST): + """walk_unordered must produce the same multiset as ast.walk, even + though the order is not guaranteed.""" + assert _multiset(walk_unordered(tree)) == _multiset(ast.walk(tree)) + + +def test_walk_dfs_and_unordered_agree(tree: ast.AST): + """The two fast-path implementations must agree on the multiset of + visited nodes.""" + assert _multiset(walk_dfs(tree)) == _multiset(walk_unordered(tree)) + + +def test_walk_dfs_visits_root_first(tree: ast.AST): + """Pre-order DFS: the first node in the result is always the root.""" + result = walk_dfs(tree) + assert result[0] is tree + + +def test_walk_unordered_visits_root_first(tree: ast.AST): + """The batched implementation currently pops the seed root first + regardless of order within later batches. Pin that behaviour so a + future refactor doesn't silently stop returning the input as the + first element.""" + result = walk_unordered(tree) + assert result[0] is tree + + +def test_walk_dfs_is_deterministic(tree: ast.AST): + """Repeated calls must return identical sequences — no thread-local + or cached state should leak between invocations.""" + a = [id(n) for n in walk_dfs(tree)] + b = [id(n) for n in walk_dfs(tree)] + c = [id(n) for n in walk_dfs(tree)] + assert a == b == c + + +def test_walk_unordered_is_deterministic(tree: ast.AST): + """The *unordered* variant still has a deterministic implementation. + Non-determinism here would be a bug (e.g. 
uninitialised scratch).""" + a = [id(n) for n in walk_unordered(tree)] + b = [id(n) for n in walk_unordered(tree)] + assert a == b + + +def test_walk_dfs_pre_order_property(tree: ast.AST): + """For each AST node with child AST nodes, the parent must appear + before any of its descendants in walk_dfs order. This is the + defining property of pre-order DFS.""" + order = {id(n): i for i, n in enumerate(walk_dfs(tree))} + + for node in ast.walk(tree): + parent_index = order[id(node)] + for child in ast.iter_child_nodes(node): + # Shared singletons (Load/Store/...) may appear earlier via + # a different parent; only require that *some* occurrence of + # the child lies after this parent. Since walks return one + # entry per visit, we just require parent_index < child_index + # for child nodes reachable via this parent. + child_index = order.get(id(child)) + assert child_index is not None + assert parent_index < child_index, ( + f"{type(node).__name__} at {parent_index} should precede " + f"its child {type(child).__name__} at {child_index}" + ) + + +def test_walk_deprecated_alias_matches_dfs(tree: ast.AST): + """The deprecated `walk` entry point must return exactly the same + sequence as walk_dfs. 
We swallow the DeprecationWarning since the + point of this test is equivalence, not emission.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + legacy = [id(n) for n in fast_walk.walk(tree)] + dfs = [id(n) for n in walk_dfs(tree)] + assert legacy == dfs + + +def test_walk_returns_list(): + """Both entry points must return a built-in list (not a generator or + custom iterable), matching the documented return type.""" + tree = ast.parse(SOURCES["functions"]) + assert type(walk_dfs(tree)) is list + assert type(walk_unordered(tree)) is list + + +def test_non_tree_leaf_inputs(): + """walk_dfs/walk_unordered on a single leaf node (no descendants) + should return exactly [node].""" + node = ast.parse("1").body[0].value # an ast.Constant + assert walk_dfs(node) == [node] + assert walk_unordered(node) == [node] + + +def test_walk_dfs_order_is_stable_across_independent_trees(): + """Two parses of the same source must produce isomorphic walk + sequences — same type sequence in the same order. 
This catches + any accidental dependence on node identity/address during + traversal decisions.""" + src = SOURCES["control_flow"] + a_types = [type(n).__name__ for n in walk_dfs(ast.parse(src))] + b_types = [type(n).__name__ for n in walk_dfs(ast.parse(src))] + assert a_types == b_types + + +def test_matches_stdlib_for_sizable_real_file(): + """End-to-end coherency on a real, nontrivial module.""" + import difflib + from pathlib import Path + + tree = ast.parse(Path(difflib.__file__).read_text()) + expected = _multiset(ast.walk(tree)) + assert _multiset(walk_dfs(tree)) == expected + assert _multiset(walk_unordered(tree)) == expected diff --git a/tests/test_refcount.py b/tests/test_refcount.py new file mode 100644 index 0000000..8526e60 --- /dev/null +++ b/tests/test_refcount.py @@ -0,0 +1,136 @@ +import ast +import gc +import sys +import weakref +from collections import Counter + +import pytest + +from fast_walk import walk_dfs as fast_walk + + +SOURCE = """ +def foo(x, y): + a = 1 + 2 + b = [x, y, a] + for i in b: + if i > 0: + print(i, "positive") + else: + yield i + +class C: + def m(self): + return [1, 2, 3] +""" + + +def test_result_matches_ast_walk(): + tree = ast.parse(SOURCE) + fast_ids = [id(n) for n in fast_walk(tree)] + ast_ids = [id(n) for n in ast.walk(tree)] + assert sorted(fast_ids) == sorted(ast_ids) + + +def test_refcount_neutral_after_walks(): + """Calling walk N times then dropping the result must leave input + refcounts exactly where they started. 
Drift means we're either + over-INCREF'ing (leak) or under-INCREF'ing (use-after-free waiting to + happen).""" + tree = ast.parse(SOURCE) + sample = list(ast.walk(tree)) + + before = [sys.getrefcount(n) for n in sample] + for _ in range(1000): + fast_walk(tree) # result is dropped immediately as an expr statement + gc.collect() + after = [sys.getrefcount(n) for n in sample] + + drifts = [ + (i, b, a) for i, (b, a) in enumerate(zip(before, after)) if b != a + ] + assert not drifts, f"refcount drift on {len(drifts)} nodes: {drifts[:5]}" + + +def test_result_holds_strong_refs(): + """Items in the returned list should stay alive as long as the list + does, even if every other reference to them is dropped. This catches + a missing Py_INCREF — without it, items would be freed when the last + *non-list* reference is dropped, and the list would hold dangling + pointers.""" + tree = ast.parse(SOURCE) + result = fast_walk(tree) + # weakrefs let us observe liveness without contributing to refcount + weak = [] + for n in result: + try: + weak.append(weakref.ref(n)) + except TypeError: + pass + assert weak, "test setup: expected at least one weakref-able node" + + del tree + gc.collect() + alive = sum(1 for w in weak if w() is not None) + assert alive == len(weak), ( + f"{len(weak) - alive} nodes freed while still held by result list" + ) + + +def test_refcount_of_result_items(): + """Each node's refcount bump must equal its appearance count in the + result. Some AST singletons (ast.Load(), ast.Store(), ast.Del(), ...) 
+ are shared across many parents, so they legitimately appear multiple + times in a walk — this test uses ast.walk as the spec for expected + appearance counts and checks fast_walk's refcount deltas match.""" + tree = ast.parse(SOURCE) + sample = list(ast.walk(tree)) + expected_counts = Counter(id(n) for n in ast.walk(tree)) + + before = [sys.getrefcount(n) for n in sample] + result = fast_walk(tree) + after = [sys.getrefcount(n) for n in sample] + + mismatches = [ + (i, b + expected_counts[id(n)], a) + for i, (n, b, a) in enumerate(zip(sample, before, after)) + if a != b + expected_counts[id(n)] + ] + assert not mismatches, ( + f"refcount delta != appearance count for {len(mismatches)} nodes: " + f"{mismatches[:5]}" + ) + + del result # silence unused-var lint + explicit teardown + + +def test_survives_gc_between_walks(): + """Stress test: forcing full GC between walks should not crash or + invalidate the returned list.""" + tree = ast.parse(SOURCE) + for _ in range(100): + result = fast_walk(tree) + gc.collect() + # touch every item to force a read — segfaults surface here + assert sum(1 for _ in result) == len(result) + + +def test_empty_input(): + tree = ast.parse("") + assert [id(n) for n in fast_walk(tree)] == [id(n) for n in ast.walk(tree)] + + +@pytest.mark.parametrize("iterations", [10, 100, 1000]) +def test_no_leak_growth(iterations): + """Walk the same tree `iterations` times. 
If we leak on every walk, + memory would grow unbounded — we can't measure RSS portably, but we + can at least detect that the result list's refcount bookkeeping is + stable regardless of iteration count.""" + tree = ast.parse(SOURCE * 5) + sample = list(ast.walk(tree))[:10] + before = [sys.getrefcount(n) for n in sample] + for _ in range(iterations): + fast_walk(tree) + gc.collect() + after = [sys.getrefcount(n) for n in sample] + assert before == after diff --git a/uv.lock b/uv.lock index 5248f04..98fbf65 100644 --- a/uv.lock +++ b/uv.lock @@ -72,8 +72,8 @@ dev = [ [package.metadata.requires-dev] dev = [ { name = "py-spy", specifier = ">=0.4.1" }, - { name = "pytest", specifier = ">=8.4.2" }, - { name = "pytest-codspeed", specifier = ">=4.2.0" }, + { name = "pytest", specifier = ">=9.0.3" }, + { name = "pytest-codspeed", specifier = ">=4.4.0" }, ] [[package]] @@ -108,11 +108,11 @@ wheels = [ [[package]] name = "packaging" -version = "25.0" +version = "26.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +sdist = { url = "https://files.pythonhosted.org/packages/df/de/0d2b39fb4af88a0258f3bac87dfcbb48e73fbdea4a2ed0e2213f9a4c2f9a/packaging-26.1.tar.gz", hash = "sha256:f042152b681c4bfac5cae2742a55e103d27ab2ec0f3d88037136b6bfe7c9c5de", size = 215519, upload-time = "2026-04-14T21:12:49.362Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/c2/920ef838e2f0028c8262f16101ec09ebd5969864e5a64c4c05fad0617c56/packaging-26.1-py3-none-any.whl", hash = "sha256:5d9c0669c6285e491e0ced2eee587eaf67b670d94a19e94e3984a481aba6802f", size = 95831, upload-time = "2026-04-14T21:12:47.56Z" }, ] [[package]] @@ -141,25 +141,25 @@ wheels = [ [[package]] name = "pycparser" -version = "2.23" +version = "3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734, upload-time = "2025-09-09T13:23:47.91Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, ] [[package]] name = "pygments" -version = "2.19.2" +version = "2.20.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = 
"2025-06-21T13:39:12.283Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, ] [[package]] name = "pytest" -version = "8.4.2" +version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -168,40 +168,40 @@ dependencies = [ { name = "pluggy" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = 
"sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] [[package]] name = "pytest-codspeed" -version = "4.2.0" +version = "4.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi" }, { name = "pytest" }, { name = "rich" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e2/e8/27fcbe6516a1c956614a4b61a7fccbf3791ea0b992e07416e8948184327d/pytest_codspeed-4.2.0.tar.gz", hash = "sha256:04b5d0bc5a1851ba1504d46bf9d7dbb355222a69f2cd440d54295db721b331f7", size = 113263, upload-time = "2025-10-24T09:02:55.704Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/bc/9070fdbfb479a0e92a12652a68875de157dc9be7dc4865a06a519e3a1877/pytest_codspeed-4.4.0.tar.gz", hash = "sha256:edb7c101d9c50439a42cf02cfa9c0ac92da618841636bbebf87c3fa54669442a", size = 201093, upload-time = "2026-04-14T15:13:20.014Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/dc/e864f45e994a50390ff49792256f1bdcbf42f170e3bc0470ee1a7d2403f3/pytest_codspeed-4.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72aab8278452a6d020798b9e4f82780966adb00f80d27a25d1274272c54630d5", size = 262057, upload-time = "2025-10-24T09:02:45.791Z" }, - { url = "https://files.pythonhosted.org/packages/1d/1c/f1d2599784486879cf6579d8d94a3e22108f0e1f130033dab8feefd29249/pytest_codspeed-4.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:684fcd9491d810ded653a8d38de4835daa2d001645f4a23942862950664273f8", size = 251013, upload-time = "2025-10-24T09:02:46.937Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/fd/eafd24db5652a94b4d00fe9b309b607de81add0f55f073afb68a378a24b6/pytest_codspeed-4.2.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:50794dabea6ec90d4288904452051e2febace93e7edf4ca9f2bce8019dd8cd37", size = 262065, upload-time = "2025-10-24T09:02:48.018Z" }, - { url = "https://files.pythonhosted.org/packages/f9/14/8d9340d7dc0ae647991b28a396e16b3403e10def883cde90d6b663d3f7ec/pytest_codspeed-4.2.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0ebd87f2a99467a1cfd8e83492c4712976e43d353ee0b5f71cbb057f1393aca", size = 251057, upload-time = "2025-10-24T09:02:49.102Z" }, - { url = "https://files.pythonhosted.org/packages/4b/39/48cf6afbca55bc7c8c93c3d4ae926a1068bcce3f0241709db19b078d5418/pytest_codspeed-4.2.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dbbb2d61b85bef8fc7e2193f723f9ac2db388a48259d981bbce96319043e9830", size = 267983, upload-time = "2025-10-24T09:02:50.558Z" }, - { url = "https://files.pythonhosted.org/packages/33/86/4407341efb5dceb3e389635749ce1d670542d6ca148bd34f9d5334295faf/pytest_codspeed-4.2.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:748411c832147bfc85f805af78a1ab1684f52d08e14aabe22932bbe46c079a5f", size = 256732, upload-time = "2025-10-24T09:02:51.603Z" }, - { url = "https://files.pythonhosted.org/packages/25/0e/8cb71fd3ed4ed08c07aec1245aea7bc1b661ba55fd9c392db76f1978d453/pytest_codspeed-4.2.0-py3-none-any.whl", hash = "sha256:e81bbb45c130874ef99aca97929d72682733527a49f84239ba575b5cb843bab0", size = 113726, upload-time = "2025-10-24T09:02:54.785Z" }, + { url = "https://files.pythonhosted.org/packages/78/53/031793dab3a0edbbcbbd8755648ace0853f4cfb92a0e09e620f301f9ef5d/pytest_codspeed-4.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:ee3e1964446011ca192eebf0350227df231a5b88af57e518f2a4328fc8ca5131", size = 820300, upload-time = "2026-04-14T15:13:06.791Z" }, + { url = "https://files.pythonhosted.org/packages/e7/66/0c3530c0dd9959b7f0930551b3de296db391040e5e8ad3e0cab917736980/pytest_codspeed-4.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:340dbb1cc5a21434e0e29bd68ab03c7dc7ad9bfde09d1980b7161352c4c2f048", size = 829201, upload-time = "2026-04-14T15:13:08Z" }, + { url = "https://files.pythonhosted.org/packages/f2/8a/24c7997d95f8bda081b8d4346750a5db0d9d8405183ee5cb9062f7381476/pytest_codspeed-4.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:413666266762f9cef1321ba971a9e127b97a1f1dad40ddfd2184c2bc5ac157f9", size = 820242, upload-time = "2026-04-14T15:13:09.191Z" }, + { url = "https://files.pythonhosted.org/packages/8b/7f/3912bf6c2bcddb69189d23213f28e5bc058fd4c78fca15dd0010938154b0/pytest_codspeed-4.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e258e6c3d5a8a02ae02a64831be3acd44c19210ffbf13321bdbb8c111c5c6fe4", size = 829190, upload-time = "2026-04-14T15:13:10.762Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f4/2cc5e10847aee4233690aa511df6b6f1c2c09f9d8ae506628a138f4ba201/pytest_codspeed-4.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:56d5dd94dcb69460f916acb9c69865d0171b98acec3ce256645d0c0275b553d7", size = 827557, upload-time = "2026-04-14T15:13:12.553Z" }, + { url = "https://files.pythonhosted.org/packages/7f/57/982ce8aa81089b285730dca8404c76af648af41e46d95012be54452913e6/pytest_codspeed-4.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:33c38e0e797c74506004f231fc53eab0e412987de281755f714018334381aa3a", size = 835388, upload-time = "2026-04-14T15:13:14.232Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/36/9e84323c6be426728e897133f8e9f3e65a90c26c137e190ca9b27bf304c3/pytest_codspeed-4.4.0-py3-none-any.whl", hash = "sha256:a6aab2fa73523f538e7729c20ccf4a1e8e921324c9877a816b05334135950fd9", size = 203809, upload-time = "2026-04-14T15:13:18.72Z" }, ] [[package]] name = "rich" -version = "14.2.0" +version = "15.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fb/d2/8920e102050a0de7bfabeb4c4614a49248cf8d5d7a8d01885fbb24dc767a/rich-14.2.0.tar.gz", hash = "sha256:73ff50c7c0c1c77c8243079283f4edb376f0f6442433aecb8ce7e6d0b92d1fe4", size = 219990, upload-time = "2025-10-09T14:16:53.064Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, + { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, ]