diff --git a/.github/workflows/cd-build.yaml b/.github/workflows/cd-build.yaml index 61cc3a9..c6dc9a1 100644 --- a/.github/workflows/cd-build.yaml +++ b/.github/workflows/cd-build.yaml @@ -8,51 +8,40 @@ env: CARGO_TERM_COLOR: always jobs: - wheels: + leading_wheels: strategy: fail-fast: false - max-parallel: 64 + max-parallel: 1 matrix: build: - # Linux - - { os: ubuntu-latest, rust_target: x86_64-unknown-linux-gnu, manylinux: auto, wheel_arch: "manylinux*_x86_64", arch_label: "x86_64", platform_label: "Linux" } - - { os: ubuntu-24.04-arm, rust_target: aarch64-unknown-linux-gnu, manylinux: auto, wheel_arch: "manylinux*_aarch64", arch_label: "ARM64", platform_label: "Linux" } - # Windows - - { os: windows-latest, rust_target: x86_64-pc-windows-msvc, wheel_arch: "win_amd64", arch_label: "x64", platform_label: "Windows" } - - { os: windows-11-arm, rust_target: aarch64-pc-windows-msvc, wheel_arch: "win_arm64", arch_label: "ARM64", platform_label: "Windows" } - # macOS - - { os: macos-15, rust_target: aarch64-apple-darwin, wheel_arch: "macosx*_arm64", arch_label: "Apple Silicon", platform_label: "macOS" } - - { os: macos-15-intel, rust_target: x86_64-apple-darwin, wheel_arch: "macosx*_x86_64", arch_label: "Intel", platform_label: "macOS" } + - { os: ubuntu-24.04-arm, rust_target: aarch64-unknown-linux-gnu, manylinux: auto, wheel_arch: "manylinux*_aarch64", arch_label: "ARM64", platform_label: "Linux" } + - { os: ubuntu-latest, rust_target: x86_64-unknown-linux-gnu, manylinux: auto, wheel_arch: "manylinux*_x86_64", arch_label: "x86_64", platform_label: "Linux" } python: - - { tag: "cp310", version: "3.10.11", label: "3.10" } - - { tag: "cp311", version: "3.11.9", label: "3.11" } - - { tag: "cp312", version: "3.12.10", label: "3.12" } - - { tag: "cp313", version: "3.13.9", label: "3.13" } - - { tag: "cp314", version: "3.14.0", label: "3.14" } - - { tag: "cp314t", version: "3.14t", label: "3.14t", branch: "3.14" } + - { tag: "cp314", version: "3.14.0", label: "3.14" } runs-on: ${{ matrix.build.os }} - name: "Python ${{ matrix.python.label }} - ${{ matrix.build.platform_label }} - ${{ matrix.build.arch_label }}" steps: - - uses: actions/checkout@v5 + - &checkout + uses: actions/checkout@v5 with: submodules: recursive fetch-depth: 0 fetch-tags: true - - name: Setup uv + - &setup-uv + name: Setup uv uses: astral-sh/setup-uv@v6 with: enable-cache: true - - name: Install Python (non-Linux) + - &install-python-non-linux + name: Install Python (non-Linux) if: runner.os != 'Linux' shell: bash run: | - if [[ "${{ matrix.build.os }}" == "windows-11-arm" && "${{ matrix.python.label }}" =~ ^3\.(10|11|12)$ ]]; - then + if [[ "${{ matrix.build.os }}" == "windows-11-arm" && "${{ matrix.python.label }}" =~ ^3\.(10|11|12)$ ]]; then uv python install ${{ matrix.python.version }} echo "PYTHON=python${{ matrix.python.label }}" >> "$GITHUB_ENV" else @@ -64,100 +53,131 @@ jobs: echo "PYTHON=$(uv python find ${{ matrix.python.version }})" >> "$GITHUB_ENV" fi - - name: Build wheel + - &build-wheel + name: Build wheel uses: PyO3/maturin-action@v1 with: target: ${{ matrix.build.rust_target }} manylinux: ${{ matrix.build.manylinux || 'auto' }} args: --release --out dist -i ${{ env.PYTHON || matrix.python.label }} rust-toolchain: nightly - sccache: 'true' + sccache: "true" env: CL: ${{ runner.os == 'Windows' && '/experimental:c11atomics' || '' }} MACOSX_DEPLOYMENT_TARGET: ${{ runner.os == 'macOS' && '10.12' || '' }} - - name: Clear sccache wrapper - # this is required due to maturin failing to find sccache later during getting metadata on uv sync. + - &clear-sccache-linux + name: Clear sccache wrapper if: runner.os == 'Linux' - run: echo "RUSTC_WRAPPER=" >> "$GITHUB_ENV" + run: | + echo "RUSTC_WRAPPER=" >> "$GITHUB_ENV" - - uses: actions/upload-artifact@v4 + - &upload-wheel-artifacts + name: Upload wheel artifact + uses: actions/upload-artifact@v4 with: name: "Wheels-${{ matrix.python.label }}-${{ matrix.build.platform_label }}-${{ matrix.build.arch_label }}" path: dist/*.whl - - name: Find built wheel - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + leading_tests: + needs: leading_wheels + strategy: + fail-fast: false + max-parallel: 1 + matrix: + build: + - { os: ubuntu-24.04-arm, wheel_arch: "manylinux*_aarch64", arch_label: "ARM64", platform_label: "Linux" } + python: + - { tag: "cp314", version: "3.14.0", label: "3.14", branch: "3.14" } + + runs-on: ${{ matrix.build.os }} + name: "Tests - Python ${{ matrix.python.label }} - ${{ matrix.build.platform_label }} - ${{ matrix.build.arch_label }}" + + steps: + - *checkout + - *setup-uv + - &download-test-wheel + name: Download wheel artifact + uses: actions/download-artifact@v4 + with: + name: "Wheels-${{ matrix.python.label }}-${{ matrix.build.platform_label }}-${{ matrix.build.arch_label }}" + path: dist + + - &should-test + name: Check test eligibility + id: should_test + shell: bash + run: echo "run=true" >> "$GITHUB_OUTPUT" + if: matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["cp310","cp311","cp312"]'), matrix.python.tag) + + - &find-test-wheel + name: Find built wheel id: wheel + if: steps.should_test.outputs.run == 'true' shell: bash run: | wheel=$(find dist -name "*-${{ matrix.python.tag }}-${{ matrix.build.wheel_arch }}.whl" -type f | head -n1) - + if [[ -z "$wheel" && "${{ matrix.build.platform_label }}" == "Linux" ]]; then + case "${{ matrix.build.arch_label }}" in + "x86_64") + wheel=$(find dist -name "*-${{ matrix.python.tag }}-*-musllinux*_x86_64.whl" -type f | head -n1) + ;; + "ARM64") + wheel=$(find dist -name "*-${{ matrix.python.tag }}-*-musllinux*_aarch64.whl" -type f | head -n1) + ;; + esac + fi if [[ -z "$wheel" ]]; then echo "No compatible wheel found" exit 1 fi - - echo "Found: $wheel" echo "path=$wheel" >> "$GITHUB_OUTPUT" - - name: Setup test environment - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &setup-test-env + name: Setup test environment + if: steps.should_test.outputs.run == 'true' run: | - uv venv --python ${{ matrix.python.label }} --clear + uv venv --python ${{ env.PYTHON || matrix.python.label }} --clear uv sync --extra test --no-install-project uv pip install "${{ steps.wheel.outputs.path }}" - - name: "Test: Built wheel" - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &test-built-wheel + name: Test built wheel + if: steps.should_test.outputs.run == 'true' run: uv run --no-sync pytest tests/ -vvv - - name: Setup venv for codspeed - if: runner.os == 'Linux' && matrix.build.arch_label == 'x86_64' && contains(fromJSON('["3.13","3.14"]'), matrix.python.label) - run: | - uv venv --python ${{ matrix.python.version }} --clear - uv sync --extra test --no-install-project - - - name: Run codspeed benchmarks - if: runner.os == 'Linux' && matrix.build.arch_label == 'x86_64' && contains(fromJSON('["3.13","3.14"]'), matrix.python.label) - uses: CodSpeedHQ/action@v4 - with: - mode: simulation - run: uv run --with "${{ steps.wheel.outputs.path }}" --no-sync pytest tests/ --codspeed -k test_performance -v - - - name: Setup venv for shuffle tests - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} - run: | - uv venv --python ${{ matrix.python.label }} --clear - uv sync --extra test --no-install-project - uv pip install "${{ steps.wheel.outputs.path }}" - - - name: "Test: Shuffle A" - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &test-shuffle-a + name: Test shuffle A + if: steps.should_test.outputs.run == 'true' run: uv run --no-sync pytest tests/ -v --random-order --random-order-seed=A - - name: "Test: Shuffle B" - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &test-shuffle-b + name: Test shuffle B + if: steps.should_test.outputs.run == 'true' run: uv run --no-sync pytest tests/ -v --random-order --random-order-seed=B - - name: "Test: Shuffle C" - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &test-shuffle-c + name: Test shuffle C + if: steps.should_test.outputs.run == 'true' run: uv run --no-sync pytest tests/ -v --random-order --random-order-seed=C - - name: "Test: Shuffle D" - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &test-shuffle-d + name: Test shuffle D + if: steps.should_test.outputs.run == 'true' run: uv run --no-sync pytest tests/ -v --random-order --random-order-seed=D - - name: "Cache CPython test suite" - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &cache-cpython-tests + name: Cache CPython test suite id: cpython-cache + if: steps.should_test.outputs.run == 'true' uses: actions/cache@v4 with: path: cpython-tests key: cpython-tests-${{ matrix.python.branch || matrix.python.label }} - - name: "Checkout CPython test suite" - if: ${{ (matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label)) && steps.cpython-cache.outputs.cache-hit != 'true' }} + - &checkout-cpython-tests + name: Checkout CPython test suite + if: steps.should_test.outputs.run == 'true' && steps.cpython-cache.outputs.cache-hit != 'true' uses: actions/checkout@v5 with: repository: python/cpython @@ -166,17 +186,164 @@ jobs: sparse-checkout-cone-mode: true path: cpython-tests - - name: "Test: CPython test_copy (patched)" - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &test-cpython-copy-patched + name: Test CPython test_copy (patched) + if: steps.should_test.outputs.run == 'true' env: COPIUM_PATCH_ENABLE: "1" PYTHONPATH: ${{ github.workspace }}/cpython-tests/Lib run: uv run --no-sync python -m unittest test.test_copy -v - - name: "Test: CPython test_copy (patched, dict memo)" - if: ${{ matrix.build.os != 'windows-11-arm' || !contains(fromJSON('["3.10","3.11","3.12"]'), matrix.python.label) }} + - &test-cpython-copy-patched-dict-memo + name: Test CPython test_copy (patched, dict memo) + if: steps.should_test.outputs.run == 'true' env: COPIUM_PATCH_ENABLE: "1" COPIUM_USE_DICT_MEMO: "1" PYTHONPATH: ${{ github.workspace }}/cpython-tests/Lib - run: uv run --no-sync python -m unittest test.test_copy -v \ No newline at end of file + run: uv run --no-sync python -m unittest test.test_copy -v + + build_and_test: + needs: + - leading_tests + if: ${{ always() }} + strategy: + fail-fast: false + max-parallel: 64 + matrix: + build: + - { os: ubuntu-latest, rust_target: x86_64-unknown-linux-gnu, manylinux: auto, wheel_arch: "manylinux*_x86_64", arch_label: "x86_64", platform_label: "Linux" } + - { os: ubuntu-24.04-arm, rust_target: aarch64-unknown-linux-gnu, manylinux: auto, wheel_arch: "manylinux*_aarch64", arch_label: "ARM64", platform_label: "Linux" } + - { os: windows-latest, rust_target: x86_64-pc-windows-msvc, wheel_arch: "win_amd64", arch_label: "x64", platform_label: "Windows" } + - { os: windows-11-arm, rust_target: aarch64-pc-windows-msvc, wheel_arch: "win_arm64", arch_label: "ARM64", platform_label: "Windows" } + - { os: macos-15, rust_target: aarch64-apple-darwin, wheel_arch: "macosx*_arm64", arch_label: "Apple Silicon", platform_label: "macOS" } + - { os: macos-15-intel, rust_target: x86_64-apple-darwin, wheel_arch: "macosx*_x86_64", arch_label: "Intel", platform_label: "macOS" } + python: + - { tag: "cp310", version: "3.10.11", label: "3.10" } + - { tag: "cp311", version: "3.11.9", label: "3.11" } + - { tag: "cp312", version: "3.12.10", label: "3.12" } + - { tag: "cp313", version: "3.13.9", label: "3.13" } + - { tag: "cp314", version: "3.14.0", label: "3.14" } + - { tag: "cp314t", version: "3.14t", label: "3.14t", branch: "3.14" } + exclude: + - build: { os: ubuntu-24.04-arm, rust_target: aarch64-unknown-linux-gnu, manylinux: auto, wheel_arch: "manylinux*_aarch64", arch_label: "ARM64", platform_label: "Linux" } + python: { tag: "cp314", version: "3.14.0", label: "3.14" } + - build: { os: ubuntu-latest, rust_target: x86_64-unknown-linux-gnu, manylinux: auto, wheel_arch: "manylinux*_x86_64", arch_label: "x86_64", platform_label: "Linux" } + python: { tag: "cp314", version: "3.14.0", label: "3.14" } + + runs-on: ${{ matrix.build.os }} + name: "Python ${{ matrix.python.label }} - ${{ matrix.build.platform_label }} - ${{ matrix.build.arch_label }}" + + steps: + - *checkout + - *setup-uv + - *install-python-non-linux + - *build-wheel + - *clear-sccache-linux + - *upload-wheel-artifacts + - *should-test + - *find-test-wheel + - *setup-test-env + - *test-built-wheel + - *test-shuffle-a + - *test-shuffle-b + - *test-shuffle-c + - *test-shuffle-d + - *cache-cpython-tests + - *checkout-cpython-tests + - *test-cpython-copy-patched + - *test-cpython-copy-patched-dict-memo + + codspeed: + name: "CodSpeed ${{ matrix.mode.label }} - Python 3.14 - ${{ matrix.shard.name }}" + needs: leading_wheels + runs-on: ubuntu-latest + env: + BENCH_ARTIFACT: "Wheels-3.14-Linux-x86_64" + BENCH_WHL_GLOB: "*-cp314-*-manylinux*_x86_64.whl" + BENCH_WHL_FALLBACK: "*-cp314-*-musllinux*_x86_64.whl" + + strategy: + fail-fast: false + matrix: + mode: + - { id: simulation, label: "CPU Simulation" } + - { id: memory, label: "Memory" } + shard: + - { name: "Core", expr: "test_memo or test_container or test_depth" } + - { name: "Mid", expr: "test_atomic or test_reduce or test_edge" } + - { name: "Real Data", expr: "test_real" } + + steps: + - *checkout + - *setup-uv + + - &download-benchmark-wheel + name: Download benchmark wheel artifact + uses: actions/download-artifact@v4 + with: + name: ${{ env.BENCH_ARTIFACT }} + path: dist/benchmark-wheel + + - &find-benchmark-wheel + name: Find benchmark wheel + id: wheel + shell: bash + run: | + wheel=$(find dist/benchmark-wheel -name "${{ env.BENCH_WHL_GLOB }}" -type f | head -n1) + if [[ -z "$wheel" ]]; then + wheel=$(find dist/benchmark-wheel -name "${{ env.BENCH_WHL_FALLBACK }}" -type f | head -n1) + fi + if [[ -z "$wheel" ]]; then + echo "No compatible wheel found" + exit 1 + fi + echo "path=$wheel" >> "$GITHUB_OUTPUT" + + - &setup-benchmark-env + name: Setup benchmark environment + run: | + uv venv --python 3.14 --clear + uv sync --extra test --no-install-project + uv pip install "${{ steps.wheel.outputs.path }}" + + - name: Run CodSpeed benchmarks + uses: CodSpeedHQ/action@v4 + env: + CODSPEED_MEMORY: ${{ matrix.mode.id == 'memory' && '1' || '' }} + with: + mode: ${{ matrix.mode.id }} + run: > + uv run --no-sync pytest tests/test_performance.py + --codspeed + -k "${{ matrix.shard.expr }}" + -v + + codspeed_walltime: + name: "CodSpeed WallTime - Python 3.14 - ARM64" + needs: leading_wheels + runs-on: codspeed-macro + env: + BENCH_ARTIFACT: "Wheels-3.14-Linux-ARM64" + BENCH_WHL_GLOB: "*-cp314-*-manylinux*_aarch64.whl" + BENCH_WHL_FALLBACK: "*-cp314-*-musllinux*_aarch64.whl" + + steps: + - *checkout + - *setup-uv + - *download-benchmark-wheel + - *find-benchmark-wheel + - *setup-benchmark-env + + - name: Run CodSpeed walltime benchmarks + uses: CodSpeedHQ/action@v4 + continue-on-error: true + with: + mode: walltime + run: > + uv run --no-sync pytest tests/test_performance.py + --codspeed + -k "test_performance" + --codspeed-warmup-time=0.1 + --codspeed-max-time=3 + -v \ No newline at end of file diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml index ad0c7fb..848c903 100644 --- a/.github/workflows/cd.yaml +++ b/.github/workflows/cd.yaml @@ -21,7 +21,7 @@ jobs: build: uses: ./.github/workflows/cd-build.yaml - # Main branch only: benchmarks + # Main branch only: longer-running benchmarks and README assets pyperformance: if: github.ref == 'refs/heads/main' needs: build diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7792ccb..c264095 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -17,6 +17,7 @@ permissions: jobs: test: + if: github.event_name != 'pull_request' uses: ./.github/workflows/ci-test.yaml lint: diff --git a/pyproject.toml b/pyproject.toml index e7019da..a54d589 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ test = [ "indifference>=0.2.0", "typing-extensions; python_version < '3.12'", "datamodelzoo", - "pytest-codspeed>=4.2.0", + "pytest-codspeed>=4.3.0", "pytest-test-groups>=1.2.1", "psutil>=5.9.0", "pytest-random-order>=1.2.0", diff --git a/tests/test_performance.py b/tests/test_performance.py index bc1ecbf..bab0cf3 100644 --- a/tests/test_performance.py +++ b/tests/test_performance.py @@ -1,126 +1,596 @@ -# SPDX-FileCopyrightText: 2025-present Arseny Boykov (Bobronium) -# -# SPDX-License-Identifier: MIT +""" +copium.deepcopy benchmark suite for CodSpeed. + +Each synthetic group isolates one code path in the deepcopy pipeline. +Within a group, variants share identical structure but differ in the +measured signal. 3+ scale points per signal. Real-world cases detect +end-to-end regression across representative workloads. + +Deepcopy pipeline (from deepcopium.rs): + + 1. pre-memo atomic? → return immediately (None/int/str/bool/float/bytes) + 2. memo recall → hit: return cached; miss: continue + 3. type dispatch → tuple / dict / list / set (exact type) + 4. post-memo atomic? → return immediately (re.Pattern/type/range/function/…) + 5. specialized → frozenset / bytearray / bound method + 6. reduce fallback → __deepcopy__ or __reduce_ex__ +""" import copy as stdlib_copy +import os import platform -import random +import re import sys +from dataclasses import dataclass +from dataclasses import field +from datetime import datetime +from datetime import timedelta from itertools import chain from typing import Any +from typing import NamedTuple import pytest import copium import copium.patch -from datamodelzoo import CASES -from datamodelzoo import Case -BASE_CASES = [ - case - for case in CASES - if "raises" not in case.name and "thirdparty" not in case.name and "guard" not in case.name -] -GUARD_CASES = [case for case in CASES if "guard" in case.name] -random.seed(1) +class Case(NamedTuple): + name: str + obj: Any + memory: bool = True -COMBINED_CASES = [ - Case( - "all", - factory=lambda: (c := [case.obj for case in BASE_CASES] * 1000, random.shuffle(c), c)[-1], - ), - Case( - "cpython:91610", - factory=lambda: [case.obj for case in BASE_CASES if "91610" in case.name], + +def scaled(tag, factory, sizes): + return (Case(f"{tag}-n-{n}", factory(n), memory=n >= 1000) for n in sizes) + + +def depth_scaled(tag, factory, depths): + return (Case(f"{tag}-d-{d}", factory(d), memory=d >= 100) for d in depths) + + +CODSPEED_MEMORY = bool(os.getenv("CODSPEED_MEMORY")) + + +def generate_params(cases): + return pytest.mark.parametrize( + "case", + (pytest.param(c, id=c.name) for c in cases if not CODSPEED_MEMORY or c.memory), + ) + + +python_version = ".".join(map(str, sys.version_info[:2])) +if not getattr(sys, "_is_gil_enabled", lambda: True)(): + python_version += "t" +python_version += f"-{platform.machine()}" + +PYTHON_VERSION = pytest.mark.parametrize("_python", [python_version]) + +SIZES = (10, 100, 1000) +DEPTHS = (10, 100, 500) +ATOM_SIZES = (100, 1000, 10000) +REDUCE_SIZES = (10, 50, 200) + + +# ═══════════════════════════════════════════════════════════ +# MEMO ISOLATION +# +# Constant shape: {'a': (X, X, X), 'b': [X] * n} +# +# Outer dict (2 keys) and inner tuple/list are the same +# across all variants. X controls which memo path fires: +# +# shared_mut → memo hit after first (shallow leaf) +# shared_deep → memo hit after first (recursive leaf) +# shared_tuple_atom → tuple all_same path, never memoised +# shared_tuple_mut → tuple content changes → memo store + hits +# shared_atom → pre-memo atomic skip, no memo +# unique_atom → pre-memo atomic skip, distinct id()s +# unique_mut → memo store each, zero hits +# ═══════════════════════════════════════════════════════════ + + +def memo_shared_mut(n): + leaf = [1, 2, 3] + return {"a": (leaf, leaf, leaf), "b": [leaf] * n} + + +def memo_shared_deep(n): + leaf = [[1, 2], {"k": "v"}, [3, 4]] + return {"a": (leaf, leaf, leaf), "b": [leaf] * n} + + +def memo_shared_tuple_atom(n): + leaf = (1, 2, 3) + return {"a": (leaf, leaf, leaf), "b": [leaf] * n} + + +def memo_shared_tuple_mut(n): + leaf = ([],) + return {"a": (leaf, leaf, leaf), "b": [leaf] * n} + + +def memo_shared_atom(n): + return {"a": (None, None, None), "b": [None] * n} + + +def memo_unique_atom(n): + return {"a": (1, 2, 3), "b": list(range(n))} + + +def memo_unique_mut(n): + return {"a": ([], [], []), "b": [[] for _ in range(n)]} + + +MEMO_CASES = chain( + scaled("shared_mut", memo_shared_mut, SIZES), + scaled("shared_deep", memo_shared_deep, SIZES), + scaled("shared_tuple_atom", memo_shared_tuple_atom, SIZES), + scaled("shared_tuple_mut", memo_shared_tuple_mut, SIZES), + scaled("shared_atom", memo_shared_atom, SIZES), + scaled("unique_atom", memo_unique_atom, SIZES), + scaled("unique_mut", memo_unique_mut, SIZES), +) + +# ═══════════════════════════════════════════════════════════ +# CONTAINER TRAVERSAL +# +# Flat container of n atomic ints. +# Isolates per-container creation + traversal cost. +# ═══════════════════════════════════════════════════════════ + +CONTAINER_CASES = chain( + scaled("list", lambda n: list(range(n)), SIZES), + scaled("tuple", lambda n: tuple(range(n)), SIZES), + scaled("dict", lambda n: {i: i for i in range(n)}, SIZES), + scaled("set", lambda n: set(range(n)), SIZES), + scaled("frozenset", lambda n: frozenset(range(n)), SIZES), + scaled("bytearray", lambda n: bytearray(n), (100, 10_000, 1_000_000)), +) + + +# ═══════════════════════════════════════════════════════════ +# NESTING DEPTH +# +# Single chain d levels deep. Leaf = [1, 2, 3] (mutable) +# except tuple_atom which uses atomic leaf to trigger +# the all_same optimisation at every level. +# ═══════════════════════════════════════════════════════════ + + +def nested_list(d): + obj = [1, 2, 3] + for _ in range(d): + obj = [obj] + return obj + + +def nested_dict(d): + obj = [1, 2, 3] + for _ in range(d): + obj = {"k": obj} + return obj + + +def nested_tuple_mut(d): + obj = [1, 2, 3] + for _ in range(d): + obj = (obj,) + return obj + + +def nested_tuple_atom(d): + obj = 42 + for _ in range(d): + obj = (obj,) + return obj + + +DEPTH_CASES = chain( + depth_scaled("list", nested_list, DEPTHS), + depth_scaled("dict", nested_dict, DEPTHS), + depth_scaled("tuple_mut", nested_tuple_mut, DEPTHS), + depth_scaled("tuple_atom", nested_tuple_atom, DEPTHS), +) + +# ═══════════════════════════════════════════════════════════ +# ATOMIC FAST PATH +# +# Outer list of n items. List overhead is constant across +# variants; we measure per-item dispatch cost. +# +# Pre-memo atomics: None, int, str, bool, float, bytes +# → is_literal_immutable fires before memo +# Post-memo atomics: re.Pattern, type objects +# → memo recall miss, then is_postmemo_atomic fires +# ═══════════════════════════════════════════════════════════ + +CACHED_RE = re.compile(r"^test$") + + +def mixed_prememo_atoms(n): + pool = [None, 42, "s", True, 3.14, b"b"] + return [pool[i % 6] for i in range(n)] + + +ATOMIC_CASES = chain( + scaled("none", lambda n: [None] * n, ATOM_SIZES), + scaled("int", lambda n: list(range(n)), ATOM_SIZES), + scaled("str", lambda n: [f"s{i}" for i in range(n)], ATOM_SIZES), + scaled("mixed_builtin_atomics", mixed_prememo_atoms, ATOM_SIZES), + scaled("re.Pattern", lambda n: [CACHED_RE] * n, ATOM_SIZES), + scaled("type", lambda n: [int] * n, ATOM_SIZES), +) + + +# ═══════════════════════════════════════════════════════════ +# REDUCE PROTOCOL +# +# Objects going through __reduce_ex__ / __deepcopy__. +# List of n instances to scale. +# ═══════════════════════════════════════════════════════════ + + +@dataclass +class SimpleDataclass: + x: int + y: str + + +@dataclass +class MutableDataclass: + x: int + items: list = field(default_factory=list) + mapping: dict = field(default_factory=dict) + + +@dataclass +class NestedDataclass: + inner: SimpleDataclass + items: list = field(default_factory=list) + + +class SlotsObject: + __slots__ = ("x", "y", "z") + + def __init__(self, x, y, z): + self.x = x + self.y = y + self.z = z + + +class CustomDeepcopyObject: + def __init__(self, v): + self.v = v + + def __deepcopy__(self, memo): + return CustomDeepcopyObject(stdlib_copy.deepcopy(self.v, memo)) + + +REDUCE_CASES = chain( + scaled( + "dataclass_simple", + lambda n: [SimpleDataclass(i, f"v{i}") for i in range(n)], + REDUCE_SIZES, ), - Case( - "diverse_atomic", - factory=lambda: [case.obj for case in BASE_CASES if "atom:" in case.name] * 1000, + scaled( + "dataclass_mutable", + lambda n: [MutableDataclass(i, [i], {"k": i}) for i in range(n)], + REDUCE_SIZES, ), - Case( - "all_proto", - factory=lambda: [case.obj for case in BASE_CASES if "proto:" in case.name] * 1000, + scaled( + "dataclass_nested", + lambda n: [NestedDataclass(SimpleDataclass(i, f"v{i}"), [i]) for i in range(n)], + REDUCE_SIZES, ), - Case( - "all_reflexive", - factory=lambda: [case.obj for case in BASE_CASES if "reflexive" in case.name] * 10, + scaled( + "slots", + lambda n: [SlotsObject(i, f"v{i}", float(i)) for i in range(n)], + REDUCE_SIZES, ), - Case( - "all_empty", - factory=lambda: [case.obj for case in BASE_CASES if "empty" in case.name] * 100, + scaled( + "datetime", + lambda n: [datetime(2024, 1, 1) + timedelta(days=i) for i in range(n)], # noqa: DTZ001 + REDUCE_SIZES, ), - Case( - "all_stdlib", - factory=lambda: [case.obj for case in BASE_CASES if "stdlib" in case.name] * 1000, + scaled( + "custom_deepcopy", + lambda n: [CustomDeepcopyObject([i]) for i in range(n)], + REDUCE_SIZES, ), +) + + +# ═══════════════════════════════════════════════════════════ +# EDGE CASES +# +# Structural pathologies: cycles, empties, dense sharing, +# all_same tuples at scale. +# ═══════════════════════════════════════════════════════════ + + +def make_cyclic_list(): + a = [1, 2, 3] + a.append(a) + return a + + +def make_cyclic_dict(): + d = {"k": "v"} + d["self"] = d + return d + + +def make_dense_refs(): + nodes = [[i] for i in range(50)] + return [nodes[i % 50] for i in range(2500)] + + +def wide_dict(n): + return {f"k{i}": [i] for i in range(n)} + + +EDGE_CASES = [ + Case("cyclic_list", make_cyclic_list()), + Case("cyclic_dict", make_cyclic_dict()), + Case("empties", [[], (), {}, set(), frozenset(), bytearray()]), + Case("tuple_allsame_10k", (None,) * 10000), + Case("tuple_alldiff_1k", tuple([] for _ in range(1000))), + Case("dense_refs_50x50", make_dense_refs()), + *scaled("wide_dict", wide_dict, (100, 1000, 5000)), ] -python_version = ".".join(map(str, sys.version_info[:2])) -if not getattr(sys, "_is_gil_enabled", lambda: True)(): - python_version += "t" -python_version += f"-{platform.machine()}" -PYTHON_VERSION_PARAM = pytest.mark.parametrize("_python", [python_version]) +# ═══════════════════════════════════════════════════════════ +# REAL-WORLD +# +# Representative production deepcopy patterns. +# Data is self-contained and deterministic. +# ═══════════════════════════════════════════════════════════ -COMBINED_CASES_PARAMS = pytest.mark.parametrize( - "case", - [pytest.param(case, id=case.name) for case in COMBINED_CASES], -) -BASE_CASES_PARAMS = pytest.mark.parametrize( - "case", - [pytest.param(case, id=case.name) for case in chain(BASE_CASES, GUARD_CASES)], -) +def make_json_api_response(): + return { + "status": "ok", + "pagination": {"page": 1, "per_page": 20, "total": 142}, + "data": [ + { + "id": i, + "type": "user", + "attributes": { + "name": f"User {i}", + "email": f"u{i}@x.com", + "active": i % 3 != 0, + "score": float(i * 17 % 100), + "tags": ["admin", "verified"] if i % 5 == 0 else ["user"], + "metadata": {"joined": "2024-01-15", "logins": i * 7}, + }, + "relationships": { + "team": {"data": {"type": "team", "id": i % 4}}, + "projects": {"data": [{"type": "project", "id": i * 10 + j} for j in range(3)]}, + }, + } + for i in range(20) + ], + "included": [ + {"type": "team", "id": t, "attributes": {"name": f"Team {t}"}} for t in range(4) + ], + "meta": {"request_id": "abc-123", "timing_ms": 42.5}, + } -@BASE_CASES_PARAMS -@PYTHON_VERSION_PARAM -def test_individual_cases_warmup(case: Any, copy, _python, benchmark) -> None: - copy.deepcopy(case.obj) +def make_config_with_shared_defaults(): + defaults = {"timeout": 30, "retries": 3, "backoff": 1.5} + return { + "version": "2.1.0", + "environments": { + env: { + "database": { + "host": f"db-{env}", + "port": 5432, + "pool_size": pool_size, + "options": defaults, + }, + "cache": {"host": f"redis-{env}", "port": 6379, "options": defaults}, + "features": { + "oauth": env != "dev", + "debug": env == "dev", + "providers": ["google", "github"] if env != "dev" else [], + }, + } + for env, pool_size in [("dev", 2), ("staging", 5), ("prod", 20)] + }, + "shared": { + "origins": ["https://app.example.com", "https://api.example.com"], + "headers": ("Content-Type", "Authorization", "X-Request-ID"), + "error_codes": frozenset({400, 401, 403, 404, 500}), + }, + } -@COMBINED_CASES_PARAMS -@PYTHON_VERSION_PARAM -def test_combined_cases_warmup(case: Any, copy, _python, benchmark) -> None: - copy.deepcopy(case.obj) +def make_openapi_fragment(): + def schema(name, fields): + return { + "type": "object", + "title": name, + "properties": {f: {"type": t} for f, t in fields}, + "required": [f for f, _ in fields], + } + base_fields = [ + ("id", "integer"), + ("name", "string"), + ("created_at", "string"), + ("updated_at", "string"), + ("metadata", "object"), + ] -# Initially tests were only running on 3.13 x86_64 -if python_version == "3.13-x86_64": - # backwards compatibility with previous benchmarks runs + schemas = {} + for model in ("User", "Project", "Task", "Comment"): + schemas[model] = schema(model, base_fields) + schemas[f"{model}List"] = { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": {"$ref": f"#/components/schemas/{model}"}, + }, + "total": {"type": "integer"}, + "page": {"type": "integer"}, + }, + } - @BASE_CASES_PARAMS - def test_individual_cases(case: Any, copy, benchmark) -> None: - benchmark(copy.deepcopy, case.obj) + paths = {} + for resource in ("users", "projects", "tasks"): + paths[f"/api/v1/{resource}"] = { + method: { + "operationId": f"{method}_{resource}", + "tags": [resource], + "parameters": [ + {"name": "page", "in": "query", "schema": {"type": "integer"}}, + {"name": "per_page", "in": "query", "schema": {"type": "integer"}}, + ], + "responses": { + "200": {"description": "OK"}, + "404": {"description": "Not found"}, + }, + } + for method in ("get", "post") + } - @COMBINED_CASES_PARAMS - def test_combined_cases(case: Any, copy, benchmark) -> None: - benchmark(copy.deepcopy, case.obj) + return { + "openapi": "3.0.3", + "info": {"title": "Example API", "version": "1.0.0"}, + "paths": paths, + "components": {"schemas": schemas}, + } + + +def make_tabular_data(n): + categories = ("A", "B", "C", "D") + return [ + { + "id": i, + "name": f"item_{i}", + "value": float(i * 3.14), + "category": categories[i % 4], + "active": i % 7 != 0, + "tags": [f"t{j}" for j in range(i % 4)], + } + for i in range(n) + ] + + +def make_grayscale_image_1024x1024(): + return [[(r * 4 + c) % 256 for c in range(1024)] for r in range(1024)] + + +@dataclass +class OrmUser: + id: int + name: str + prefs: dict = field(default_factory=dict) + sessions: list = field(default_factory=list) + + +@dataclass +class OrmSession: + token: str + created: datetime + data: dict = field(default_factory=dict) + + +def make_orm_graph(): + shared_prefs = {"theme": "dark", "lang": "en", "notifications": True} + return [ + OrmUser( + i, + f"u{i}", + shared_prefs, + [ + OrmSession( + f"t{i}{j}", + datetime(2024, 1, 1 + j), # noqa: DTZ001 + {"ip": f"10.0.{i}.{j}"}, + ) + for j in range(3) + ], + ) + for i in range(10) + ] + + +REAL_WORLD_CASES = [ + Case("json_api_response", make_json_api_response()), + Case("config_shared_defaults", make_config_with_shared_defaults()), + Case("openapi_schema", make_openapi_fragment()), + Case("tabular_100", make_tabular_data(100)), + Case("tabular_1000", make_tabular_data(1000)), + Case("image_1024x1024", make_grayscale_image_1024x1024()), + Case("orm_graph_10u3s", make_orm_graph()), +] + + +# ═══════════════════════════════════════════════════════════ +# TESTS +# ═══════════════════════════════════════════════════════════ + + +@generate_params(MEMO_CASES) +@PYTHON_VERSION +def test_memo(case: Case, _python, benchmark): + benchmark(copium.deepcopy, case.obj) + + +@generate_params(CONTAINER_CASES) +@PYTHON_VERSION +def test_container(case: Case, _python, benchmark): + benchmark(copium.deepcopy, case.obj) + + +@generate_params(DEPTH_CASES) +@PYTHON_VERSION +def test_depth(case: Case, _python, benchmark): + benchmark(copium.deepcopy, case.obj) + + +@generate_params(ATOMIC_CASES) +@PYTHON_VERSION +def test_atomic(case: Case, _python, benchmark): + benchmark(copium.deepcopy, case.obj) + + +@generate_params(REDUCE_CASES) +@PYTHON_VERSION +def test_reduce(case: Case, _python, benchmark): + benchmark(copium.deepcopy, case.obj) + + +@generate_params(EDGE_CASES) +@PYTHON_VERSION +def test_edge(case: Case, _python, benchmark): + benchmark(copium.deepcopy, case.obj) + + +@generate_params(REAL_WORLD_CASES) +@PYTHON_VERSION +def test_real(case: Case, _python, benchmark): + benchmark(copium.deepcopy, case.obj) + + +@generate_params(REAL_WORLD_CASES) +@PYTHON_VERSION +def test_real_dict_memo(case: Case, _python, benchmark): + benchmark(copium.deepcopy, case.obj, {}) + + +@generate_params(REAL_WORLD_CASES) +@PYTHON_VERSION +def test_real_stdlib_patched(case: Case, _python, benchmark, copium_patch_enabled): + benchmark(stdlib_copy.deepcopy, case.obj) -else: - assert sys.version_info >= (3, 14) or "--codspeed" not in sys.argv, ( - "This block assumed to have newer versions only." - ) - @BASE_CASES_PARAMS - @PYTHON_VERSION_PARAM - def test_individual_cases(case: Any, copy, benchmark, _python) -> None: - benchmark(copy.deepcopy, case.obj) - - @COMBINED_CASES_PARAMS - @PYTHON_VERSION_PARAM - def test_combined_cases(case: Any, copy, benchmark, _python) -> None: - benchmark(copy.deepcopy, case.obj) - - @COMBINED_CASES_PARAMS - @PYTHON_VERSION_PARAM - def test_combined_cases_copium_dict_memo(case: Any, benchmark, _python) -> None: - benchmark(copium.deepcopy, case.obj, {}) - - @COMBINED_CASES_PARAMS - @PYTHON_VERSION_PARAM - def test_combined_cases_stdlib_patched( - case: Any, benchmark, _python, copium_patch_enabled - ) -> None: - benchmark(stdlib_copy.deepcopy, case.obj) +@generate_params(REAL_WORLD_CASES) +@PYTHON_VERSION +def test_real_stdlib(case: Case, _python, benchmark): + benchmark(stdlib_copy.deepcopy, case.obj)