From 10cab63fde0c74f31cb2386477383e3ae807a440 Mon Sep 17 00:00:00 2001
From: p1c2u <maciag.artur@gmail.com>
Date: Wed, 18 Feb 2026 15:40:11 +0000
Subject: [PATCH] CI benchmarks

---
 .github/workflows/bench.yml                   |  82 --------------
 .github/workflows/python-bench-baseline.yml   |  38 +++++++
 .github/workflows/python-bench-regression.yml |  77 +++++++++++++
 .github/workflows/python-bench.yml            | 106 ++++++++++++++++++
 .github/workflows/python-publish.yml          |   5 +-
 .../{python-test.yml => python-tests.yml}     |   4 +-
 6 files changed, 226 insertions(+), 86 deletions(-)
 delete mode 100644 .github/workflows/bench.yml
 create mode 100644 .github/workflows/python-bench-baseline.yml
 create mode 100644 .github/workflows/python-bench-regression.yml
 create mode 100644 .github/workflows/python-bench.yml
 rename .github/workflows/{python-test.yml => python-tests.yml} (97%)

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
deleted file mode 100644
index 39d0190..0000000
--- a/.github/workflows/bench.yml
+++ /dev/null
@@ -1,82 +0,0 @@
-name: Benchmarks
-
-on:
-  workflow_dispatch:
-    inputs:
-      quick:
-        description: "Run a shorter benchmark (fewer iterations)"
-        required: false
-        default: false
-        type: boolean
-      repeats:
-        description: "Repeats per scenario (median is reported)"
-        required: false
-        default: "5"
-        type: string
-      warmup_loops:
-        description: "Warmup passes before timing"
-        required: false
-        default: "1"
-        type: string
-
-jobs:
-  bench:
-    name: "Bench"
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v6
-
-      - name: Set up Python
-        uses: actions/setup-python@v6
-        with:
-          python-version: "3.12"
-
-      - name: Set up poetry
-        uses: Gr1N/setup-poetry@v9
-        with:
-          poetry-version: "2.2.1"
-
-      - name: Configure poetry
-        run: poetry config virtualenvs.in-project true
-
-      - name: Set up cache
-        uses: actions/cache@v5
-        id: cache
-        with:
-          path: .venv
-          key: venv-bench-${{ runner.os }}-py3.12-${{ hashFiles('**/poetry.lock') }}
-
-      - name: Install dependencies
-        run: poetry install
-
-      - name: Run benchmarks
-        env:
-          PYTHONHASHSEED: "0"
-        shell: bash
-        run: |
-          set -euo pipefail
-          quick_flag=""
-          if [[ "${{ inputs.quick }}" == "true" ]]; then
-            quick_flag="--quick"
-          fi
-
-          repeats="${{ inputs.repeats }}"
-          warmup="${{ inputs.warmup_loops }}"
-
-          poetry run python -m tests.benchmarks.bench_parse \
-            --output reports/bench-parse.json \
-            $quick_flag \
-            --repeats "$repeats" \
-            --warmup-loops "$warmup"
-
-          poetry run python -m tests.benchmarks.bench_lookup \
-            --output reports/bench-lookup.json \
-            $quick_flag \
-            --repeats "$repeats" \
-            --warmup-loops "$warmup"
-
-      - name: Upload benchmark results
-        uses: actions/upload-artifact@v6
-        with:
-          name: pathable-bench-results
-          path: reports/bench-*.json
diff --git a/.github/workflows/python-bench-baseline.yml b/.github/workflows/python-bench-baseline.yml
new file mode 100644
index 0000000..7559eaa
--- /dev/null
+++ b/.github/workflows/python-bench-baseline.yml
@@ -0,0 +1,38 @@
+name: CI / Benchmarks / Baseline
+
+on:
+  push:
+    branches: [master]
+  workflow_dispatch:
+    inputs:
+      quick:
+        description: "Run a shorter benchmark (fewer iterations)"
+        required: false
+        default: true
+        type: boolean
+      repeats:
+        description: "Repeats per scenario (median is reported)"
+        required: false
+        default: "5"
+        type: string
+      warmup_loops:
+        description: "Warmup passes before timing"
+        required: false
+        default: "1"
+        type: string
+
+concurrency:
+  group: bench-baseline-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  baseline:
+    name: "Bench baseline (master)"
+    uses: ./.github/workflows/python-bench.yml
+    with:
+      suffix: baseline
+      quick: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.quick == 'true' }}
+      repeats: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.repeats || '5' }}
+      warmup_loops: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.warmup_loops || '1' }}
+      save_cache_key: bench-baseline-py3.12-${{ github.sha }}
+      artifact_name: pathable-bench-baseline-results
diff --git a/.github/workflows/python-bench-regression.yml b/.github/workflows/python-bench-regression.yml
new file mode 100644
index 0000000..527da4c
--- /dev/null
+++ b/.github/workflows/python-bench-regression.yml
@@ -0,0 +1,77 @@
+name: CI / Benchmarks / Regression
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+concurrency:
+  group: bench-pr-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  head:
+    name: "Bench head"
+    uses: ./.github/workflows/python-bench.yml
+    with:
+      suffix: head
+      quick: true
+      repeats: "3"
+      warmup_loops: "0"
+      artifact_name: pathable-bench-pr-head
+
+  compare:
+    name: "Bench compare (PR vs baseline)"
+    runs-on: ubuntu-latest
+    needs: head
+    env:
+      BENCH_TOLERANCE: "0.20"
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Download head benchmark results
+        uses: actions/download-artifact@v6
+        with:
+          name: pathable-bench-pr-head
+          path: reports
+
+      - name: Restore baseline cache
+        id: baseline-cache
+        uses: actions/cache/restore@v4
+        with:
+          path: reports
+          key: bench-baseline-py3.12-${{ github.event.pull_request.base.sha }}
+          restore-keys: |
+            bench-baseline-py3.12-
+
+      - name: Ensure baseline exists
+        shell: bash
+        run: |
+          set -euo pipefail
+          if [[ -f reports/bench-parse.baseline.json && -f reports/bench-lookup.baseline.json ]]; then
+            exit 0
+          fi
+          echo "Baseline benchmark cache not found for this repository." >&2
+          echo "Run the baseline workflow on master at least once to populate cache." >&2
+          exit 1
+
+      - name: Compare parse benchmark
+        shell: bash
+        run: |
+          python tests/benchmarks/compare_results.py \
+            --baseline reports/bench-parse.baseline.json \
+            --candidate reports/bench-parse.head.json \
+            --tolerance "$BENCH_TOLERANCE"
+
+      - name: Compare lookup benchmark
+        shell: bash
+        run: |
+          python tests/benchmarks/compare_results.py \
+            --baseline reports/bench-lookup.baseline.json \
+            --candidate reports/bench-lookup.head.json \
+            --tolerance "$BENCH_TOLERANCE"
+
+      - name: Upload comparison inputs
+        uses: actions/upload-artifact@v6
+        with:
+          name: pathable-bench-pr-results
+          path: reports/bench-*.json
diff --git a/.github/workflows/python-bench.yml b/.github/workflows/python-bench.yml
new file mode 100644
index 0000000..6e62e20
--- /dev/null
+++ b/.github/workflows/python-bench.yml
@@ -0,0 +1,106 @@
+name: Benchmarks / Reusable
+
+on:
+  workflow_call:
+    inputs:
+      suffix:
+        required: true
+        type: string
+      git_ref:
+        required: false
+        type: string
+        default: ""
+      quick:
+        required: false
+        type: boolean
+        default: true
+      repeats:
+        required: false
+        type: string
+        default: "3"
+      warmup_loops:
+        required: false
+        type: string
+        default: "0"
+      save_cache_key:
+        required: false
+        type: string
+        default: ""
+      artifact_name:
+        required: false
+        type: string
+        default: "pathable-bench-results"
+
+env:
+  PYTHON_VERSION: "3.12"
+  POETRY_VERSION: "2.2.1"
+  PYTHONHASHSEED: "0"
+
+jobs:
+  bench:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: ${{ inputs.git_ref != '' && inputs.git_ref || github.sha }}
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
+
+      - name: Set up poetry
+        uses: Gr1N/setup-poetry@v9
+        with:
+          poetry-version: ${{ env.POETRY_VERSION }}
+
+      - name: Configure poetry
+        run: poetry config virtualenvs.in-project true
+
+      - name: Set up cache
+        uses: actions/cache@v5
+        id: cache
+        with:
+          path: .venv
+          key: venv-bench-${{ runner.os }}-py${{ env.PYTHON_VERSION }}-${{ hashFiles('**/poetry.lock') }}
+
+      - name: Ensure cache is healthy
+        if: steps.cache.outputs.cache-hit == 'true'
+        shell: bash
+        run: timeout 10s poetry run pip --version || rm -rf .venv
+
+      - name: Install dependencies
+        run: poetry install --no-interaction
+
+      - name: Run parse benchmark
+        run: |
+          poetry run python -m tests.benchmarks.bench_parse \
+            --output "reports/bench-parse.${{ inputs.suffix }}.json" \
+            ${{ inputs.quick && '--quick' || '' }} \
+            --repeats "${{ inputs.repeats }}" \
+            --warmup-loops "${{ inputs.warmup_loops }}"
+
+      - name: Run lookup benchmark
+        run: |
+          poetry run python -m tests.benchmarks.bench_lookup \
+            --output "reports/bench-lookup.${{ inputs.suffix }}.json" \
+            ${{ inputs.quick && '--quick' || '' }} \
+            --repeats "${{ inputs.repeats }}" \
+            --warmup-loops "${{ inputs.warmup_loops }}"
+
+      - name: Save benchmark cache
+        if: inputs.save_cache_key != ''
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            reports/bench-parse.${{ inputs.suffix }}.json
+            reports/bench-lookup.${{ inputs.suffix }}.json
+          key: ${{ inputs.save_cache_key }}
+
+      - name: Upload benchmark results
+        uses: actions/upload-artifact@v6
+        with:
+          name: ${{ inputs.artifact_name }}
+          path: |
+            reports/bench-parse.${{ inputs.suffix }}.json
+            reports/bench-lookup.${{ inputs.suffix }}.json
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index def4996..f46085e 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -1,7 +1,7 @@
 # This workflow will upload a Python Package using Twine when a release is created
 # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 
-name: Publish python packages
+name: CI / Publish
 
 on:
   workflow_dispatch:
@@ -10,7 +10,8 @@ on:
       - published
 
 jobs:
-  publish:
+  publish_pypi:
+    name: "PyPI"
     runs-on: ubuntu-latest
     permissions:
       id-token: write
diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-tests.yml
similarity index 97%
rename from .github/workflows/python-test.yml
rename to .github/workflows/python-tests.yml
index e74e23d..53ddbb0 100644
--- a/.github/workflows/python-test.yml
+++ b/.github/workflows/python-tests.yml
@@ -1,7 +1,7 @@
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
-name: Test python code
+name: CI / Tests
 
 on:
   push:
@@ -10,7 +10,7 @@ on:
 
 jobs:
   test:
-    name: "Tests"
+    name: "py${{ matrix.python-version }}"
     runs-on: ubuntu-latest
     strategy:
       matrix: