diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 8662701b..c27a4030 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -96,6 +96,26 @@ jobs: run: maturin build --release --out dist - name: install the built wheel shell: bash - run: python -m pip install ordvec-python/dist/*.whl + run: | + set -euo pipefail + WHEEL="$(python - <<'PY' + from pathlib import Path + wheels = sorted(Path("ordvec-python/dist").glob("*.whl")) + if len(wheels) != 1: + raise SystemExit(f"expected exactly one wheel, found {wheels}") + print(wheels[0]) + PY + )" + REQ_FILE="${RUNNER_TEMP:?RUNNER_TEMP must be set}/ordvec-wheel-requirements.txt" + python - <<'PY' "$WHEEL" > "$REQ_FILE" + import hashlib + import sys + from pathlib import Path + + wheel = Path(sys.argv[1]).resolve() + digest = hashlib.sha256(wheel.read_bytes()).hexdigest() + print(f"ordvec @ {wheel.as_uri()} --hash=sha256:{digest}") + PY + python -m pip install --require-hashes --no-index --no-deps -r "$REQ_FILE" - name: pytest run: python -m pytest ordvec-python/tests -q diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 329bc1f8..c50bfb48 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,12 +1,14 @@ # Unified, tag-triggered release pipeline for ordvec (crate + Python wheel). # # Cutting a stable `vMAJOR.MINOR.PATCH` tag fully automates: build (crate + -# wheels + sdist) -> attest (GitHub artifact attestations) -> SLSA Build-L3 -# provenance -> stage EVERYTHING on the DRAFT GitHub Release (`release-assets- -# draft`) -> gated registry publishes -> un-draft ONLY after BOTH publishes -# succeed (`publish-github-release`). The two registry publishes (crates.io, -# PyPI) are the manual gates: each is bound to a GitHub Environment with -# Required Reviewers, so it pauses for a human. +# wheels + sdist) -> canonicalize the Python dist (current build for new +# versions, verified PyPI-served bytes if PyPI already owns the immutable +# version) -> attest / SLSA-provenance the files this run actually built -> +# stage EVERYTHING on the DRAFT GitHub Release (`release-assets-draft`) -> +# gated registry publishes / verification -> un-draft ONLY after BOTH gates +# succeed (`publish-github-release`). The two registry gates (crates.io, PyPI) +# are bound to GitHub Environments with Required Reviewers, so they pause for a +# human. # # The un-draft-after-publish ordering is deliberate: it prevents a public # GitHub Release from existing for a version that crates.io / PyPI later @@ -38,20 +40,28 @@ # Provenance / attestation, soup to nuts (all genuine, nothing faked): # * SLSA generator -> `*.intoto.jsonl` on the Release (OpenSSF Scorecard # Signed-Releases provenance probe; older unsigned releases may keep that -# score below 10 temporarily; SLSA Build L3). +# score below 10 temporarily; SLSA Build L3). Recovery mode limits SLSA +# subjects to the crate built by this run, because PyPI files are immutable +# bytes from an earlier Trusted Publishing upload. # * actions/attest-build-provenance -> GitHub attestation store + a # `*.sigstore.json` bundle on the Release (`gh attestation verify`; also the # Scorecard signing probe sees this asset as a backup if the .intoto.jsonl -# ever regresses). -# * gh-action-pypi-publish -> PEP 740 attestations on PyPI (Integrity API). -# * post-publish PyPI JSON hash check -> every served wheel/sdist digest -# matches the staged dist files. -# * crates.io / PyPI publish via Trusted Publishing (OIDC) — NO stored tokens. +# ever regresses). Recovery mode limits GitHub artifact attestations the +# same way. +# * gh-action-pypi-publish -> PEP 740 attestations on PyPI (Integrity API) on +# fresh publishes; recovery mode skips upload and verifies the existing +# PyPI-served hashes instead of minting attestations for different rebuilt +# bytes. +# * PyPI JSON hash check -> every served wheel/sdist digest matches the +# canonical Python dist files staged on the GitHub Release. +# * crates.io / PyPI publish/verification via Trusted Publishing gates (OIDC +# only when uploading) — NO stored tokens. # # Fail-closed: `release-assets-draft` and both publishes `needs:` attest + -# provenance, so nothing is attached or published unless provenance signed; and -# `publish-github-release` `needs:` both publishes, so the Release stays DRAFT -# unless both registry pushes succeed. The signed-release graph is pinned in +# provenance and canonical Python dist selection, so nothing is attached or +# published unless the artifact source is verified; and `publish-github-release` +# `needs:` both registry gates, so the Release stays DRAFT unless both pass. +# The signed-release graph is pinned in # `tests/release_signed_release_invariants.sh` (run by ci.yml's release-guard # on every push/PR) so a future commit can't silently dismantle it. # @@ -297,7 +307,17 @@ jobs: PY )" python -m pip install --require-hashes -r ordvec-python/requirements-dev.txt - python -m pip install --no-index "$WHEEL" + REQ_FILE="${RUNNER_TEMP:?RUNNER_TEMP must be set}/ordvec-wheel-requirements.txt" + python - <<'PY' "$WHEEL" > "$REQ_FILE" + import hashlib + import sys + from pathlib import Path + + wheel = Path(sys.argv[1]).resolve() + digest = hashlib.sha256(wheel.read_bytes()).hexdigest() + print(f"ordvec @ {wheel.as_uri()} --hash=sha256:{digest}") + PY + python -m pip install --require-hashes --no-index --no-deps -r "$REQ_FILE" python -m pytest ordvec-python/tests -q - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: @@ -307,7 +327,7 @@ jobs: smoke-linux-aarch64-wheel: name: smoke linux/aarch64 wheel - needs: [guard, build-wheels] + needs: [guard, pypi-canonical-dist] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-24.04-arm steps: @@ -315,17 +335,17 @@ jobs: uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 with: egress-policy: audit - - name: Set up Python to test the built wheel + - name: Set up Python to test the canonical wheel uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.13" - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - - name: Download the exact linux/aarch64 wheel + - name: Download the canonical Python dist uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: - name: wheels-ubuntu-latest-aarch64 + name: pypi-canonical-dist path: wheelhouse - name: Install exact wheel and run tiny RankQuant/Bitmap smoke shell: bash @@ -333,14 +353,28 @@ jobs: set -euo pipefail WHEEL="$(python - <<'PY' from pathlib import Path - wheels = sorted(Path("wheelhouse").glob("*.whl")) + wheels = sorted( + wheel + for wheel in Path("wheelhouse").glob("*.whl") + if "aarch64" in wheel.name and ("manylinux" in wheel.name or "musllinux" in wheel.name) + ) if len(wheels) != 1: raise SystemExit(f"expected exactly one linux/aarch64 wheel, found {wheels}") print(wheels[0]) PY )" python -m pip install --require-hashes -r ordvec-python/requirements-dev.txt - python -m pip install --no-index "$WHEEL" + REQ_FILE="${RUNNER_TEMP:?RUNNER_TEMP must be set}/ordvec-aarch64-wheel-requirements.txt" + python - <<'PY' "$WHEEL" > "$REQ_FILE" + import hashlib + import sys + from pathlib import Path + + wheel = Path(sys.argv[1]).resolve() + digest = hashlib.sha256(wheel.read_bytes()).hexdigest() + print(f"ordvec @ {wheel.as_uri()} --hash=sha256:{digest}") + PY + python -m pip install --require-hashes --no-index --no-deps -r "$REQ_FILE" python - <<'PY' import numpy as np from ordvec import Bitmap, RankQuant @@ -392,8 +426,26 @@ jobs: shell: bash run: | set -euo pipefail + SDIST="$(python - <<'PY' + from pathlib import Path + sdists = sorted(Path("ordvec-python/dist").glob("*.tar.gz")) + if len(sdists) != 1: + raise SystemExit(f"expected exactly one sdist, found {sdists}") + print(sdists[0]) + PY + )" python -m pip install --require-hashes -r ordvec-python/requirements-dev.txt - python -m pip install ordvec-python/dist/*.tar.gz + REQ_FILE="${RUNNER_TEMP:?RUNNER_TEMP must be set}/ordvec-sdist-requirements.txt" + python - <<'PY' "$SDIST" > "$REQ_FILE" + import hashlib + import sys + from pathlib import Path + + sdist = Path(sys.argv[1]).resolve() + digest = hashlib.sha256(sdist.read_bytes()).hexdigest() + print(f"ordvec @ {sdist.as_uri()} --hash=sha256:{digest}") + PY + python -m pip install --require-hashes --no-index --no-deps --no-build-isolation -r "$REQ_FILE" python -m pytest ordvec-python/tests -q - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: @@ -415,9 +467,55 @@ jobs: path: ordvec-python/ordvec-python.cdx.json if-no-files-found: error + pypi-canonical-dist: + name: canonicalize Python dist for PyPI/GitHub Release + needs: [guard, build-wheels, build-sdist] + if: needs.guard.outputs.ok == 'true' + runs-on: ubuntu-latest + outputs: + source: ${{ steps.canonicalize.outputs.source }} + pypi_exists: ${{ steps.canonicalize.outputs.pypi_exists }} + steps: + - name: Harden the runner + uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 + with: + egress-policy: audit + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - name: Collect the built wheels + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + pattern: wheels-* + path: built-dist + merge-multiple: true + - name: Collect the built sdist + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: sdist + path: built-dist + - name: Select canonical Python dist + id: canonicalize + env: + VERSION: ${{ needs.guard.outputs.version }} + run: | + set -euo pipefail + python3 tests/release_pypi_canonical_dist.py canonicalize \ + --version "$VERSION" \ + --built-dir built-dist \ + --out-dir canonical-dist + - name: Upload the canonical Python dist + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: pypi-canonical-dist + path: | + canonical-dist/*.whl + canonical-dist/*.tar.gz + if-no-files-found: error + attest: name: GitHub artifact attestation (+ .sigstore.json bundle) - needs: [guard, build-crate, build-wheels, build-sdist] + needs: [guard, build-crate, pypi-canonical-dist] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest permissions: @@ -430,23 +528,37 @@ jobs: uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 with: egress-policy: audit - - name: Collect the distributables + - name: Collect the crate distributable uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: + name: dist-crate path: dist - merge-multiple: true - # One consolidated attestation referencing every subject (v4 behavior). - - name: Attest build provenance for crate + wheels + sdist - id: attest + - name: Collect the canonical Python dist + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: pypi-canonical-dist + path: dist + # Fresh release: one consolidated attestation references every subject. + - name: Attest build provenance for crate + canonical wheels + sdist + id: attest_all + if: needs.pypi-canonical-dist.outputs.source == 'build' uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 with: subject-path: | dist/*.crate dist/*.whl dist/*.tar.gz + # Recovery release: PyPI already owns immutable wheel/sdist bytes from a + # previous Trusted Publishing upload. Do not claim this run rebuilt them. + - name: Attest build provenance for crate only + id: attest_crate + if: needs.pypi-canonical-dist.outputs.source == 'pypi' + uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 + with: + subject-path: dist/*.crate - name: Stage the Sigstore bundle as a release asset env: - BUNDLE: ${{ steps.attest.outputs.bundle-path }} + BUNDLE: ${{ steps.attest_all.outputs.bundle-path || steps.attest_crate.outputs.bundle-path }} VERSION: ${{ needs.guard.outputs.version }} run: cp "$BUNDLE" "ordvec-${VERSION}.sigstore.json" - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 @@ -457,7 +569,7 @@ jobs: combine-hashes: name: combine artifact digests for SLSA provenance - needs: [guard, build-crate, build-wheels, build-sdist] + needs: [guard, build-crate, pypi-canonical-dist] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest outputs: @@ -467,20 +579,36 @@ jobs: uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 with: egress-policy: audit - - name: Collect the distributables + - name: Collect the crate distributable uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: + name: dist-crate path: dist - merge-multiple: true - - name: Compute one combined base64 sha256sum over all distributables + - name: Collect the canonical Python dist + if: needs.pypi-canonical-dist.outputs.source == 'build' + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: pypi-canonical-dist + path: dist + - name: Compute one combined base64 sha256sum over SLSA subjects id: hash working-directory: dist + env: + PYPI_SOURCE: ${{ needs.pypi-canonical-dist.outputs.source }} run: | set -euo pipefail # SLSA generator wants `sha256sum`-format subjects, base64'd, no wrap. # `./*.glob` form (not bare `*.glob`) so a hostile filename that starts # with `-` can't be reinterpreted as a sha256sum flag (shellcheck SC2035). - echo "hashes=$(sha256sum ./*.crate ./*.whl ./*.tar.gz | base64 -w0)" >> "$GITHUB_OUTPUT" + if [ "$PYPI_SOURCE" = "build" ]; then + echo "hashes=$(sha256sum ./*.crate ./*.whl ./*.tar.gz | base64 -w0)" >> "$GITHUB_OUTPUT" + elif [ "$PYPI_SOURCE" = "pypi" ]; then + echo "::notice::PyPI dist already exists; SLSA subjects are limited to the crate built by this run." + echo "hashes=$(sha256sum ./*.crate | base64 -w0)" >> "$GITHUB_OUTPUT" + else + echo "::error::unexpected pypi-canonical-dist source: $PYPI_SOURCE" + exit 1 + fi provenance: name: SLSA Build-L3 provenance (.intoto.jsonl) @@ -505,7 +633,7 @@ jobs: release-assets-draft: name: stage all assets on the DRAFT Release (does NOT un-draft) - needs: [guard, notes, attest, provenance, require-ci-green, smoke-linux-aarch64-wheel] + needs: [guard, notes, attest, provenance, pypi-canonical-dist, require-ci-green, smoke-linux-aarch64-wheel] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest permissions: @@ -515,15 +643,41 @@ jobs: uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 with: egress-policy: audit - - name: Collect everything (artifacts + attestation + provenance) + - name: Collect the crate distributable uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: + name: dist-crate path: dist - merge-multiple: true + - name: Collect the canonical Python dist + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: pypi-canonical-dist + path: dist + - name: Collect the Sigstore bundle + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: sigstore-bundle + path: dist + - name: Collect workflow artifacts for SLSA provenance + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + path: provenance-artifacts + - name: Copy the SLSA provenance into the release dist + run: | + set -euo pipefail + mapfile -t provenance < <(find provenance-artifacts -type f -name '*.intoto.jsonl' | sort) + if [ "${#provenance[@]}" -ne 1 ]; then + printf '%s\n' "${provenance[@]}" + echo "::error::expected exactly one .intoto.jsonl provenance artifact, found ${#provenance[@]}" + exit 1 + fi + cp "${provenance[0]}" dist/ - name: Attach distributables, signature and provenance to the draft Release # SOLE Release-asset writer. SBOMs stay build artifacts (registries don't # host them); the GitHub-native bundle (.sigstore.json) and the SLSA - # provenance (.intoto.jsonl) ship with the artifacts they attest. + # provenance (.intoto.jsonl) ship with the Release. In recovery mode, + # they attest only the crate built by this run; canonical Python files + # are verified against PyPI's immutable hashes instead. # The Release is left DRAFT — un-drafting happens in # `publish-github-release` only after BOTH registry publishes succeed, # so a partial publish never leaves a "public Release with no @@ -645,7 +799,7 @@ jobs: publish-pypi: name: publish to PyPI - needs: [guard, release-assets-draft] + needs: [guard, pypi-canonical-dist, release-assets-draft] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest environment: @@ -659,70 +813,31 @@ jobs: uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 with: egress-policy: audit - - name: Collect the wheels - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: - pattern: wheels-* - path: dist - merge-multiple: true - - name: Collect the sdist + persist-credentials: false + - name: Collect the canonical Python dist uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: - name: sdist + name: pypi-canonical-dist path: dist + - name: Skip PyPI upload when the immutable version already exists + if: needs.pypi-canonical-dist.outputs.source == 'pypi' + run: | + echo "::notice::PyPI already serves this version; verifying existing canonical files instead of uploading." - name: Publish to PyPI (Trusted Publishing; PEP 740 attestations on by default) + if: needs.pypi-canonical-dist.outputs.source == 'build' uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0 with: packages-dir: dist - # Makes release recovery idempotent if PyPI already accepted this - # version but another registry publish failed. The next step still - # fails closed unless PyPI-served hashes equal the staged dist files. - skip-existing: true - - name: Post-publish PyPI hashes match staged dist + - name: Verify PyPI hashes match canonical dist env: VERSION: ${{ needs.guard.outputs.version }} run: | set -euo pipefail - python3 - <<'PY' - import hashlib - import json - import os - import sys - import time - import urllib.request - from pathlib import Path - - version = os.environ["VERSION"] - dist = Path("dist") - local = { - path.name: hashlib.sha256(path.read_bytes()).hexdigest() - for path in sorted(dist.iterdir()) - if path.is_file() and (path.name.endswith(".whl") or path.name.endswith(".tar.gz")) - } - if not local: - raise SystemExit("no local wheel/sdist files found in dist") - - url = f"https://pypi.org/pypi/ordvec/{version}/json" - last_error = None - for attempt in range(1, 25): - try: - with urllib.request.urlopen(url, timeout=15) as response: - payload = json.load(response) - remote = { - item["filename"]: item["digests"]["sha256"] - for item in payload.get("urls", []) - } - if remote == local: - print(f"OK: PyPI-served hashes match staged dist for ordvec {version}") - break - last_error = f"local={local!r} remote={remote!r}" - except Exception as exc: # noqa: BLE001 - diagnostic for CI logs. - last_error = repr(exc) - print(f"waiting for PyPI JSON/hash propagation ({attempt}/24): {last_error}", file=sys.stderr) - time.sleep(5) - else: - raise SystemExit(f"PyPI post-publish hash verification failed for {url}: {last_error}") - PY + python3 tests/release_pypi_canonical_dist.py verify \ + --version "$VERSION" \ + --dist-dir dist publish-github-release: name: un-draft the GitHub Release (only after BOTH registry publishes succeed) diff --git a/Cargo.toml b/Cargo.toml index 68fbbd96..3b773324 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,8 @@ exclude = [ "ordvec-python/", "tests/__pycache__/", "tests/release_environment_settings.sh", + "tests/release_pypi_canonical_dist.py", + "tests/release_pypi_canonical_dist_tests.py", "tests/release_publish_invariants.py", "tests/release_publish_invariants.sh", "tests/release_signed_release_invariants.sh", diff --git a/RELEASING.md b/RELEASING.md index a84552f1..ac679812 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -9,9 +9,9 @@ `ordvec` (the Rust crate) and `ordvec` on PyPI (the PyO3 wheel built from `ordvec-python/`) are released by **pushing a `vMAJOR.MINOR.PATCH` tag** to a -commit on `main`. The release workflow handles build, attestation, SLSA -provenance, Release-asset attach, and un-draft automatically; only the two -registry pushes are manual. +commit on `main`. The release workflow handles build, canonical Python artifact +selection, attestation, SLSA provenance, Release-asset attach, and un-draft +automatically; only the two registry gates are manual. ## Release pipeline controls @@ -25,11 +25,20 @@ The unified `release.yml`: (a *successful* run for that exact SHA on `main`); - publishes via **OIDC trusted publishing** (no long-lived crates.io / PyPI tokens in the repo); +- canonicalizes the Python dist before attestation and release upload: for a + new PyPI version it uses the current run's wheels/sdist; if PyPI already owns + that immutable version during recovery, it downloads the exact PyPI-served + files, verifies their SHA-256 digests from PyPI JSON, and uses those bytes as + the GitHub Release assets; - emits **GitHub SLSA build provenance** (`actions/attest-build-provenance`) and a **SLSA-generator `*.intoto.jsonl`** attached to the GitHub Release **before** the gated publishes — a failed attestation fails the release - closed, so nothing ships without provenance recorded; -- stages the **`.crate`, wheels, sdist, `*.sigstore.json` bundle, and + closed, so nothing ships without provenance recorded. In recovery mode where + PyPI files already exist, the GitHub/SLSA subjects are deliberately limited + to the crate built by the current run; the Python files are verified immutable + PyPI bytes from the earlier Trusted Publishing upload, not falsely claimed as + rebuilt by the recovery run; +- stages the **`.crate`, canonical wheels, canonical sdist, `*.sigstore.json` bundle, and `*.intoto.jsonl` provenance** on the GitHub Release while it is still **a DRAFT** (`release-assets-draft` is the sole Release-asset writer — no manual attach, which is what v0.2.0's manual step missed); @@ -56,7 +65,10 @@ The unified `release.yml`: `persist-credentials: false`, and defaults to `permissions: contents: read`. The PyPI publish step additionally produces **PEP 740** attestations via -Trusted Publishing (served from PyPI's Integrity API). +Trusted Publishing (served from PyPI's Integrity API) on a fresh upload. If the +version already exists on PyPI during recovery, the job skips upload and instead +verifies that PyPI-served wheel/sdist hashes match the canonical files staged on +the GitHub Release. ### Environment protection (configured in repo settings, not in code) @@ -145,10 +157,12 @@ filename. Until either is updated, the corresponding gated publish fails ``` `release.yml` triggers automatically. It builds the `.crate`, wheels, and - sdist; attests them (GitHub attestation store + `*.sigstore.json`); - generates the SLSA `*.intoto.jsonl`; and stages every artifact, the - attestation bundle, and the provenance on the GitHub Release — **as a - DRAFT**. It then pauses at the two registry environment gates. + sdist; selects the canonical Python dist (current build for a new PyPI + version, verified PyPI bytes for an existing immutable version); attests the + files this run can honestly attest (GitHub attestation store + + `*.sigstore.json`); generates the SLSA `*.intoto.jsonl`; and stages every + artifact, the attestation bundle, and the provenance on the GitHub Release + — **as a DRAFT**. It then pauses at the two registry environment gates. 7. **Approve the two publish environments** when they pause in the Actions UI (one for `crates-io`, one for `pypi`). The required-reviewer approval is what authorises the registry push. @@ -156,12 +170,14 @@ filename. Until either is updated, the corresponding gated publish fails SLSA-attested artifact — if they diverge (toolchain drift, etc.) the job fails closed BEFORE the OIDC token is minted, so nothing reaches crates.io. Re-run / investigate. - - Once **both** publishes succeed, `publish-github-release` un-drafts the - GitHub Release automatically. If one publish fails, the Release stays - DRAFT — re-run the failed job, the un-draft then completes. - - `publish-pypi` also queries PyPI after upload and compares every served - wheel/sdist SHA-256 digest against the staged `dist/` files before the - GitHub Release can un-draft. + - Once **both** registry gates succeed, `publish-github-release` un-drafts + the GitHub Release automatically. If one gate fails, the Release stays + DRAFT — investigate and re-run from a fixed workflow rather than approving + the other registry into another partial state. + - `publish-pypi` either uploads the fresh canonical dist or, if PyPI already + serves that version, skips upload and verifies the existing files. In both + modes it compares every PyPI-served wheel/sdist SHA-256 digest against the + canonical `dist/` files before the GitHub Release can un-draft. 8. Verify each published artifact and its provenance: - crates.io / docs.rs; - PyPI (confirm the post-publish hash-verification log, optionally diff --git a/tests/release_publish_invariants.py b/tests/release_publish_invariants.py index 8e08ceab..620d298c 100644 --- a/tests/release_publish_invariants.py +++ b/tests/release_publish_invariants.py @@ -13,6 +13,7 @@ WORKFLOW_PATH = os.environ.get("RELEASE_WORKFLOW_PATH", ".github/workflows/release.yml") +PYTHON_WORKFLOW_PATH = os.environ.get("PYTHON_WORKFLOW_PATH", ".github/workflows/python.yml") def fail(message: str) -> None: @@ -93,79 +94,180 @@ def empty(value: Any) -> bool: return value is None or value == "" +def has_need(job: dict[str, Any], needed: str) -> bool: + needs = job.get("needs") + if isinstance(needs, str): + return needs == needed + if isinstance(needs, list): + return needed in needs + return False + + +def contains_text(value: Any, needle: str) -> bool: + return isinstance(value, str) and needle in value + + +def read_text(path: str) -> str: + try: + with open(path, encoding="utf-8") as fh: + return fh.read() + except OSError as exc: + fail(f"{path}: could not read workflow: {exc}") + + +def check_hash_requirement_temp_paths(paths: list[str]) -> None: + for path in paths: + workflow_text = read_text(path) + if "/tmp/ordvec-" in workflow_text: + fail(f"{path}: hash requirement files must be written under ${{RUNNER_TEMP}}, not /tmp") + + +def check_aarch64_smoke_selector(workflow: dict[str, Any], path: str) -> None: + jobs = mapping(workflow.get("jobs"), f"{path}: jobs") + job = mapping(jobs.get("smoke-linux-aarch64-wheel"), f"{path}: jobs.smoke-linux-aarch64-wheel") + steps = sequence(job.get("steps"), f"{path}: jobs.smoke-linux-aarch64-wheel.steps") + + matching_steps: list[dict[str, Any]] = [] + for raw_step in steps: + step = mapping(raw_step, f"{path}: jobs.smoke-linux-aarch64-wheel.steps[]") + if step.get("name") == "Install exact wheel and run tiny RankQuant/Bitmap smoke": + matching_steps.append(step) + + if len(matching_steps) != 1: + fail(f"{path}: smoke-linux-aarch64-wheel must have exactly one install/smoke step") + + run = matching_steps[0].get("run") + if not isinstance(run, str): + fail(f"{path}: smoke-linux-aarch64-wheel install/smoke step must be a run step") + if "manylinux_2_17_aarch64" in run: + fail(f"{path}: linux/aarch64 wheel selector must not pin a specific manylinux policy tag") + if not all(needle in run for needle in ('"aarch64"', '"manylinux"', '"musllinux"', "len(wheels) != 1")): + fail(f"{path}: linux/aarch64 wheel selector must match architecture and assert exactly one wheel") + + +def check_pypi_canonical_dist(workflow: dict[str, Any], path: str) -> None: + jobs = mapping(workflow.get("jobs"), f"{path}: jobs") + job = mapping(jobs.get("pypi-canonical-dist"), f"{path}: jobs.pypi-canonical-dist") + steps = sequence(job.get("steps"), f"{path}: jobs.pypi-canonical-dist.steps") + + for needed in ("build-wheels", "build-sdist"): + if not has_need(job, needed): + fail(f"{path}: pypi-canonical-dist must need {needed}") + + outputs = mapping(job.get("outputs"), f"{path}: jobs.pypi-canonical-dist.outputs") + if outputs.get("source") != "${{ steps.canonicalize.outputs.source }}": + fail(f"{path}: pypi-canonical-dist must expose the canonical source output") + + wheels_downloads: list[int] = [] + sdist_downloads: list[int] = [] + canonicalize_steps: list[dict[str, Any]] = [] + uploads: list[tuple[int, dict[str, Any], dict[str, Any]]] = [] + + for index, raw_step in enumerate(steps): + step = mapping(raw_step, f"{path}: jobs.pypi-canonical-dist.steps[{index}]") + action = action_name(step) + if action == "actions/download-artifact": + with_map = mapping(step.get("with", {}), f"{path}: {step_label(index, step)} with") + artifact_path = norm_path(with_map.get("path")) + if with_map.get("pattern") == "wheels-*" and boolish_true(with_map.get("merge-multiple")): + if artifact_path != "built-dist": + fail(f"{path}: canonical wheel download must target built-dist") + wheels_downloads.append(index) + elif with_map.get("name") == "sdist": + if artifact_path != "built-dist": + fail(f"{path}: canonical sdist download must target built-dist") + sdist_downloads.append(index) + elif action == "actions/upload-artifact": + with_map = mapping(step.get("with", {}), f"{path}: {step_label(index, step)} with") + if with_map.get("name") == "pypi-canonical-dist": + uploads.append((index, step, with_map)) + + run = step.get("run") + if contains_text(run, "tests/release_pypi_canonical_dist.py canonicalize"): + canonicalize_steps.append(step) + if "--built-dir built-dist" not in run or "--out-dir canonical-dist" not in run: + fail(f"{path}: canonicalize step must read built-dist and write canonical-dist") + + if len(wheels_downloads) != 1: + fail(f"{path}: pypi-canonical-dist must download exactly one wheels-* artifact set") + if len(sdist_downloads) != 1: + fail(f"{path}: pypi-canonical-dist must download exactly one sdist artifact") + if len(canonicalize_steps) != 1: + fail(f"{path}: pypi-canonical-dist must run release_pypi_canonical_dist.py canonicalize") + if len(uploads) != 1: + fail(f"{path}: pypi-canonical-dist must upload exactly one pypi-canonical-dist artifact") + + _, _, upload_with = uploads[0] + upload_path = upload_with.get("path") + if not ( + contains_text(upload_path, "canonical-dist/*.whl") + and contains_text(upload_path, "canonical-dist/*.tar.gz") + ): + fail(f"{path}: pypi-canonical-dist upload must include canonical wheels and sdist") + + def check_publish_pypi(workflow: dict[str, Any], path: str) -> None: jobs = mapping(workflow.get("jobs"), f"{path}: jobs") job = mapping(jobs.get("publish-pypi"), f"{path}: jobs.publish-pypi") steps = sequence(job.get("steps"), f"{path}: jobs.publish-pypi.steps") + if not has_need(job, "pypi-canonical-dist"): + fail(f"{path}: publish-pypi must need pypi-canonical-dist") + publish_steps: list[tuple[int, dict[str, Any]]] = [] - artifact_downloads: list[tuple[int, dict[str, Any], dict[str, Any]]] = [] + canonical_downloads: list[tuple[int, dict[str, Any], dict[str, Any]]] = [] + verify_steps: list[dict[str, Any]] = [] for index, raw_step in enumerate(steps): step = mapping(raw_step, f"{path}: jobs.publish-pypi.steps[{index}]") action = action_name(step) if action == "pypa/gh-action-pypi-publish": publish_steps.append((index, step)) - if action != "actions/download-artifact": - continue + if action == "actions/download-artifact": + with_block = step.get("with", {}) + with_map = mapping(with_block, f"{path}: {step_label(index, step)} with") + if with_map.get("name") == "pypi-canonical-dist": + canonical_downloads.append((index, step, with_map)) + elif norm_path(with_map.get("path")) == "dist": + fail(f"{path}: {step_label(index, step)} downloads a non-canonical artifact into dist") - with_block = step.get("with", {}) - with_map = mapping(with_block, f"{path}: {step_label(index, step)} with") - artifact_downloads.append((index, step, with_map)) + run = step.get("run") + if contains_text(run, "tests/release_pypi_canonical_dist.py verify"): + verify_steps.append(step) + if "--dist-dir dist" not in run: + fail(f"{path}: PyPI verify step must verify dist") if len(publish_steps) != 1: fail(f"{path}: publish-pypi must have exactly one pypa/gh-action-pypi-publish step") publish_index, publish_step = publish_steps[0] + if publish_step.get("if") != "needs.pypi-canonical-dist.outputs.source == 'build'": + fail(f"{path}: PyPI publish step must only run when canonical source is the current build") publish_with = mapping( publish_step.get("with", {}), f"{path}: {step_label(publish_index, publish_step)} with" ) if norm_path(publish_with.get("packages-dir")) != "dist": fail(f"{path}: PyPI publish step must upload packages-dir: dist") - if not boolish_true(publish_with.get("skip-existing")): - fail( - f"{path}: PyPI publish step must set skip-existing: true so a recovery " - "rerun is idempotent after PyPI has already accepted the version" - ) - - wheels: list[int] = [] - sdists: list[int] = [] - for index, step, with_map in artifact_downloads: - label = step_label(index, step) - artifact_path = norm_path(with_map.get("path")) - if artifact_path != "dist": - fail( - f"{path}: {label} downloads artifacts to {artifact_path or 'the default path'!r}; " - "publish-pypi may only download wheels-* and sdist into dist" - ) - if index > publish_index: - fail(f"{path}: {label} downloads into dist after the PyPI publish step") - - name = with_map.get("name") - pattern = with_map.get("pattern") - is_wheels = ( - pattern == "wheels-*" - and empty(name) - and boolish_true(with_map.get("merge-multiple")) - ) - is_sdist = name == "sdist" and empty(pattern) - if is_wheels: - wheels.append(index) - continue - if is_sdist: - sdists.append(index) - continue + if len(canonical_downloads) != 1: + fail(f"{path}: publish-pypi must download exactly one pypi-canonical-dist artifact") + download_index, download_step, download_with = canonical_downloads[0] + if download_index > publish_index: + fail(f"{path}: {step_label(download_index, download_step)} must run before the PyPI publish step") + if norm_path(download_with.get("path")) != "dist": + fail(f"{path}: publish-pypi must download pypi-canonical-dist into dist") - fail( - f"{path}: {label} downloads into dist but is not the allowed " - "'pattern: wheels-*' or 'name: sdist' artifact" - ) + if len(verify_steps) != 1: + fail(f"{path}: publish-pypi must run release_pypi_canonical_dist.py verify exactly once") - if len(wheels) != 1: - fail(f"{path}: publish-pypi must download exactly one wheels-* artifact set into dist") - if len(sdists) != 1: - fail(f"{path}: publish-pypi must download exactly one sdist artifact into dist") + for index, step in enumerate(steps): + if action_name(step) != "actions/download-artifact": + continue + with_map = mapping(step.get("with", {}), f"{path}: {step_label(index, step)} with") + label = step_label(index, step) + artifact_path = norm_path(with_map.get("path")) + if artifact_path == "dist" and with_map.get("name") != "pypi-canonical-dist": + fail(f"{path}: {label} must not place non-canonical artifacts in dist") def check_publish_crate(workflow: dict[str, Any], path: str) -> None: @@ -225,6 +327,9 @@ def check_publish_crate(workflow: dict[str, Any], path: str) -> None: def main() -> None: workflow = load_workflow(WORKFLOW_PATH) + check_hash_requirement_temp_paths([WORKFLOW_PATH, PYTHON_WORKFLOW_PATH]) + check_aarch64_smoke_selector(workflow, WORKFLOW_PATH) + check_pypi_canonical_dist(workflow, WORKFLOW_PATH) check_publish_crate(workflow, WORKFLOW_PATH) check_publish_pypi(workflow, WORKFLOW_PATH) diff --git a/tests/release_pypi_canonical_dist.py b/tests/release_pypi_canonical_dist.py new file mode 100644 index 00000000..6946c08f --- /dev/null +++ b/tests/release_pypi_canonical_dist.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +"""Canonical PyPI dist handling for the release workflow. + +The normal release path publishes the wheels/sdist built by the current run. +The recovery path for an immutable PyPI version downloads the already-published +files from PyPI, verifies their published SHA-256 digests, and makes those bytes +the canonical Python dist for the GitHub Release. +""" + +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import shutil +import sys +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + + +PROJECT = "ordvec" +DIST_SUFFIXES = (".whl", ".tar.gz") + + +class PyPIReadError(RuntimeError): + """PyPI returned an unusable response for a retryable read.""" + + +def fail(message: str) -> None: + print(f"::error::{message}", file=sys.stderr) + raise SystemExit(1) + + +def notice(message: str) -> None: + print(f"::notice::{message}") + + +def set_output(name: str, value: str) -> None: + output = os.environ.get("GITHUB_OUTPUT") + if output: + with open(output, "a", encoding="utf-8") as fh: + fh.write(f"{name}={value}\n") + + +def sha256_file(path: Path) -> str: + digest = hashlib.sha256() + with path.open("rb") as fh: + for chunk in iter(lambda: fh.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def dist_files(directory: Path) -> dict[str, Path]: + files = { + path.name: path + for path in sorted(directory.iterdir()) + if path.is_file() and path.name.endswith(DIST_SUFFIXES) + } + if not files: + fail(f"no wheel/sdist files found in {directory}") + return files + + +def fetch_pypi_payload(version: str) -> dict[str, Any] | None: + url = f"https://pypi.org/pypi/{PROJECT}/{version}/json" + try: + with urllib.request.urlopen(url, timeout=20) as response: + return json.load(response) + except urllib.error.HTTPError as exc: + if exc.code == 404: + return None + raise PyPIReadError(f"could not read {url}: HTTP {exc.code}") from exc + except Exception as exc: # noqa: BLE001 - release diagnostics should be direct. + raise PyPIReadError(f"could not read {url}: {exc!r}") from exc + raise AssertionError("unreachable") + + +def pypi_dist_map(payload: dict[str, Any]) -> dict[str, dict[str, str]]: + dist: dict[str, dict[str, str]] = {} + for item in payload.get("urls", []): + if not isinstance(item, dict): + continue + filename = item.get("filename") + url = item.get("url") + sha256 = item.get("digests", {}).get("sha256") + if not ( + isinstance(filename, str) + and filename.endswith(DIST_SUFFIXES) + and isinstance(url, str) + and isinstance(sha256, str) + ): + continue + dist[filename] = {"url": url, "sha256": sha256} + if not dist: + raise PyPIReadError("PyPI JSON did not contain any wheel/sdist files") + return dist + + +def prepare_empty_dir(path: Path) -> None: + path.mkdir(parents=True, exist_ok=True) + if any(path.iterdir()): + fail(f"{path} must be empty before canonical dist is written") + + +def download_verified(url: str, expected_sha256: str, target: Path) -> None: + try: + with urllib.request.urlopen(url, timeout=60) as response: + data = response.read() + except Exception as exc: # noqa: BLE001 - release diagnostics should be direct. + fail(f"could not download {url}: {exc!r}") + actual_sha256 = hashlib.sha256(data).hexdigest() + if actual_sha256 != expected_sha256: + fail(f"downloaded {target.name} hash mismatch: {actual_sha256} != {expected_sha256}") + target.write_bytes(data) + + +def ensure_same_filenames(local: dict[str, Path], remote: dict[str, dict[str, str]]) -> None: + local_names = set(local) + remote_names = set(remote) + if local_names != remote_names: + only_local = sorted(local_names - remote_names) + only_remote = sorted(remote_names - local_names) + fail( + "current build and PyPI have different dist filename sets: " + f"only_local={only_local!r} only_pypi={only_remote!r}" + ) + + +def canonicalize(version: str, built_dir: Path, out_dir: Path) -> None: + built = dist_files(built_dir) + prepare_empty_dir(out_dir) + try: + payload = fetch_pypi_payload(version) + except PyPIReadError as exc: + fail(str(exc)) + + if payload is None: + for filename, path in built.items(): + shutil.copy2(path, out_dir / filename) + set_output("source", "build") + set_output("pypi_exists", "false") + print(f"OK: PyPI has no {PROJECT} {version}; canonical dist uses current build") + return + + try: + remote = pypi_dist_map(payload) + except PyPIReadError as exc: + fail(str(exc)) + ensure_same_filenames(built, remote) + + mismatched: list[str] = [] + for filename, path in built.items(): + built_sha256 = sha256_file(path) + remote_sha256 = remote[filename]["sha256"] + if built_sha256 != remote_sha256: + mismatched.append(filename) + + if mismatched: + notice( + "PyPI already has immutable files whose bytes differ from this rebuild; " + f"using PyPI-canonical bytes for {', '.join(mismatched)}" + ) + + for filename, item in remote.items(): + download_verified(item["url"], item["sha256"], out_dir / filename) + + set_output("source", "pypi") + set_output("pypi_exists", "true") + print(f"OK: PyPI already has {PROJECT} {version}; canonical dist uses verified PyPI files") + + +def remote_hashes(version: str) -> dict[str, str] | None: + payload = fetch_pypi_payload(version) + if payload is None: + return None + return {name: item["sha256"] for name, item in pypi_dist_map(payload).items()} + + +def local_hashes(dist_dir: Path) -> dict[str, str]: + return {name: sha256_file(path) for name, path in dist_files(dist_dir).items()} + + +def verify(version: str, dist_dir: Path, attempts: int, sleep_seconds: float) -> None: + local = local_hashes(dist_dir) + url = f"https://pypi.org/pypi/{PROJECT}/{version}/json" + last_error = "not checked" + for attempt in range(1, attempts + 1): + try: + remote = remote_hashes(version) + if remote == local: + print(f"OK: PyPI-served hashes match canonical dist for {PROJECT} {version}") + return + last_error = f"local={local!r} remote={remote!r}" + except PyPIReadError as exc: + last_error = str(exc) + print(f"waiting for PyPI JSON/hash propagation ({attempt}/{attempts}): {last_error}", file=sys.stderr) + if attempt != attempts: + time.sleep(sleep_seconds) + fail(f"PyPI hash verification failed for {url}: {last_error}") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + subparsers = parser.add_subparsers(dest="command", required=True) + + canonical = subparsers.add_parser("canonicalize") + canonical.add_argument("--version", required=True) + canonical.add_argument("--built-dir", required=True, type=Path) + canonical.add_argument("--out-dir", required=True, type=Path) + + verify_parser = subparsers.add_parser("verify") + verify_parser.add_argument("--version", required=True) + verify_parser.add_argument("--dist-dir", required=True, type=Path) + verify_parser.add_argument("--attempts", default=24, type=int) + verify_parser.add_argument("--sleep-seconds", default=5.0, type=float) + + return parser.parse_args() + + +def main() -> None: + args = parse_args() + if args.command == "canonicalize": + canonicalize(args.version, args.built_dir, args.out_dir) + return + if args.command == "verify": + verify(args.version, args.dist_dir, args.attempts, args.sleep_seconds) + return + raise AssertionError(f"unknown command: {args.command}") + + +if __name__ == "__main__": + main() diff --git a/tests/release_pypi_canonical_dist_tests.py b/tests/release_pypi_canonical_dist_tests.py new file mode 100644 index 00000000..0bcbf136 --- /dev/null +++ b/tests/release_pypi_canonical_dist_tests.py @@ -0,0 +1,198 @@ +#!/usr/bin/env python3 +"""Unit tests for release_pypi_canonical_dist.py.""" + +from __future__ import annotations + +import hashlib +import importlib.util +import io +import tempfile +import unittest +from contextlib import redirect_stderr, redirect_stdout +from pathlib import Path + + +SCRIPT = Path(__file__).with_name("release_pypi_canonical_dist.py") +SPEC = importlib.util.spec_from_file_location("release_pypi_canonical_dist", SCRIPT) +assert SPEC is not None and SPEC.loader is not None +canonical = importlib.util.module_from_spec(SPEC) +SPEC.loader.exec_module(canonical) + + +def write(path: Path, data: bytes) -> str: + path.write_bytes(data) + return hashlib.sha256(data).hexdigest() + + +class CanonicalPyPIDistTests(unittest.TestCase): + def test_missing_pypi_release_uses_current_build(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + built = root / "built" + out = root / "out" + built.mkdir() + write(built / "ordvec-0.3.0.tar.gz", b"fresh sdist") + write(built / "ordvec-0.3.0-cp310-abi3-win_amd64.whl", b"fresh wheel") + + old_fetch = canonical.fetch_pypi_payload + canonical.fetch_pypi_payload = lambda version: None + try: + with redirect_stdout(io.StringIO()): + canonical.canonicalize("0.3.0", built, out) + finally: + canonical.fetch_pypi_payload = old_fetch + + self.assertEqual((out / "ordvec-0.3.0.tar.gz").read_bytes(), b"fresh sdist") + self.assertEqual((out / "ordvec-0.3.0-cp310-abi3-win_amd64.whl").read_bytes(), b"fresh wheel") + + def test_existing_pypi_release_uses_verified_remote_bytes(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + built = root / "built" + remote = root / "remote" + out = root / "out" + built.mkdir() + remote.mkdir() + + write(built / "ordvec-0.3.0.tar.gz", b"rebuilt sdist") + write(built / "ordvec-0.3.0-cp310-abi3-win_amd64.whl", b"rebuilt wheel") + sdist_sha = write(remote / "ordvec-0.3.0.tar.gz", b"pypi sdist") + wheel_sha = write(remote / "ordvec-0.3.0-cp310-abi3-win_amd64.whl", b"pypi wheel") + + payload = { + "urls": [ + { + "filename": "ordvec-0.3.0.tar.gz", + "url": (remote / "ordvec-0.3.0.tar.gz").as_uri(), + "digests": {"sha256": sdist_sha}, + }, + { + "filename": "ordvec-0.3.0-cp310-abi3-win_amd64.whl", + "url": (remote / "ordvec-0.3.0-cp310-abi3-win_amd64.whl").as_uri(), + "digests": {"sha256": wheel_sha}, + }, + ] + } + + old_fetch = canonical.fetch_pypi_payload + canonical.fetch_pypi_payload = lambda version: payload + try: + with redirect_stdout(io.StringIO()): + canonical.canonicalize("0.3.0", built, out) + finally: + canonical.fetch_pypi_payload = old_fetch + + self.assertEqual((out / "ordvec-0.3.0.tar.gz").read_bytes(), b"pypi sdist") + self.assertEqual((out / "ordvec-0.3.0-cp310-abi3-win_amd64.whl").read_bytes(), b"pypi wheel") + + def test_existing_pypi_release_rejects_filename_drift(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + built = root / "built" + out = root / "out" + built.mkdir() + write(built / "ordvec-0.3.0.tar.gz", b"fresh sdist") + + payload = { + "urls": [ + { + "filename": "ordvec-0.3.0-cp310-abi3-win_amd64.whl", + "url": "file:///unused", + "digests": {"sha256": "0" * 64}, + } + ] + } + + old_fetch = canonical.fetch_pypi_payload + canonical.fetch_pypi_payload = lambda version: payload + try: + with redirect_stderr(io.StringIO()), self.assertRaises(SystemExit): + canonical.canonicalize("0.3.0", built, out) + finally: + canonical.fetch_pypi_payload = old_fetch + + def test_verify_retries_after_transient_pypi_fetch_error(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + dist = Path(tmp) + wheel_sha = write(dist / "ordvec-0.3.0-cp310-abi3-win_amd64.whl", b"canonical wheel") + payload = { + "urls": [ + { + "filename": "ordvec-0.3.0-cp310-abi3-win_amd64.whl", + "url": "file:///unused", + "digests": {"sha256": wheel_sha}, + } + ] + } + responses = [canonical.PyPIReadError("temporary PyPI 503"), payload] + sleeps: list[float] = [] + + old_fetch = canonical.fetch_pypi_payload + old_sleep = canonical.time.sleep + def fetch(version: str) -> dict[str, object] | None: + response = responses.pop(0) + if isinstance(response, Exception): + raise response + return response + + canonical.fetch_pypi_payload = fetch + canonical.time.sleep = sleeps.append + try: + with redirect_stdout(io.StringIO()), redirect_stderr(io.StringIO()): + canonical.verify("0.3.0", dist, attempts=2, sleep_seconds=0.25) + finally: + canonical.fetch_pypi_payload = old_fetch + canonical.time.sleep = old_sleep + + self.assertEqual(sleeps, [0.25]) + + def test_verify_retries_after_empty_pypi_dist_payload(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + dist = Path(tmp) + sdist_sha = write(dist / "ordvec-0.3.0.tar.gz", b"canonical sdist") + payload = { + "urls": [ + { + "filename": "ordvec-0.3.0.tar.gz", + "url": "file:///unused", + "digests": {"sha256": sdist_sha}, + } + ] + } + responses = [{"urls": []}, payload] + sleeps: list[float] = [] + + old_fetch = canonical.fetch_pypi_payload + old_sleep = canonical.time.sleep + canonical.fetch_pypi_payload = lambda version: responses.pop(0) + canonical.time.sleep = sleeps.append + try: + with redirect_stdout(io.StringIO()), redirect_stderr(io.StringIO()): + canonical.verify("0.3.0", dist, attempts=2, sleep_seconds=0.5) + finally: + canonical.fetch_pypi_payload = old_fetch + canonical.time.sleep = old_sleep + + self.assertEqual(sleeps, [0.5]) + + def test_canonicalize_reports_pypi_read_error(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + built = root / "built" + out = root / "out" + built.mkdir() + write(built / "ordvec-0.3.0.tar.gz", b"fresh sdist") + + old_fetch = canonical.fetch_pypi_payload + canonical.fetch_pypi_payload = lambda version: (_ for _ in ()).throw( + canonical.PyPIReadError("temporary PyPI 503") + ) + try: + with redirect_stderr(io.StringIO()), self.assertRaises(SystemExit): + canonical.canonicalize("0.3.0", built, out) + finally: + canonical.fetch_pypi_payload = old_fetch + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/release_signed_release_invariants.sh b/tests/release_signed_release_invariants.sh index 2efc7c95..7a42909a 100755 --- a/tests/release_signed_release_invariants.sh +++ b/tests/release_signed_release_invariants.sh @@ -7,16 +7,20 @@ # unsigned releases may keep the score below 10 temporarily. The same graph # keeps the build-attest-publish chain honest: # -# build-{crate,wheels,sdist} (artifacts) +# build-{crate,wheels,sdist} (raw artifacts) # | -# +-> attest (id-token + attestations + .sigstore.json) -# +-> provenance (slsa-github-generator @vX.Y.Z, .intoto.jsonl) +# +-> pypi-canonical-dist (current build, or verified immutable PyPI files) +# | +# +-> attest (id-token + attestations + .sigstore.json; +# | crate-only when PyPI files already exist) +# +-> provenance (slsa-github-generator @vX.Y.Z, .intoto.jsonl; +# | crate-only when PyPI files already exist) # | # v -# release-assets-draft (uploads .crate/.whl/.tar.gz/.sigstore.json/.intoto.jsonl to DRAFT release) +# release-assets-draft (uploads .crate/canonical .whl/.tar.gz/.sigstore.json/.intoto.jsonl to DRAFT release) # | # +--> publish-crate (byte-identity check vs attested .crate, then cargo publish) -# +--> publish-pypi (Trusted Publishing) +# +--> publish-pypi (Trusted Publishing, or existing-file verification) # | # v # publish-github-release (un-draft, ONLY after both publishes succeed) @@ -70,7 +74,7 @@ require_job_line() { # (1) release-assets-draft needs attest + provenance + require-ci-green + notes # + exact linux/aarch64 wheel smoke # ---------------------------------------------------------------------- -for dep in attest provenance require-ci-green notes smoke-linux-aarch64-wheel; do +for dep in attest provenance pypi-canonical-dist require-ci-green notes smoke-linux-aarch64-wheel; do job_needs release-assets-draft "$dep" \ || fail "release-assets-draft must \`needs: $dep\` (fail-closed on missing provenance/CI)" done @@ -84,6 +88,8 @@ for ext in '\.crate' '\.whl' '\.tar\.gz' '\.sigstore\.json' '\.intoto\.jsonl'; d printf '%s\n' "$body_draft" | grep -qE "dist/\*${ext}([^a-zA-Z]|$)" \ || fail "release-assets-draft must \`gh release upload\` dist/*$(printf '%s' "$ext" | sed 's/\\//g')" done +printf '%s\n' "$body_draft" | grep -qE 'name:[[:space:]]*pypi-canonical-dist' \ + || fail "release-assets-draft must upload canonical Python dist, not raw rebuilt wheel/sdist artifacts" printf '%s\n' "$body_draft" | grep -qE "$github_repo_env_re" \ || fail "release-assets-draft must set \`GH_REPO: \${{ github.repository }}\` (no checkout, so gh release upload needs explicit repo context)" @@ -172,11 +178,21 @@ post_line="$(require_job_line publish-crate '^[[:space:]]+- name:[[:space:]]*Pos [ "$publish_line" -lt "$post_line" ] \ || fail "publish-crate must run the crates.io post-publish download/compare AFTER \`cargo publish\`" +pcd="$(job_body pypi-canonical-dist)" +printf '%s\n' "$pcd" | grep -qE 'release_pypi_canonical_dist\.py canonicalize' \ + || fail "pypi-canonical-dist must canonicalize Python artifacts before attestation/release upload" +printf '%s\n' "$pcd" | grep -qE 'name:[[:space:]]*pypi-canonical-dist' \ + || fail "pypi-canonical-dist must upload the canonical Python dist artifact" + ppb="$(job_body publish-pypi)" -printf '%s\n' "$ppb" | grep -qE 'Post-publish PyPI hashes match staged dist' \ - || fail "publish-pypi must verify PyPI-served wheel/sdist hashes after publish" -printf '%s\n' "$ppb" | grep -qE 'pypi\.org/pypi/ordvec/.+/json|pypi\.org/pypi/ordvec/' \ - || fail "publish-pypi must query PyPI after publish for served file hashes" +job_needs publish-pypi pypi-canonical-dist \ + || fail "publish-pypi must \`needs: pypi-canonical-dist\` (publish/verify exactly the canonical files)" +printf '%s\n' "$ppb" | grep -qE 'name:[[:space:]]*pypi-canonical-dist' \ + || fail "publish-pypi must consume pypi-canonical-dist, not raw rebuilt wheel/sdist artifacts" +printf '%s\n' "$ppb" | grep -qE 'release_pypi_canonical_dist\.py verify' \ + || fail "publish-pypi must verify PyPI-served wheel/sdist hashes against canonical dist" +grep -q 'pypi.org/pypi' tests/release_pypi_canonical_dist.py \ + || fail "release_pypi_canonical_dist.py must query PyPI for served file hashes" # ---------------------------------------------------------------------- # (10) publish-github-release un-drafts ONLY AFTER both registry publishes succeed.