Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -358,10 +358,11 @@ jobs:
# Pattern adapted from microsoft/DiskANN's CI (also a vector-search crate).
# The local setup-intel-sde action owns the fixed Intel downloadmirror build,
# SHA256 verification, and x86_64 runner guard. The SHA gate fails closed for
# any archive we extract. Pull requests may soft-skip during Intel mirror
# outages, but push/workflow_dispatch runs fail closed; the release gate only
# accepts the post-merge push workflow result, so a release cannot proceed
# without the SDE probe and AVX-512 tests actually executing on main.
# any archive we extract. Every trigger except manual workflow_dispatch
# (pull requests, pushes, and any other event that runs this workflow) may
# soft-skip during Intel mirror outages so external downloadmirror challenges
# do not hold the whole branch red. Manual workflow_dispatch runs remain
# fail-closed, and the tag-triggered release workflow has its own fail-closed
# AVX-512 proof before assets can be staged or published.
avx512:
name: avx512 (Intel SDE / Sapphire Rapids)
runs-on: ubuntu-24.04
Expand Down Expand Up @@ -394,11 +395,11 @@ jobs:
with:
version: ${{ env.SDE_VERSION }}
sha256: ${{ env.SDE_SHA256 }}
allow-unavailable: ${{ github.event_name == 'pull_request' }}
- name: note Intel SDE unavailable on PR
if: ${{ github.event_name == 'pull_request' && steps.sde.outputs.sde-available != 'true' }}
allow-unavailable: ${{ github.event_name != 'workflow_dispatch' }}
- name: note Intel SDE unavailable
if: ${{ steps.sde.outputs.sde-available != 'true' }}
run: |
echo "::warning::Intel SDE archive unavailable on this pull request; push and release-gated runs fail closed."
echo "::warning::Intel SDE archive unavailable; SDE-dependent CI steps skipped. The release workflow has a separate fail-closed AVX-512 proof."
- name: sanity-check AVX-512 detection under SDE
if: ${{ steps.sde.outputs.sde-available == 'true' }}
env:
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ jobs:
with:
version: ${{ env.SDE_VERSION }}
sha256: ${{ env.SDE_SHA256 }}
allow-unavailable: ${{ github.event_name == 'pull_request' }}
- name: note Intel SDE unavailable on PR
if: ${{ github.event_name == 'pull_request' && steps.sde.outputs.sde-available != 'true' }}
allow-unavailable: ${{ github.event_name != 'workflow_dispatch' }}
- name: note Intel SDE unavailable
if: ${{ steps.sde.outputs.sde-available != 'true' }}
run: |
echo "::warning::Intel SDE archive unavailable on this pull request; push and release-gated runs fail closed."
echo "::warning::Intel SDE archive unavailable; SDE-backed coverage skipped. The release workflow has a separate fail-closed AVX-512 proof."
- name: Install cargo-llvm-cov (pinned)
if: ${{ steps.sde.outputs.sde-available == 'true' }}
run: cargo install cargo-llvm-cov --version 0.8.7 --locked
Expand Down
70 changes: 69 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@
# sets, so nothing is published unless the artifact source is verified; and
# `publish-github-release` `needs:` every registry gate, so the Release stays
# DRAFT unless all pass.
# `release-avx512` reruns the Intel SDE CPUID probe and AVX-512 tests inside
# this tag workflow and is a hard dependency of core asset staging. Routine
# CI may tolerate Intel mirror outages, but a release cannot publish on skipped
# AVX-512 coverage.
# The signed-release graph is pinned in
# `tests/release_signed_release_invariants.sh` (run by ci.yml's release-guard
# on every push/PR) so a future commit can't silently dismantle it.
Expand Down Expand Up @@ -191,6 +195,70 @@ jobs:
fi
done

# Release-only AVX-512 proof. Runs after `guard` and `require-ci-green`, installs
# Intel SDE with soft-skipping disabled, then runs a CPUID probe and the test
# suite under SDE. None of the SDE steps carries an `if:` guard, so a failed
# install or probe fails the job instead of skipping it.
release-avx512:
name: prove AVX-512 coverage under Intel SDE
needs: [guard, require-ci-green]
if: needs.guard.outputs.ok == 'true'
runs-on: ubuntu-24.04
permissions:
contents: read
env:
# Pinned Intel SDE archive name and its expected SHA-256; both are forwarded
# to the local setup-intel-sde action in the "Install Intel SDE" step.
SDE_VERSION: sde-external-10.8.0-2026-03-15-lin
SDE_SHA256: 50b320cd226acef7a491f5b321fc1be3c3c7984f9e27a456e64894b5b0979dd3
steps:
- name: Harden the runner
uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
with:
egress-policy: audit
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # stable (2026-03-27)
with:
toolchain: stable
- name: Install Intel SDE
id: sde
uses: ./.github/actions/setup-intel-sde
with:
version: ${{ env.SDE_VERSION }}
sha256: ${{ env.SDE_SHA256 }}
# Literal "false" rather than an event-based expression: this job must
# never treat an unavailable SDE archive as a soft skip.
allow-unavailable: "false"
- name: Sanity-check AVX-512 detection under SDE
# Generates a throwaway `sde-probe` crate under RUNNER_TEMP, builds it in
# release mode, and runs the binary through SDE with `-spr`. The binary
# asserts that both avx512f and avx512vpopcntdq are detected at runtime.
# NOTE(review): `-spr` presumably selects Sapphire Rapids emulation - confirm
# against the Intel SDE documentation.
env:
SDE_PATH: ${{ steps.sde.outputs.sde-path }}
run: |
set -euo pipefail
mkdir -p "${RUNNER_TEMP}/sde-probe/src"
cat > "${RUNNER_TEMP}/sde-probe/Cargo.toml" <<'EOF'
[package]
name = "sde-probe"
version = "0.0.0"
edition = "2021"
[[bin]]
name = "sde-probe"
path = "src/main.rs"
EOF
cat > "${RUNNER_TEMP}/sde-probe/src/main.rs" <<'EOF'
fn main() {
let f = is_x86_feature_detected!("avx512f");
let p = is_x86_feature_detected!("avx512vpopcntdq");
println!("avx512f={f} avx512vpopcntdq={p}");
assert!(f, "SDE did not expose avx512f to the guest");
assert!(p, "SDE did not expose avx512vpopcntdq to the guest");
}
EOF
cargo build --release --manifest-path "${RUNNER_TEMP}/sde-probe/Cargo.toml"
"${SDE_PATH}" -spr -- \
"${RUNNER_TEMP}/sde-probe/target/release/sde-probe"
- name: cargo test under SDE (AVX-512 kernels)
env:
# Cargo's per-target runner setting: test binaries for
# x86_64-unknown-linux-gnu are launched through `<sde-path> -spr --`.
CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER: ${{ steps.sde.outputs.sde-path }} -spr --
# NOTE(review): presumably makes the crate's tests fail rather than skip
# when AVX-512 is not detected - confirm against the crate's test code.
ORDVEC_REQUIRE_AVX512: "1"
run: |
set -euo pipefail
cargo test
cargo test --features experimental

notes:
name: release notes (git-cliff) + draft Release
needs: guard
Expand Down Expand Up @@ -962,7 +1030,7 @@ jobs:

release-assets-draft:
name: stage core/Python assets on the DRAFT Release (does NOT un-draft)
needs: [guard, notes, attest, provenance, pypi-canonical-dist, require-ci-green, smoke-linux-aarch64-wheel]
needs: [guard, notes, attest, provenance, pypi-canonical-dist, require-ci-green, release-avx512, smoke-linux-aarch64-wheel]
if: needs.guard.outputs.ok == 'true'
runs-on: ubuntu-latest
permissions:
Expand Down
11 changes: 6 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- **Made Intel SDE AVX-512 coverage fail closed for release gating.** Pull
requests may emit a visible warning and skip SDE-dependent steps during an
Intel mirror outage, but the push/workflow-dispatch runs used by the release
gate still fail closed; setup must succeed, the AVX-512 CPUID probe must run,
and the SDE-backed test/coverage commands must execute before release.
- **Made Intel SDE AVX-512 coverage fail closed for release publishes.** Pull
requests and main pushes may emit a visible warning and skip SDE-dependent
steps during an Intel mirror outage, but the tag-triggered release workflow
reruns a fail-closed SDE proof before staging release assets; setup must
succeed, the AVX-512 CPUID probe must run, and SDE-backed tests must execute
before publish.
- **Closed manifest verifier path-reopen drift.** Verification and SQLite
cache-key construction now hash, probe, and validate the canonical path that
was checked and recorded, rather than reopening the pre-canonical joined path.
Expand Down
15 changes: 9 additions & 6 deletions RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,15 @@ the OIDC exchange (no risk of a bad publish; just a failed run).
`main` HEAD's SHA — which needs a **completed, successful** (not
`cancelled`, not in-progress) run of `ci.yml`, `python.yml`, `fuzz.yml`,
`codeql.yml`, `actionlint.yml`, and `zizmor.yml`.
- The `ci.yml` AVX-512 job is release-blocking and installs Intel SDE. A
downloadmirror `403` / outage is external infrastructure, but it still means
the SHA is **not releasable** until that same SHA has a successful `ci.yml`
run on `main`. The setup action restores a SHA-verified archive cache when
available; if the cache misses and Intel's download path is unavailable,
wait, rerun, or land a reviewed SDE pin/cache update before tagging.
- Routine `ci.yml` / `coverage.yml` runs may warn and skip SDE-dependent
steps when Intel's downloadmirror challenges GitHub-hosted runners. That
keeps external mirror outages from holding `main` red, but it does **not**
make a release shippable by itself: `release.yml` has a fail-closed
`release-avx512` job that installs Intel SDE, runs the AVX-512 CPUID
probe, and runs the AVX-512 test lane before assets can be staged.
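This release proof deliberately avoids writable workflow caches in the
tag workflow, so it needs Intel's download path to be reachable when the tag
workflow runs. If the download is unavailable, `release-avx512` fails and no
assets are staged: wait and rerun it, or land a reviewed SDE pin/update and
tag again.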
- Before the final tag, spot-check `.github/actions/setup-intel-sde/action.yml`
against Intel's SDE download page: version, Linux archive name, and SHA-256
must match the currently accepted pin.
Expand Down
150 changes: 106 additions & 44 deletions tests/release_publish_invariants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,10 @@
SDE_ACTION_PATH = os.environ.get(
"SDE_ACTION_PATH", ".github/actions/setup-intel-sde/action.yml"
)
PR_ONLY_SDE_ALLOW_UNAVAILABLE = "${{ github.event_name == 'pull_request' }}"
ROUTINE_CI_SDE_ALLOW_UNAVAILABLE = "${{ github.event_name != 'workflow_dispatch' }}"
RELEASE_SDE_ALLOW_UNAVAILABLE = "false"
SDE_AVAILABLE_IF = "${{ steps.sde.outputs.sde-available == 'true' }}"
PR_SDE_UNAVAILABLE_IF = (
"${{ github.event_name == 'pull_request' && steps.sde.outputs.sde-available != 'true' }}"
)
SDE_UNAVAILABLE_NOTICE_IF = "${{ steps.sde.outputs.sde-available != 'true' }}"
PYPI_CANONICAL_EXPECTED_ARGS = (
"--expected-wheels 4",
"--expected-sdists 1",
Expand Down Expand Up @@ -1764,7 +1763,16 @@ def check_sde_setup_action(path: str) -> None:
fail(f"{path}: Intel SDE outage softening must include {fragment!r}")


def check_sde_cache_job(workflow: dict[str, Any], path: str, job_name: str) -> None:
def check_sde_cache_job(
workflow: dict[str, Any],
path: str,
job_name: str,
*,
expected_allow_unavailable: str,
expected_notice_if: str | None,
require_cache: bool,
require_guarded_sde_steps: bool,
) -> None:
jobs = mapping(workflow.get("jobs"), f"{path}: jobs")
job = mapping(jobs.get(job_name), f"{path}: jobs.{job_name}")
job_env = mapping(job.get("env"), f"{path}: jobs.{job_name}.env")
Expand All @@ -1787,28 +1795,39 @@ def check_sde_cache_job(workflow: dict[str, Any], path: str, job_name: str) -> N
with_map = mapping(step.get("with", {}), f"{path}: {step_label(index, step)} with")
setup_steps.append((index, step, with_map))

if len(cache_steps) != 1:
fail(f"{path}: jobs.{job_name} must restore exactly one Intel SDE archive cache")
_, _, cache_with = cache_steps[0]
key = cache_with.get("key")
expected_key = (
"intel-sde-${{ runner.os }}-${{ runner.arch }}-"
"${{ env.SDE_VERSION }}-${{ env.SDE_SHA256 }}"
)
if key != expected_key:
fail(
f"{path}: jobs.{job_name} Intel SDE cache key must be version+sha pinned, "
"not action-file-hash based"
if require_cache:
if len(cache_steps) != 1:
fail(f"{path}: jobs.{job_name} must restore exactly one Intel SDE archive cache")
_, _, cache_with = cache_steps[0]
key = cache_with.get("key")
expected_key = (
"intel-sde-${{ runner.os }}-${{ runner.arch }}-"
"${{ env.SDE_VERSION }}-${{ env.SDE_SHA256 }}"
)
restore_keys = str(cache_with.get("restore-keys") or "")
expected_restore_key = "intel-sde-${{ runner.os }}-${{ runner.arch }}-"
if expected_restore_key not in {line.strip() for line in restore_keys.splitlines()}:
fail(
f"{path}: jobs.{job_name} Intel SDE cache restore-keys must include "
"the runner OS/arch prefix"
)
if contains_text(key, "hashFiles") or contains_text(key, "setup-intel-sde/action.yml"):
fail(f"{path}: jobs.{job_name} Intel SDE cache key must not hash the action file")
if key != expected_key:
fail(
f"{path}: jobs.{job_name} Intel SDE cache key must be version+sha pinned, "
"not action-file-hash based"
)
restore_keys = str(cache_with.get("restore-keys") or "")
expected_restore_key = "intel-sde-${{ runner.os }}-${{ runner.arch }}-"
if expected_restore_key not in {line.strip() for line in restore_keys.splitlines()}:
fail(
f"{path}: jobs.{job_name} Intel SDE cache restore-keys must include "
"the runner OS/arch prefix"
)
if contains_text(key, "hashFiles") or contains_text(key, "setup-intel-sde/action.yml"):
fail(f"{path}: jobs.{job_name} Intel SDE cache key must not hash the action file")
else:
if cache_steps:
fail(f"{path}: jobs.{job_name} must not restore workflow caches in release context")
for index, step in enumerate(steps):
action = action_name(step)
if action in {"actions/cache", "swatinem/rust-cache"}:
fail(
f"{path}: {step_label(index, step)} must not use workflow caches "
"in the release fail-closed SDE proof"
)

if len(setup_steps) != 1:
fail(f"{path}: jobs.{job_name} must use exactly one setup-intel-sde action")
Expand All @@ -1817,24 +1836,30 @@ def check_sde_cache_job(workflow: dict[str, Any], path: str, job_name: str) -> N
fail(f"{path}: jobs.{job_name} setup-intel-sde must receive env.SDE_VERSION")
if setup_with.get("sha256") != "${{ env.SDE_SHA256 }}":
fail(f"{path}: jobs.{job_name} setup-intel-sde must receive env.SDE_SHA256")
if setup_with.get("allow-unavailable") != PR_ONLY_SDE_ALLOW_UNAVAILABLE:
if setup_with.get("allow-unavailable") != expected_allow_unavailable:
fail(
f"{path}: jobs.{job_name} may soften Intel SDE outages only on pull_request; "
"push and workflow_dispatch runs must fail closed"
f"{path}: jobs.{job_name} setup-intel-sde allow-unavailable must be "
f"{expected_allow_unavailable!r}"
)

outage_notice_steps = []
for index, raw_step in enumerate(steps):
step = mapping(raw_step, f"{path}: jobs.{job_name}.steps[{index}]")
if step.get("if") == PR_SDE_UNAVAILABLE_IF and contains_text(
step.get("run"), "Intel SDE archive unavailable"
):
outage_notice_steps.append(step)
if len(outage_notice_steps) != 1:
fail(
f"{path}: jobs.{job_name} must emit exactly one PR-only Intel SDE outage notice; "
"release-gated runs must not green-skip AVX-512 coverage"
outage_notice_steps = [
mapping(raw_step, f"{path}: jobs.{job_name}.steps[{index}]")
for index, raw_step in enumerate(steps)
if contains_text(
mapping(raw_step, f"{path}: jobs.{job_name}.steps[{index}]").get("run"),
"Intel SDE archive unavailable",
)
]
if expected_notice_if is None:
if outage_notice_steps:
fail(f"{path}: jobs.{job_name} must not contain a soft-skip Intel SDE outage notice")
else:
matching_notices = [step for step in outage_notice_steps if step.get("if") == expected_notice_if]
if len(matching_notices) != 1:
fail(
f"{path}: jobs.{job_name} must emit exactly one Intel SDE outage notice "
f"guarded by {expected_notice_if!r}"
)

sde_guarded_names = {
"Install cargo-llvm-cov (pinned)",
Expand All @@ -1852,17 +1877,54 @@ def check_sde_cache_job(workflow: dict[str, Any], path: str, job_name: str) -> N
or contains_nested_text(step.get("env"), "steps.sde.outputs.sde-path")
or contains_text(step.get("run"), "SDE_PATH")
):
if step.get("if") != SDE_AVAILABLE_IF:
if require_guarded_sde_steps and step.get("if") != SDE_AVAILABLE_IF:
fail(
f"{path}: {step_label(index, step)} must run after SDE setup succeeds, "
"and may be skipped only when PR-only SDE setup reports unavailable"
"and may be skipped only when SDE setup reports unavailable"
)
if not require_guarded_sde_steps and step.get("if") is not None:
fail(
f"{path}: {step_label(index, step)} is in a release fail-closed SDE proof "
"and must not be guarded behind a green-skip condition"
)


def check_sde_cache_invariants() -> None:
check_sde_setup_action(SDE_ACTION_PATH)
check_sde_cache_job(load_workflow(CI_WORKFLOW_PATH), CI_WORKFLOW_PATH, "avx512")
check_sde_cache_job(load_workflow(COVERAGE_WORKFLOW_PATH), COVERAGE_WORKFLOW_PATH, "coverage")
check_sde_cache_job(
load_workflow(CI_WORKFLOW_PATH),
CI_WORKFLOW_PATH,
"avx512",
expected_allow_unavailable=ROUTINE_CI_SDE_ALLOW_UNAVAILABLE,
expected_notice_if=SDE_UNAVAILABLE_NOTICE_IF,
require_cache=True,
require_guarded_sde_steps=True,
)
check_sde_cache_job(
load_workflow(COVERAGE_WORKFLOW_PATH),
COVERAGE_WORKFLOW_PATH,
"coverage",
expected_allow_unavailable=ROUTINE_CI_SDE_ALLOW_UNAVAILABLE,
expected_notice_if=SDE_UNAVAILABLE_NOTICE_IF,
require_cache=True,
require_guarded_sde_steps=True,
)
release_workflow = load_workflow(WORKFLOW_PATH)
check_sde_cache_job(
release_workflow,
WORKFLOW_PATH,
"release-avx512",
expected_allow_unavailable=RELEASE_SDE_ALLOW_UNAVAILABLE,
expected_notice_if=None,
require_cache=False,
require_guarded_sde_steps=False,
)
jobs = mapping(release_workflow.get("jobs"), f"{WORKFLOW_PATH}: jobs")
draft_job = mapping(
jobs.get("release-assets-draft"), f"{WORKFLOW_PATH}: jobs.release-assets-draft"
)
if not has_need(draft_job, "release-avx512"):
fail(f"{WORKFLOW_PATH}: release-assets-draft must need release-avx512")


def main() -> None:
Expand Down
4 changes: 2 additions & 2 deletions tests/release_signed_release_invariants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ job_downloads_artifact_to_path() {

# ----------------------------------------------------------------------
# (1) release-assets-draft needs attest + provenance + pypi-canonical-dist + require-ci-green + notes
# + exact linux/aarch64 wheel smoke
# + fail-closed release AVX-512 proof + exact linux/aarch64 wheel smoke
# ----------------------------------------------------------------------
for dep in attest provenance pypi-canonical-dist require-ci-green notes smoke-linux-aarch64-wheel; do
for dep in attest provenance pypi-canonical-dist require-ci-green release-avx512 notes smoke-linux-aarch64-wheel; do
job_needs release-assets-draft "$dep" \
|| fail "release-assets-draft must \`needs: $dep\` (fail-closed on missing provenance/CI)"
done
Expand Down
Loading