diff --git a/.github/actions/setup-lean/action.yml b/.github/actions/setup-lean/action.yml
index 726d8204..0ae39eee 100644
--- a/.github/actions/setup-lean/action.yml
+++ b/.github/actions/setup-lean/action.yml
@@ -30,12 +30,13 @@ runs:
         LEAN_TOOLCHAIN_HASH: ${{ hashFiles('lean-toolchain') }}
         LAKEFILE_HASH: ${{ hashFiles('lakefile.lean') }}
         LAKE_MANIFEST_HASH: ${{ hashFiles('lake-manifest.json') }}
+        CACHE_SALT_HASH: ${{ hashFiles('.github/cache-salt') }}
       run: |
-        elan_key="elan-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}"
-        packages_key="lake-packages-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}"
-        packages_main_key="lake-packages-benchmark-${MAIN_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}"
-        build_key="lake-build-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}"
-        build_main_key="lake-build-benchmark-${MAIN_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}"
+        elan_key="elan-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${CACHE_SALT_HASH}"
+        packages_key="lake-packages-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}-${CACHE_SALT_HASH}"
+        packages_main_key="lake-packages-benchmark-${MAIN_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}-${CACHE_SALT_HASH}"
+        build_key="lake-build-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}-${CACHE_SALT_HASH}"
+        build_main_key="lake-build-benchmark-${MAIN_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}-${CACHE_SALT_HASH}"
         {
           echo "use_sticky=${USE_STICKY}"
           echo "use_build_sticky=${USE_BUILD_STICKY}"
diff --git a/.github/cache-salt b/.github/cache-salt
new file mode 100644
index 00000000..d00491fd
--- /dev/null
+++ b/.github/cache-salt
@@ -0,0 +1 @@
+1
diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsPoolBalance.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsPoolBalance.lean
new file mode 100644
index 00000000..1ffd50b5
--- /dev/null
+++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsPoolBalance.lean
@@ -0,0 +1,22 @@
+import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `deposit` stores `oldPoolBalance + amount` in `poolBalance`; `s'` is the `.snd` of running `SideEntrance.deposit amount` on `s`.
+-/
+theorem deposit_sets_pool_balance
+    (amount : Uint256) (s : ContractState) :
+    let s' := ((SideEntrance.deposit amount).run s).snd
+    deposit_sets_pool_balance_spec amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold deposit_sets_pool_balance_spec
+  grind [SideEntrance.deposit, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf]
+
+end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsSenderCredit.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsSenderCredit.lean
new file mode 100644
index 00000000..6ed16810
--- /dev/null
+++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsSenderCredit.lean
@@ -0,0 +1,22 @@
+import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `deposit` increases the caller's credited balance by `amount`; `s'` is the `.snd` of running `SideEntrance.deposit amount` on `s`.
+-/
+theorem deposit_sets_sender_credit
+    (amount : Uint256) (s : ContractState) :
+    let s' := ((SideEntrance.deposit amount).run s).snd
+    deposit_sets_sender_credit_spec amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold deposit_sets_sender_credit_spec
+  grind [SideEntrance.deposit, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf]
+
+end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/ExploitTraceDrainsPool.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/ExploitTraceDrainsPool.lean
new file mode 100644
index 00000000..0e1c33ce
--- /dev/null
+++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/ExploitTraceDrainsPool.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+If the caller starts with zero credited balance (`hFresh`) and `amount` is at most
+`s.storage 0` (`hBorrow`), then running `flashLoanViaDeposit amount` (borrow repaid
+through `deposit`) followed by `withdraw` reduces pool ETH by exactly `amount`.
+-/
+theorem exploit_trace_drains_pool
+    (amount : Uint256) (s : ContractState)
+    (hBorrow : amount <= s.storage 0)
+    (hFresh : s.storageMap 2 s.sender = 0) :
+    let s' := ((SideEntrance.flashLoanViaDeposit amount).run s).snd
+    let s'' := ((SideEntrance.withdraw).run s').snd
+    exploit_trace_drains_pool_spec amount s s'' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then call
+  -- `grind` with both executed functions and the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold exploit_trace_drains_pool_spec
+  grind [SideEntrance.flashLoanViaDeposit, SideEntrance.withdraw, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf]
+
+end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositPreservesPoolBalance.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositPreservesPoolBalance.lean
new file mode 100644
index 00000000..7a8de9e5
--- /dev/null
+++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositPreservesPoolBalance.lean
@@ -0,0 +1,24 @@
+import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing the summarized flash-loan-plus-deposit path (`flashLoanViaDeposit`) leaves
+tracked pool ETH unchanged, given `amount <= s.storage 0` (`hBorrow`).
+-/
+theorem flashLoanViaDeposit_preserves_pool_balance
+    (amount : Uint256) (s : ContractState)
+    (hBorrow : amount <= s.storage 0) :
+    let s' := ((SideEntrance.flashLoanViaDeposit amount).run s).snd
+    flashLoanViaDeposit_preserves_pool_balance_spec amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold flashLoanViaDeposit_preserves_pool_balance_spec
+  grind [SideEntrance.flashLoanViaDeposit, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf]
+
+end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositSetsSenderCredit.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositSetsSenderCredit.lean
new file mode 100644
index 00000000..3024ac89
--- /dev/null
+++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositSetsSenderCredit.lean
@@ -0,0 +1,24 @@
+import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing the summarized flash-loan-plus-deposit path (`flashLoanViaDeposit`) mints
+caller credit equal to the borrowed amount, given `amount <= s.storage 0` (`hBorrow`).
+-/
+theorem flashLoanViaDeposit_sets_sender_credit
+    (amount : Uint256) (s : ContractState)
+    (hBorrow : amount <= s.storage 0) :
+    let s' := ((SideEntrance.flashLoanViaDeposit amount).run s).snd
+    flashLoanViaDeposit_sets_sender_credit_spec amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold flashLoanViaDeposit_sets_sender_credit_spec
+  grind [SideEntrance.flashLoanViaDeposit, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf]
+
+end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance
diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/ChainStartThreshold.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/ChainStartThreshold.lean
new file mode 100644
index 00000000..cfbe50b9
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/ChainStartThreshold.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Ethereum.DepositContractMinimal
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing a threshold-crossing full deposit (`hThreshold`: `add (s.storage 1) 1 = 65536`, with `hFull`) sets `chainStarted`.
+-/
+theorem full_deposit_starts_chain_at_threshold
+    (depositAmount : Uint256) (s : ContractState)
+    (hCount : s.storage 0 < 4294967295)
+    (hMin : depositAmount >= 1000000000)
+    (hFull : depositAmount >= 32000000000)
+    (hThreshold : add (s.storage 1) 1 = 65536) :
+    let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd
+    deposit_starts_chain_at_threshold_spec depositAmount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold deposit_starts_chain_at_threshold_spec
+  grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted]
+
+end Benchmark.Cases.Ethereum.DepositContractMinimal
diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/DepositCount.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/DepositCount.lean
new file mode 100644
index 00000000..e4cf08ba
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/DepositCount.lean
@@ -0,0 +1,25 @@
+import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Ethereum.DepositContractMinimal
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `deposit` on the successful path increments the total deposit counter
+by exactly one; assumes `hCount` (`s.storage 0 < 4294967295`) and `hMin` (`depositAmount >= 1000000000`).
+-/
+theorem deposit_increments_deposit_count
+    (depositAmount : Uint256) (s : ContractState)
+    (hCount : s.storage 0 < 4294967295)
+    (hMin : depositAmount >= 1000000000) :
+    let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd
+    deposit_increments_deposit_count_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold deposit_increments_deposit_count_spec
+  grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted]
+
+end Benchmark.Cases.Ethereum.DepositContractMinimal
diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositIncrementsFullCount.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositIncrementsFullCount.lean
new file mode 100644
index 00000000..b3f8587c
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositIncrementsFullCount.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Ethereum.DepositContractMinimal
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `deposit` at or above the full threshold (`hFull`: `depositAmount >= 32000000000`)
+increments `fullDepositCount` by one; also assumes `hCount` and `hMin`.
+-/
+theorem full_deposit_increments_full_count
+    (depositAmount : Uint256) (s : ContractState)
+    (hCount : s.storage 0 < 4294967295)
+    (hMin : depositAmount >= 1000000000)
+    (hFull : depositAmount >= 32000000000) :
+    let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd
+    deposit_increments_full_count_for_full_deposit_spec depositAmount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold deposit_increments_full_count_for_full_deposit_spec
+  grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted]
+
+end Benchmark.Cases.Ethereum.DepositContractMinimal
diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositPreservesPartialGap.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositPreservesPartialGap.lean
new file mode 100644
index 00000000..368c8623
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositPreservesPartialGap.lean
@@ -0,0 +1,25 @@
+import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Ethereum.DepositContractMinimal
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing a full deposit increments both counters in lockstep, so the gap between all
+deposits and full deposits (stated here as `storage 0 - storage 1`) is preserved.
+-/
+theorem full_deposit_preserves_partial_gap
+    (depositAmount : Uint256) (s : ContractState)
+    (hCount : s.storage 0 < 4294967295)
+    (hMin : depositAmount >= 1000000000)
+    (hFull : depositAmount >= 32000000000) :
+    let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd
+    s'.storage 0 - s'.storage 1 = s.storage 0 - s.storage 1 := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): the goal is stated directly
+  -- on storage slots, so no spec is unfolded; `grind` gets the contract symbols as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted]
+
+end Benchmark.Cases.Ethereum.DepositContractMinimal
diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/SmallDepositPreservesFullCount.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/SmallDepositPreservesFullCount.lean
new file mode 100644
index 00000000..be5da501
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/SmallDepositPreservesFullCount.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Ethereum.DepositContractMinimal
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `deposit` below the full threshold (`hSmall`: `depositAmount < 32000000000`)
+leaves `fullDepositCount` unchanged; also assumes `hCount` and `hMin`.
+-/
+theorem small_deposit_preserves_full_count
+    (depositAmount : Uint256) (s : ContractState)
+    (hCount : s.storage 0 < 4294967295)
+    (hMin : depositAmount >= 1000000000)
+    (hSmall : depositAmount < 32000000000) :
+    let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd
+    deposit_preserves_full_count_for_small_deposit_spec depositAmount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold deposit_preserves_full_count_for_small_deposit_spec
+  grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted]
+
+end Benchmark.Cases.Ethereum.DepositContractMinimal
diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawIntervalMatchesWeights.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawIntervalMatchesWeights.lean
new file mode 100644
index 00000000..e092fc3d
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawIntervalMatchesWeights.lean
@@ -0,0 +1,25 @@
+import Benchmark.Cases.Kleros.SortitionTrees.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Kleros.SortitionTrees
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `draw` follows the encoded ticket intervals used by the implementation;
+assumes `s.storage 0 != 0` (`hRoot`) and `ticket < s.storage 0` (`hInRange`).
+-/
+theorem draw_interval_matches_weights
+    (ticket : Uint256) (s : ContractState)
+    (hRoot : s.storage 0 != 0)
+    (hInRange : ticket < s.storage 0) :
+    let s' := ((SortitionTrees.draw ticket).run s).snd
+    draw_interval_matches_weights_spec ticket s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold draw_interval_matches_weights_spec
+  grind [SortitionTrees.draw, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode]
+
+end Benchmark.Cases.Kleros.SortitionTrees
diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawSelectsValidLeaf.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawSelectsValidLeaf.lean
new file mode 100644
index 00000000..1365bd55
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawSelectsValidLeaf.lean
@@ -0,0 +1,24 @@
+import Benchmark.Cases.Kleros.SortitionTrees.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Kleros.SortitionTrees
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Any successful `draw` (here: under `hRoot` and `hInRange`) resolves to one of the four leaf node indices.
+-/
+theorem draw_selects_valid_leaf
+    (ticket : Uint256) (s : ContractState)
+    (hRoot : s.storage 0 != 0)
+    (hInRange : ticket < s.storage 0) :
+    let s' := ((SortitionTrees.draw ticket).run s).snd
+    draw_selects_valid_leaf_spec s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold draw_selects_valid_leaf_spec
+  grind [SortitionTrees.draw, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode]
+
+end Benchmark.Cases.Kleros.SortitionTrees
diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/NodeIdBijection.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/NodeIdBijection.lean
new file mode 100644
index 00000000..f0ea91ed
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/NodeIdBijection.lean
@@ -0,0 +1,25 @@
+import Benchmark.Cases.Kleros.SortitionTrees.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Kleros.SortitionTrees
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `setLeaf` writes matching forward and reverse mapping entries for the
+updated node and stake-path id; assumes `3 <= nodeIndex <= 6` (`hLow`, `hHigh`).
+-/
+theorem node_id_bijection
+    (nodeIndex stakePathID weight : Uint256) (s : ContractState)
+    (hLow : nodeIndex >= 3)
+    (hHigh : nodeIndex <= 6) :
+    let s' := ((SortitionTrees.setLeaf nodeIndex stakePathID weight).run s).snd
+    node_id_bijection_spec nodeIndex stakePathID s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold node_id_bijection_spec
+  grind [SortitionTrees.setLeaf, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode]
+
+end Benchmark.Cases.Kleros.SortitionTrees
diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/ParentEqualsSumOfChildren.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/ParentEqualsSumOfChildren.lean
new file mode 100644
index 00000000..def9850c
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/ParentEqualsSumOfChildren.lean
@@ -0,0 +1,24 @@
+import Benchmark.Cases.Kleros.SortitionTrees.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Kleros.SortitionTrees
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `setLeaf` (with `3 <= nodeIndex <= 6`) recomputes each parent node from its direct children.
+-/
+theorem parent_equals_sum_of_children
+    (nodeIndex stakePathID weight : Uint256) (s : ContractState)
+    (hLow : nodeIndex >= 3)
+    (hHigh : nodeIndex <= 6) :
+    let s' := ((SortitionTrees.setLeaf nodeIndex stakePathID weight).run s).snd
+    parent_equals_sum_of_children_spec s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold parent_equals_sum_of_children_spec
+  grind [SortitionTrees.setLeaf, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode]
+
+end Benchmark.Cases.Kleros.SortitionTrees
diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootEqualsSumOfLeaves.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootEqualsSumOfLeaves.lean
new file mode 100644
index 00000000..1b6ce94d
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootEqualsSumOfLeaves.lean
@@ -0,0 +1,24 @@
+import Benchmark.Cases.Kleros.SortitionTrees.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Kleros.SortitionTrees
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `setLeaf` (with `3 <= nodeIndex <= 6`) recomputes the root as the sum of the four leaf weights.
+-/
+theorem root_equals_sum_of_leaves
+    (nodeIndex stakePathID weight : Uint256) (s : ContractState)
+    (hLow : nodeIndex >= 3)
+    (hHigh : nodeIndex <= 6) :
+    let s' := ((SortitionTrees.setLeaf nodeIndex stakePathID weight).run s).snd
+    root_equals_sum_of_leaves_spec s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold root_equals_sum_of_leaves_spec
+  grind [SortitionTrees.setLeaf, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode]
+
+end Benchmark.Cases.Kleros.SortitionTrees
diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootMinusLeftEqualsRightSubtree.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootMinusLeftEqualsRightSubtree.lean
new file mode 100644
index 00000000..c6b679ab
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootMinusLeftEqualsRightSubtree.lean
@@ -0,0 +1,25 @@
+import Benchmark.Cases.Kleros.SortitionTrees.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Kleros.SortitionTrees
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `setLeaf` keeps the root partitioned into left and right subtree
+weights; assumes `3 <= nodeIndex <= 6` (`hLow`, `hHigh`).
+-/
+theorem root_minus_left_equals_right_subtree
+    (nodeIndex stakePathID weight : Uint256) (s : ContractState)
+    (hLow : nodeIndex >= 3)
+    (hHigh : nodeIndex <= 6) :
+    let s' := ((SortitionTrees.setLeaf nodeIndex stakePathID weight).run s).snd
+    root_minus_left_equals_right_subtree_spec s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold root_minus_left_equals_right_subtree_spec
+  grind [SortitionTrees.setLeaf, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode]
+
+end Benchmark.Cases.Kleros.SortitionTrees
diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/CeildivSandwich.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/CeildivSandwich.lean
new file mode 100644
index 00000000..c1036363
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/CeildivSandwich.lean
@@ -0,0 +1,25 @@
+import Benchmark.Cases.Lido.VaulthubLocked.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Lido.VaulthubLocked
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Supporting arithmetic lemma: ceil(x/d) * d >= x for positive d, assuming the
+product `(ceilDiv x d).val * d.val` stays below `modulus` (`hNoOverflow`). Key bound for the
+F-01 solvency proof: ties the reserve's ceiling division back to the original amount.
+-/
+theorem ceildiv_sandwich
+    (x d : Uint256)
+    (hd : d > 0)
+    (hNoOverflow : (ceilDiv x d).val * d.val < modulus) :
+    ceildiv_sandwich_spec x d := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then call
+  -- bare `grind`; no hints are passed here, so `ceilDiv` itself is not listed for it.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold ceildiv_sandwich_spec
+  grind
+
+end Benchmark.Cases.Lido.VaulthubLocked
diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/LockedFundsSolvency.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/LockedFundsSolvency.lean
new file mode 100644
index 00000000..b60c8c5b
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/LockedFundsSolvency.lean
@@ -0,0 +1,55 @@
+import Benchmark.Cases.Lido.VaulthubLocked.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Lido.VaulthubLocked
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Certora F-01: Locked funds solvency.
+After executing `syncLocked`, the stored locked amount (slot 6) multiplied by
+the reserve ratio complement is at least the liability (from liabilityShares
+in slot 1) multiplied by total basis points:
+
+  s'.storage 6 * (BP - RR) >= getPooledEthBySharesRoundUp(LS, TPE, TS) * BP
+
+The intended argument is a case split on whether the computed reserve or the minimal
+reserve dominates, followed by algebra using the ceilDiv sandwich bound and share
+conversion monotonicity; the skeleton below currently only attempts `grind`.
+-/
+theorem locked_funds_solvency
+    (s : ContractState)
+    -- Hypotheses on the pre-state (assumed invariants, not Lean `axiom`s)
+    (hMaxLS : s.storage 0 ≥ s.storage 1)
+    (hRR_pos : s.storage 3 > 0)
+    (hRR_lt : s.storage 3 < TOTAL_BASIS_POINTS)
+    (hTS : s.storage 5 > 0)
+    (hTPE : s.storage 4 > 0)
+    -- No overflow: maxLiabilityShares (slot 0) * totalPooledEther (slot 4) fits in Uint256
+    (hNoOverflow1 : (s.storage 0).val * (s.storage 4).val < modulus)
+    -- No overflow: liability (computed from slot 0) * reserveRatioBP (slot 3) fits in Uint256
+    (hNoOverflow2 : (getPooledEthBySharesRoundUp (s.storage 0) (s.storage 4) (s.storage 5)).val
+                    * (s.storage 3).val < modulus)
+    -- No overflow: the add inside locked (liability + effectiveReserve) fits in Uint256
+    (hNoOverflow3 : let liab := getPooledEthBySharesRoundUp (s.storage 0) (s.storage 4) (s.storage 5)
+                    let reserve := ceilDiv (mul liab (s.storage 3)) (sub TOTAL_BASIS_POINTS (s.storage 3))
+                    let eff := if reserve ≥ s.storage 2 then reserve else s.storage 2
+                    liab.val + eff.val < modulus)
+    -- No overflow: locked * (BP - RR) fits in Uint256 (same `liab`/`reserve`/`eff` as above)
+    (hNoOverflow4 : let liab := getPooledEthBySharesRoundUp (s.storage 0) (s.storage 4) (s.storage 5)
+                    let reserve := ceilDiv (mul liab (s.storage 3)) (sub TOTAL_BASIS_POINTS (s.storage 3))
+                    let eff := if reserve ≥ s.storage 2 then reserve else s.storage 2
+                    (add liab eff).val * (sub TOTAL_BASIS_POINTS (s.storage 3)).val < modulus)
+    -- No overflow: liability (computed from liabilityShares, slot 1) * BP fits in Uint256
+    (hNoOverflow5 : (getPooledEthBySharesRoundUp (s.storage 1) (s.storage 4) (s.storage 5)).val
+                    * TOTAL_BASIS_POINTS.val < modulus) :
+    let s' := ((VaultHubLocked.syncLocked).run s).snd
+    locked_funds_solvency_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec so its
+  -- body is exposed, then call `grind` with the contract symbols below as hints.
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold locked_funds_solvency_spec
+  grind [VaultHubLocked.syncLocked, VaultHubLocked.maxLiabilityShares, VaultHubLocked.liabilityShares, VaultHubLocked.minimalReserve, VaultHubLocked.reserveRatioBP, VaultHubLocked.totalPooledEther, VaultHubLocked.totalShares, VaultHubLocked.lockedAmount]
+
+end Benchmark.Cases.Lido.VaulthubLocked
diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/MaxLiabilitySharesBound.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/MaxLiabilitySharesBound.lean
new file mode 100644
index 00000000..e89d4ea4
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/MaxLiabilitySharesBound.lean
@@ -0,0 +1,23 @@
+import Benchmark.Cases.Lido.VaulthubLocked.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Lido.VaulthubLocked
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Certora P-VH-04: maxLiabilityShares >= liabilityShares, taken here as hypothesis `hBound`.
+This invariant is maintained by the VaultHub's minting and reporting logic.
+-/
+theorem max_liability_shares_bound
+    (maxLiabilityShares liabilityShares : Uint256)
+    (hBound : maxLiabilityShares ≥ liabilityShares) :
+    max_liability_shares_bound_spec maxLiabilityShares liabilityShares := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- call bare `grind` (no contract symbol hints are passed for this lemma).
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold max_liability_shares_bound_spec
+  grind
+
+end Benchmark.Cases.Lido.VaulthubLocked
diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/ReserveRatioBounds.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/ReserveRatioBounds.lean
new file mode 100644
index 00000000..8ce57a5b
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/ReserveRatioBounds.lean
@@ -0,0 +1,24 @@
+import Benchmark.Cases.Lido.VaulthubLocked.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Lido.VaulthubLocked
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Certora P-VH-03: Reserve ratio is strictly between 0 and TOTAL_BASIS_POINTS (hypotheses `hPos`, `hLt`).
+This is enforced by the vault connection validation logic.
+-/
+theorem reserve_ratio_bounds
+    (reserveRatioBP : Uint256)
+    (hPos : reserveRatioBP > 0)
+    (hLt : reserveRatioBP < TOTAL_BASIS_POINTS) :
+    reserve_ratio_bounds_spec reserveRatioBP := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- call bare `grind` (no contract symbol hints are passed for this lemma).
+  -- If `grind` leaves goals, fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold reserve_ratio_bounds_spec
+  grind
+
+end Benchmark.Cases.Lido.VaulthubLocked
diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/SharesConversionMonotone.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/SharesConversionMonotone.lean
new file mode 100644
index 00000000..08162108
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/SharesConversionMonotone.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.Lido.VaulthubLocked.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Lido.VaulthubLocked
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Supporting arithmetic lemma: getPooledEthBySharesRoundUp is monotone in shares.
+If a >= b then getPooledEthBySharesRoundUp(a) >= getPooledEthBySharesRoundUp(b), assuming `totalShares > 0` (`hTS`) and `a.val * totalPooledEther.val < modulus` (`hNoOverflow`).
+Needed to lift the F-01 solvency bound from maxLiabilityShares to liabilityShares.
+-/
+theorem shares_conversion_monotone
+    (a b : Uint256)
+    (totalPooledEther totalShares : Uint256)
+    (hTS : totalShares > 0)
+    (hNoOverflow : a.val * totalPooledEther.val < modulus) :
+    shares_conversion_monotone_spec a b totalPooledEther totalShares := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` is called bare here (no contract symbol hints). If it leaves goals,
+  -- add hints or fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold shares_conversion_monotone_spec
+  grind
+
+end Benchmark.Cases.Lido.VaulthubLocked
diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBookValue.lean b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBookValue.lean
new file mode 100644
index 00000000..249f7159
--- /dev/null
+++ b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBookValue.lean
@@ -0,0 +1,23 @@
+import Benchmark.Cases.NexusMutual.RammPriceBand.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.NexusMutual.RammPriceBand
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `syncPriceBand capital_ supply_` on `s` with nonzero `supply_` (`hSupply`) stores the synchronized book value.
+-/
+theorem syncPriceBand_sets_book_value
+    (capital_ supply_ : Uint256) (s : ContractState)
+    (hSupply : supply_ != 0) :
+    let s' := ((RammPriceBand.syncPriceBand capital_ supply_).run s).snd
+    syncPriceBand_sets_book_value_spec capital_ supply_ s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold syncPriceBand_sets_book_value_spec
+  grind [RammPriceBand.syncPriceBand, RammPriceBand.capital, RammPriceBand.supply, RammPriceBand.bookValue, RammPriceBand.buySpotPrice, RammPriceBand.sellSpotPrice]
+
+end Benchmark.Cases.NexusMutual.RammPriceBand
diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBuyPrice.lean b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBuyPrice.lean
new file mode 100644
index 00000000..b2af2f7d
--- /dev/null
+++ b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBuyPrice.lean
@@ -0,0 +1,23 @@
+import Benchmark.Cases.NexusMutual.RammPriceBand.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.NexusMutual.RammPriceBand
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `syncPriceBand capital_ supply_` on `s` with nonzero `supply_` (`hSupply`) stores the synchronized buy quote.
+-/
+theorem syncPriceBand_sets_buy_price
+    (capital_ supply_ : Uint256) (s : ContractState)
+    (hSupply : supply_ != 0) :
+    let s' := ((RammPriceBand.syncPriceBand capital_ supply_).run s).snd
+    syncPriceBand_sets_buy_price_spec capital_ supply_ s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold syncPriceBand_sets_buy_price_spec
+  grind [RammPriceBand.syncPriceBand, RammPriceBand.capital, RammPriceBand.supply, RammPriceBand.bookValue, RammPriceBand.buySpotPrice, RammPriceBand.sellSpotPrice]
+
+end Benchmark.Cases.NexusMutual.RammPriceBand
diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsCapital.lean b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsCapital.lean
new file mode 100644
index 00000000..36954bbd
--- /dev/null
+++ b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsCapital.lean
@@ -0,0 +1,23 @@
+import Benchmark.Cases.NexusMutual.RammPriceBand.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.NexusMutual.RammPriceBand
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `syncPriceBand capital_ supply_` on `s` with nonzero `supply_` (`hSupply`) stores the provided capital value.
+-/
+theorem syncPriceBand_sets_capital
+    (capital_ supply_ : Uint256) (s : ContractState)
+    (hSupply : supply_ != 0) :
+    let s' := ((RammPriceBand.syncPriceBand capital_ supply_).run s).snd
+    syncPriceBand_sets_capital_spec capital_ s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold syncPriceBand_sets_capital_spec
+  grind [RammPriceBand.syncPriceBand, RammPriceBand.capital, RammPriceBand.supply, RammPriceBand.bookValue, RammPriceBand.buySpotPrice, RammPriceBand.sellSpotPrice]
+
+end Benchmark.Cases.NexusMutual.RammPriceBand
diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsSellPrice.lean b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsSellPrice.lean
new file mode 100644
index 00000000..a8c83109
--- /dev/null
+++ b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsSellPrice.lean
@@ -0,0 +1,23 @@
+import Benchmark.Cases.NexusMutual.RammPriceBand.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.NexusMutual.RammPriceBand
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `syncPriceBand capital_ supply_` on `s` with nonzero `supply_` (`hSupply`) stores the synchronized sell quote.
+-/
+theorem syncPriceBand_sets_sell_price
+    (capital_ supply_ : Uint256) (s : ContractState)
+    (hSupply : supply_ != 0) :
+    let s' := ((RammPriceBand.syncPriceBand capital_ supply_).run s).snd
+    syncPriceBand_sets_sell_price_spec capital_ supply_ s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold syncPriceBand_sets_sell_price_spec
+  grind [RammPriceBand.syncPriceBand, RammPriceBand.capital, RammPriceBand.supply, RammPriceBand.bookValue, RammPriceBand.buySpotPrice, RammPriceBand.sellSpotPrice]
+
+end Benchmark.Cases.NexusMutual.RammPriceBand
diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/BuyGeBookValue.lean b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/BuyGeBookValue.lean
new file mode 100644
index 00000000..227f18df
--- /dev/null
+++ b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/BuyGeBookValue.lean
@@ -0,0 +1,28 @@
+import Benchmark.Cases.NexusMutual.RammPriceBand.Proofs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.NexusMutual.RammSpotPrice
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+The buy spot price is always at or above book value, regardless of whether
+the ratchet has converged (BV branch) or is still converging (ratchet branch). Assumes nonzero `eth`, `oldEth`, `supply`, `capital`, a nonzero `calculateBuyReserve` result (`hBuyReserve`), and `buyArithmeticSafe` (`hSafe`).
+-/
+theorem spotPrice_buy_ge_book_value
+    (eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed : Uint256)
+    (hEth : eth != 0)
+    (hOldEth : oldEth != 0)
+    (hSupply : supply != 0)
+    (hCapital : capital != 0)
+    (hBuyReserve : calculateBuyReserve eth oldEth oldNxmBuyReserve capital supply elapsed speed != 0)
+    (hSafe : buyArithmeticSafe eth oldEth oldNxmBuyReserve capital supply elapsed speed) :
+    spotPrice_buy_ge_book_value_spec eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` is called bare here (no contract symbol hints). If it leaves goals,
+  -- add hints or fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold spotPrice_buy_ge_book_value_spec
+  grind
+
+end Benchmark.Cases.NexusMutual.RammSpotPrice
diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBookValue.lean b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBookValue.lean
new file mode 100644
index 00000000..22df4afd
--- /dev/null
+++ b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBookValue.lean
@@ -0,0 +1,29 @@
+import Benchmark.Cases.NexusMutual.RammPriceBand.Proofs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.NexusMutual.RammSpotPrice
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+The sell spot price is always at or below book value, regardless of whether
+the ratchet has converged (BV branch) or is still converging (ratchet branch). Assumes nonzero `eth`, `oldEth`, `supply`, `capital`, a nonzero `calculateSellReserve` result (`hSellReserve`), `sellArithmeticSafe` (`hSafe`), and `realisticSellScale` (`hScale`).
+-/
+theorem spotPrice_sell_le_book_value
+    (eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed : Uint256)
+    (hEth : eth != 0)
+    (hOldEth : oldEth != 0)
+    (hSupply : supply != 0)
+    (hCapital : capital != 0)
+    (hSellReserve : calculateSellReserve eth oldEth oldNxmSellReserve capital supply elapsed speed != 0)
+    (hSafe : sellArithmeticSafe eth oldEth oldNxmSellReserve capital supply elapsed speed)
+    (hScale : realisticSellScale eth capital supply) :
+    spotPrice_sell_le_book_value_spec eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` is called bare here (no contract symbol hints). If it leaves goals,
+  -- add hints or fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold spotPrice_sell_le_book_value_spec
+  grind
+
+end Benchmark.Cases.NexusMutual.RammSpotPrice
diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBuy.lean b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBuy.lean
new file mode 100644
index 00000000..3cf73197
--- /dev/null
+++ b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBuy.lean
@@ -0,0 +1,31 @@
+import Benchmark.Cases.NexusMutual.RammPriceBand.Proofs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.NexusMutual.RammSpotPrice
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+The sell spot price never exceeds the buy spot price, under the nonzero-input, nonzero-reserve, arithmetic-safety (`hBuySafe`, `hSellSafe`) and `realisticSellScale` (`hScale`) hypotheses below.
+Together with buy_ge_book_value and sell_le_book_value, this gives: sell ≤ bv ≤ buy.
+-/
+theorem spotPrice_sell_le_buy
+    (eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed : Uint256)
+    (hEth : eth != 0)
+    (hOldEth : oldEth != 0)
+    (hSupply : supply != 0)
+    (hCapital : capital != 0)
+    (hBuyReserve : calculateBuyReserve eth oldEth oldNxmBuyReserve capital supply elapsed speed != 0)
+    (hSellReserve : calculateSellReserve eth oldEth oldNxmSellReserve capital supply elapsed speed != 0)
+    (hBuySafe : buyArithmeticSafe eth oldEth oldNxmBuyReserve capital supply elapsed speed)
+    (hSellSafe : sellArithmeticSafe eth oldEth oldNxmSellReserve capital supply elapsed speed)
+    (hScale : realisticSellScale eth capital supply) :
+    spotPrice_sell_le_buy_spec eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` is called bare here (no contract symbol hints). If it leaves goals,
+  -- add hints or fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold spotPrice_sell_le_buy_spec
+  grind
+
+end Benchmark.Cases.NexusMutual.RammSpotPrice
diff --git a/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalAssets.lean b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalAssets.lean
new file mode 100644
index 00000000..0fa5c7c9
--- /dev/null
+++ b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalAssets.lean
@@ -0,0 +1,22 @@
+import Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `deposit assets` on `s` stores `oldTotalAssets + assets` in `totalAssets`; no side conditions are assumed.
+-/
+theorem deposit_sets_totalAssets
+    (assets : Uint256) (s : ContractState) :
+    let s' := ((ERC4626VirtualOffsetDeposit.deposit assets).run s).snd
+    deposit_sets_totalAssets_spec assets s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold deposit_sets_totalAssets_spec
+  grind [ERC4626VirtualOffsetDeposit.deposit, ERC4626VirtualOffsetDeposit.totalAssets, ERC4626VirtualOffsetDeposit.totalShares]
+
+end Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit
diff --git a/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalShares.lean b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalShares.lean
new file mode 100644
index 00000000..077be747
--- /dev/null
+++ b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalShares.lean
@@ -0,0 +1,22 @@
+import Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `deposit assets` on `s` stores `oldTotalShares + previewDeposit(assets)` in `totalShares`; no side conditions are assumed.
+-/
+theorem deposit_sets_totalShares
+    (assets : Uint256) (s : ContractState) :
+    let s' := ((ERC4626VirtualOffsetDeposit.deposit assets).run s).snd
+    deposit_sets_totalShares_spec assets s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold deposit_sets_totalShares_spec
+  grind [ERC4626VirtualOffsetDeposit.deposit, ERC4626VirtualOffsetDeposit.totalAssets, ERC4626VirtualOffsetDeposit.totalShares]
+
+end Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit
diff --git a/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PositiveDepositMintsPositiveSharesUnderRateBound.lean b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PositiveDepositMintsPositiveSharesUnderRateBound.lean
new file mode 100644
index 00000000..962daefe
--- /dev/null
+++ b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PositiveDepositMintsPositiveSharesUnderRateBound.lean
@@ -0,0 +1,29 @@
+import Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit.Specs
+import Verity.Stdlib.Math
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit
+
+open Verity
+open Verity.EVM.Uint256
+open Verity.Stdlib.Math
+
+/--
+Under the rate-bound assumption that the exact numerator already reaches one full
+denominator-width (`hRate`), a positive deposit (`hAssets`) mints a positive number of shares. `hDenom` keeps `add (s.storage 0) virtualAssets` nonzero and `hMul` keeps the numerator product within `MAX_UINT256`.
+-/
+theorem positive_deposit_mints_positive_shares_under_rate_bound
+    (assets : Uint256) (s : ContractState)
+    (hAssets : assets ≠ 0)
+    (hDenom : add (s.storage 0) virtualAssets ≠ 0)
+    (hRate : ((add (s.storage 0) virtualAssets : Uint256) : Nat)
+      <= (assets : Nat) * ((add (s.storage 1) virtualShares : Uint256) : Nat))
+    (hMul : (assets : Nat) * ((add (s.storage 1) virtualShares : Uint256) : Nat) <= MAX_UINT256) :
+    positive_deposit_mints_positive_shares_under_rate_bound_spec assets s := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` is called bare here (no contract symbol hints). If it leaves goals,
+  -- add hints or fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold positive_deposit_mints_positive_shares_under_rate_bound_spec
+  grind
+
+end Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit
diff --git a/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PreviewDepositRoundsDown.lean b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PreviewDepositRoundsDown.lean
new file mode 100644
index 00000000..300b5060
--- /dev/null
+++ b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PreviewDepositRoundsDown.lean
@@ -0,0 +1,25 @@
+import Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit.Specs
+import Verity.Stdlib.Math
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit
+
+open Verity
+open Verity.EVM.Uint256
+open Verity.Stdlib.Math
+
+/--
+`previewDeposit` rounds down, so the minted share estimate times the denominator
+never exceeds the exact numerator product when the multiplication is exact (`hMul`: the product, taken in `Nat`, is at most `MAX_UINT256`).
+-/
+theorem previewDeposit_rounds_down
+    (assets : Uint256) (s : ContractState)
+    (hMul : (assets : Nat) * ((add (s.storage 1) virtualShares : Uint256) : Nat) <= MAX_UINT256) :
+    previewDeposit_rounds_down_spec assets s := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` is called bare here (no contract symbol hints). If it leaves goals,
+  -- add hints or fall back to `simp` / `by_cases`. Use `grind?` for hints.
+  unfold previewDeposit_rounds_down_spec
+  grind
+
+end Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimMarksBothClaimed.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimMarksBothClaimed.lean
new file mode 100644
index 00000000..0aa9a987
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimMarksBothClaimed.lean
@@ -0,0 +1,29 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` on the successful path marks the caller as claimed for
+both tokens. The successful path is pinned by `hWaiver`, `hActive`, `hUsdcFresh`, `hWethFresh`, `hUsdcBound`, `hWethBound` and both boolean `claimBoth` arguments set to `true`.
+-/
+theorem claimBoth_marks_both_claimed
+    (usdcShareWad wethShareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)
+    (hWethFresh : s.storageMap 9 s.sender = 0)
+    (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0)
+    (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) :
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd
+    claimBoth_marks_both_claimed_spec s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_marks_both_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesRoundClaimed.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesRoundClaimed.lean
new file mode 100644
index 00000000..c27fa521
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesRoundClaimed.lean
@@ -0,0 +1,29 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` on the successful path increases both claimed counters
+by exactly their computed claim amounts. The successful path is pinned by `hWaiver`, `hActive`, `hUsdcFresh`, `hWethFresh`, `hUsdcBound`, `hWethBound` and both boolean `claimBoth` arguments set to `true`.
+-/
+theorem claimBoth_updates_round_claimed
+    (usdcShareWad wethShareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)
+    (hWethFresh : s.storageMap 9 s.sender = 0)
+    (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0)
+    (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) :
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd
+    claimBoth_updates_round_claimed_spec usdcShareWad wethShareWad s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_updates_round_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesTotalAllocated.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesTotalAllocated.lean
new file mode 100644
index 00000000..c160d241
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesTotalAllocated.lean
@@ -0,0 +1,29 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` on the successful path decreases both allocated counters
+by exactly their computed claim amounts. The successful path is pinned by `hWaiver`, `hActive`, `hUsdcFresh`, `hWethFresh`, `hUsdcBound`, `hWethBound` and both boolean `claimBoth` arguments set to `true`.
+-/
+theorem claimBoth_updates_total_allocated
+    (usdcShareWad wethShareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)
+    (hWethFresh : s.storageMap 9 s.sender = 0)
+    (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0)
+    (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) :
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd
+    claimBoth_updates_total_allocated_spec usdcShareWad wethShareWad s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_updates_total_allocated_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimedPlusAllocatedConserved.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimedPlusAllocatedConserved.lean
new file mode 100644
index 00000000..b78a55de
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimedPlusAllocatedConserved.lean
@@ -0,0 +1,29 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` preserves the claimed-plus-allocated accounting mass
+for both tokens. Stated under `hWaiver`, `hActive`, `hUsdcFresh`, `hWethFresh`, `hUsdcBound`, `hWethBound`, with both boolean `claimBoth` arguments set to `true`.
+-/
+theorem claimBoth_claimed_plus_allocated_conserved
+    (usdcShareWad wethShareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)
+    (hWethFresh : s.storageMap 9 s.sender = 0)
+    (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0)
+    (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) :
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd
+    claimBoth_claimed_plus_allocated_conserved_spec usdcShareWad wethShareWad s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_claimed_plus_allocated_conserved_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothMatchesIndependentClaims.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothMatchesIndependentClaims.lean
new file mode 100644
index 00000000..bfd56df8
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothMatchesIndependentClaims.lean
@@ -0,0 +1,29 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` yields the same USDC slice as `claimUsdc` alone and the
+same WETH slice as `claimWeth` alone. Stated under `hWaiver`, `hActive`, `hUsdcFresh`, `hWethFresh`, `hUsdcBound`, `hWethBound`, with both boolean `claimBoth` arguments set to `true`.
+-/
+theorem claimBoth_matches_independent_claims
+    (usdcShareWad wethShareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)
+    (hWethFresh : s.storageMap 9 s.sender = 0)
+    (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0)
+    (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) :
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd
+    claimBoth_matches_independent_claims_spec usdcShareWad wethShareWad s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_matches_independent_claims_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothNoOverclaim.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothNoOverclaim.lean
new file mode 100644
index 00000000..d40c9519
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothNoOverclaim.lean
@@ -0,0 +1,28 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` on the successful path preserves both round bounds; the spec is stated on the post-state `s'` only, under `hUsdcBound` and `hWethBound` on the pre-state `s`.
+-/
+theorem claimBoth_preserves_round_bounds
+    (usdcShareWad wethShareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)
+    (hWethFresh : s.storageMap 9 s.sender = 0)
+    (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0)
+    (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) :
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd
+    claimBoth_preserves_round_bounds_spec s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_preserves_round_bounds_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcBoundViolationRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcBoundViolationRejected.lean
new file mode 100644
index 00000000..781d4181
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcBoundViolationRejected.lean
@@ -0,0 +1,30 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` when the computed USDC payout would exceed the round
+total (`hUsdcExceeds`) reverts before any state writes, leaving the contract state unchanged. The second `claimBoth` argument is fixed to `true`; `wethProofAccepted` and `wethShareWad` are arbitrary.
+-/
+theorem claimBoth_reverts_if_usdc_exceeds_total
+    (usdcShareWad : Uint256)
+    (wethProofAccepted : Bool)
+    (wethShareWad : Uint256)
+    (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)
+    (hUsdcExceeds : add (s.storage 1) (computedClaimAmount usdcShareWad s) > s.storage 0) :
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad wethProofAccepted).run s).snd
+    claimBoth_reverts_if_usdc_exceeds_total_spec s s' := by
+  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.
+  -- `grind` receives the contract symbol hints listed below; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_reverts_if_usdc_exceeds_total_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcDoubleClaimRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcDoubleClaimRejected.lean
new file mode 100644
index 00000000..1df572bd
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcDoubleClaimRejected.lean
@@ -0,0 +1,29 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` with a previously claimed USDC entitlement reverts
+before any state writes, leaving the contract state unchanged.
+-/
+theorem claimBoth_reverts_if_usdc_already_claimed
+    (usdcShareWad : Uint256)
+    (usdcProofAccepted wethProofAccepted : Bool)
+    (wethShareWad : Uint256)
+    (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hClaimed : s.storageMap 5 s.sender != 0) :  -- mapping slot 5 at sender is nonzero (presumably hasClaimedUsdc; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad usdcProofAccepted wethShareWad wethProofAccepted).run s).snd  -- both proof-accepted arguments stay arbitrary
+    claimBoth_reverts_if_usdc_already_claimed_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimBoth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_reverts_if_usdc_already_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethBoundViolationRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethBoundViolationRejected.lean
new file mode 100644
index 00000000..08b77542
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethBoundViolationRejected.lean
@@ -0,0 +1,30 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` when the computed WETH payout would exceed the round
+total reverts and rolls back the earlier USDC sub-claim, leaving the contract
+state unchanged.
+-/
+theorem claimBoth_reverts_if_weth_exceeds_total
+    (usdcShareWad wethShareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hWethFresh : s.storageMap 9 s.sender = 0)  -- mapping slot 9 at sender is zero (presumably hasClaimedWeth; TODO confirm)
+    (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0)  -- slot 1 + computedClaimAmount is at most slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    (hWethExceeds : add (s.storage 7) (computedWethClaimAmount wethShareWad s) > s.storage 6) :  -- slot 7 + computedWethClaimAmount exceeds slot 6 (presumably roundWethClaimed vs roundWethTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd  -- `true` is passed for both proof-accepted arguments
+    claimBoth_reverts_if_weth_exceeds_total_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimBoth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_reverts_if_weth_exceeds_total_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethDoubleClaimRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethDoubleClaimRejected.lean
new file mode 100644
index 00000000..1d7a5ad0
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethDoubleClaimRejected.lean
@@ -0,0 +1,30 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimBoth` with a previously claimed WETH entitlement reverts and
+rolls back the earlier USDC sub-claim, leaving the contract state unchanged.
+-/
+theorem claimBoth_reverts_if_weth_already_claimed
+    (usdcShareWad wethShareWad : Uint256)
+    (wethProofAccepted : Bool)
+    (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hUsdcFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hWethClaimed : s.storageMap 9 s.sender != 0)  -- mapping slot 9 at sender is nonzero (presumably hasClaimedWeth; TODO confirm)
+    (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) :  -- slot 1 + computedClaimAmount is at most slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad wethProofAccepted).run s).snd  -- `true` is passed for the USDC proof-accepted argument; the WETH one stays arbitrary
+    claimBoth_reverts_if_weth_already_claimed_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimBoth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimBoth_reverts_if_weth_already_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BoundViolationRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BoundViolationRejected.lean
new file mode 100644
index 00000000..04d9d696
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BoundViolationRejected.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimUsdc` when the computed payout would exceed the round total
+reverts before any state writes, leaving the contract state unchanged.
+-/
+theorem claimUsdc_reverts_if_exceeds_total
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hExceeds : add (s.storage 1) (computedClaimAmount shareWad s) > s.storage 0) :  -- slot 1 + computedClaimAmount exceeds slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimUsdc_reverts_if_exceeds_total_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimUsdc` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimUsdc_reverts_if_exceeds_total_spec
+  grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimMarksUser.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimMarksUser.lean
new file mode 100644
index 00000000..b9bee7b2
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimMarksUser.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimUsdc` on the successful path marks the caller as claimed.
+-/
+theorem claimUsdc_marks_user_claimed
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) :  -- slot 1 + computedClaimAmount is at most slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimUsdc_marks_claimed_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimUsdc` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimUsdc_marks_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesRoundClaimed.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesRoundClaimed.lean
new file mode 100644
index 00000000..bd26fbda
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesRoundClaimed.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimUsdc` on the successful path increases `roundUsdcClaimed`
+by exactly the computed claim amount.
+-/
+theorem claimUsdc_updates_round_claimed
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) :  -- slot 1 + computedClaimAmount is at most slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimUsdc_updates_round_claimed_spec shareWad s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimUsdc` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimUsdc_updates_round_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesTotalAllocated.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesTotalAllocated.lean
new file mode 100644
index 00000000..f8d7ae44
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesTotalAllocated.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimUsdc` on the successful path decreases `totalUsdcAllocated`
+by exactly the computed claim amount.
+-/
+theorem claimUsdc_updates_total_allocated
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) :  -- slot 1 + computedClaimAmount is at most slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimUsdc_updates_total_allocated_spec shareWad s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimUsdc` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimUsdc_updates_total_allocated_spec
+  grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimedPlusAllocatedConserved.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimedPlusAllocatedConserved.lean
new file mode 100644
index 00000000..91a4f0fe
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimedPlusAllocatedConserved.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimUsdc` moves the computed amount from `totalUsdcAllocated`
+into `roundUsdcClaimed`, preserving the combined accounting mass.
+-/
+theorem claimUsdc_claimed_plus_allocated_conserved
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) :  -- slot 1 + computedClaimAmount is at most slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimUsdc_claimed_plus_allocated_conserved_spec shareWad s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimUsdc` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimUsdc_claimed_plus_allocated_conserved_spec
+  grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/DoubleClaimRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/DoubleClaimRejected.lean
new file mode 100644
index 00000000..2428a3bc
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/DoubleClaimRejected.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimUsdc` for an address that already claimed reverts before any
+state writes, leaving the contract state unchanged.
+-/
+theorem claimUsdc_reverts_if_already_claimed
+    (shareWad : Uint256) (proofAccepted : Bool) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hClaimed : s.storageMap 5 s.sender != 0) :  -- mapping slot 5 at sender is nonzero (presumably hasClaimedUsdc; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad proofAccepted).run s).snd  -- `proofAccepted` stays arbitrary
+    claimUsdc_reverts_if_already_claimed_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimUsdc` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimUsdc_reverts_if_already_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/NoOverclaim.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/NoOverclaim.lean
new file mode 100644
index 00000000..3d3b7616
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/NoOverclaim.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimUsdc` on the successful path preserves the round bound.
+-/
+theorem claimUsdc_preserves_round_bound
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) :  -- slot 1 + computedClaimAmount is at most slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimUsdc_preserves_round_bound_spec s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimUsdc` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimUsdc_preserves_round_bound_spec
+  grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/UsdcPreservesWethState.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/UsdcPreservesWethState.lean
new file mode 100644
index 00000000..feb369ee
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/UsdcPreservesWethState.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimUsdc` on the successful path preserves the WETH accounting
+slice.
+-/
+theorem claimUsdc_preserves_weth_state
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 5 s.sender = 0)  -- mapping slot 5 at sender is zero (presumably hasClaimedUsdc; TODO confirm)
+    (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) :  -- slot 1 + computedClaimAmount is at most slot 0 (presumably roundUsdcClaimed vs roundUsdcTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimUsdc_preserves_weth_state_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimUsdc` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimUsdc_preserves_weth_state_spec
+  grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethBoundViolationRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethBoundViolationRejected.lean
new file mode 100644
index 00000000..b427fab5
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethBoundViolationRejected.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimWeth` when the computed payout would exceed the round total
+reverts before any state writes, leaving the contract state unchanged.
+-/
+theorem claimWeth_reverts_if_exceeds_total
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 9 s.sender = 0)  -- mapping slot 9 at sender is zero (presumably hasClaimedWeth; TODO confirm)
+    (hExceeds : add (s.storage 7) (computedWethClaimAmount shareWad s) > s.storage 6) :  -- slot 7 + computedWethClaimAmount exceeds slot 6 (presumably roundWethClaimed vs roundWethTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimWeth_reverts_if_exceeds_total_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimWeth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimWeth_reverts_if_exceeds_total_spec
+  grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimMarksUser.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimMarksUser.lean
new file mode 100644
index 00000000..bd2e9eff
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimMarksUser.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimWeth` on the successful path marks the caller as claimed.
+-/
+theorem claimWeth_marks_user_claimed
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 9 s.sender = 0)  -- mapping slot 9 at sender is zero (presumably hasClaimedWeth; TODO confirm)
+    (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) :  -- slot 7 + computedWethClaimAmount is at most slot 6 (presumably roundWethClaimed vs roundWethTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimWeth_marks_claimed_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimWeth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimWeth_marks_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesRoundClaimed.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesRoundClaimed.lean
new file mode 100644
index 00000000..171d95a5
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesRoundClaimed.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimWeth` on the successful path increases `roundWethClaimed`
+by exactly the computed claim amount.
+-/
+theorem claimWeth_updates_round_claimed
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 9 s.sender = 0)  -- mapping slot 9 at sender is zero (presumably hasClaimedWeth; TODO confirm)
+    (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) :  -- slot 7 + computedWethClaimAmount is at most slot 6 (presumably roundWethClaimed vs roundWethTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimWeth_updates_round_claimed_spec shareWad s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimWeth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimWeth_updates_round_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesTotalAllocated.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesTotalAllocated.lean
new file mode 100644
index 00000000..bc9bee1e
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesTotalAllocated.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimWeth` on the successful path decreases `totalWethAllocated`
+by exactly the computed claim amount.
+-/
+theorem claimWeth_updates_total_allocated
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 9 s.sender = 0)  -- mapping slot 9 at sender is zero (presumably hasClaimedWeth; TODO confirm)
+    (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) :  -- slot 7 + computedWethClaimAmount is at most slot 6 (presumably roundWethClaimed vs roundWethTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimWeth_updates_total_allocated_spec shareWad s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimWeth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimWeth_updates_total_allocated_spec
+  grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimedPlusAllocatedConserved.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimedPlusAllocatedConserved.lean
new file mode 100644
index 00000000..09bd7f40
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimedPlusAllocatedConserved.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimWeth` moves the computed amount from `totalWethAllocated`
+into `roundWethClaimed`, preserving the combined accounting mass.
+-/
+theorem claimWeth_claimed_plus_allocated_conserved
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hFresh : s.storageMap 9 s.sender = 0)  -- mapping slot 9 at sender is zero (presumably hasClaimedWeth; TODO confirm)
+    (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) :  -- slot 7 + computedWethClaimAmount is at most slot 6 (presumably roundWethClaimed vs roundWethTotal; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd  -- `true` is passed for the proof-accepted argument
+    claimWeth_claimed_plus_allocated_conserved_spec shareWad s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimWeth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimWeth_claimed_plus_allocated_conserved_spec
+  grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethDoubleClaimRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethDoubleClaimRejected.lean
new file mode 100644
index 00000000..04b5428d
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethDoubleClaimRejected.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimWeth` for an address that already claimed reverts before any
+state writes, leaving the contract state unchanged.
+-/
+theorem claimWeth_reverts_if_already_claimed
+    (shareWad : Uint256) (proofAccepted : Bool) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)  -- mapping slot 4 at sender is nonzero (presumably hasSignedWaiver; TODO confirm)
+    (hActive : s.storage 3 != 0)  -- slot 3 is nonzero (presumably roundActive; TODO confirm)
+    (hClaimed : s.storageMap 9 s.sender != 0) :  -- mapping slot 9 at sender is nonzero (presumably hasClaimedWeth; TODO confirm)
+    let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad proofAccepted).run s).snd  -- `proofAccepted` stays arbitrary
+    claimWeth_reverts_if_already_claimed_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): unfold the spec, then
+  -- run `grind` with `claimWeth` and the contract symbols below as hints; fall back to
+  -- `simp` / `by_cases` if grind leaves goals. Use `grind?` for hints.
+  unfold claimWeth_reverts_if_already_claimed_spec
+  grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethNoOverclaim.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethNoOverclaim.lean
new file mode 100644
index 00000000..c6160e09
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethNoOverclaim.lean
@@ -0,0 +1,26 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimWeth` on the successful path preserves the round bound.
+-/
+theorem claimWeth_preserves_round_bound
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hFresh : s.storageMap 9 s.sender = 0)
+    (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) :
+    let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd
+    claimWeth_preserves_round_bound_spec s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): the hint list also names USDC-side symbols; confirm they are needed here.
+  unfold claimWeth_preserves_round_bound_spec
+  grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethPreservesUsdcState.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethPreservesUsdcState.lean
new file mode 100644
index 00000000..539bb8eb
--- /dev/null
+++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethPreservesUsdcState.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `claimWeth` on the successful path preserves the USDC accounting
+slice.
+-/
+theorem claimWeth_preserves_usdc_state
+    (shareWad : Uint256) (s : ContractState)
+    (hWaiver : s.storageMap 4 s.sender != 0)
+    (hActive : s.storage 3 != 0)
+    (hFresh : s.storageMap 9 s.sender = 0)
+    (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) :
+    let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd
+    claimWeth_preserves_usdc_state_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- The spec is unfolded first so `grind` works on its body rather than the folded name.
+  unfold claimWeth_preserves_usdc_state_spec
+  grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth]
+
+end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerAcyclicity.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerAcyclicity.lean
new file mode 100644
index 00000000..950e6a92
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerAcyclicity.lean
@@ -0,0 +1,32 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+addOwner preserves acyclicity of the owner linked list.
+
+After addOwner(owner), the list becomes:
+  SENTINEL → owner → old_head → ... → SENTINEL
+
+Acyclicity is a tautology — it holds for any state. The proof
+(acyclic_generic) shows that any duplicate-free chain from SENTINEL's
+successor ending at key ≠ SENTINEL cannot contain SENTINEL, purely
+by the structure of the definitions. No pre-state hypotheses are needed
+beyond the Solidity require guards.
+-/
+theorem addOwner_acyclicity
+    (owner : Address) (s : ContractState)
+    (hNotZero : (owner != zeroAddress) = true)
+    (hNotSentinel : (owner != SENTINEL) = true)
+    (hFresh : (wordToAddress (s.storageMap 0 owner) == zeroAddress) = true) :
+    acyclic ((OwnerManager.addOwner owner).run s).snd := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `acyclic` is not unfolded or hinted, and `acyclic_generic` is not used here.
+  grind [OwnerManager.addOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerIsOwnerCorrectness.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerIsOwnerCorrectness.lean
new file mode 100644
index 00000000..c2c82e84
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerIsOwnerCorrectness.lean
@@ -0,0 +1,33 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Functional correctness of `addOwner`: the new address becomes an owner
+and all other addresses' ownership status is unchanged.
+
+`isOwner s addr` holds iff `next s addr ≠ zeroAddress ∧ addr ≠ SENTINEL`.
+
+Proof strategy: use `addOwner_next_eq` to characterise the post-state
+`next` function, then split into the two conjuncts of `addOwner_correctness`.
+For the new owner: `next s' owner = next s SENTINEL ≠ 0`.
+For others: `next s' k = next s k` when `k ≠ SENTINEL` and `k ≠ owner`.
+-/
+theorem addOwner_isOwnerCorrectness
+    (owner : Address) (s : ContractState)
+    (hNotZero : (owner != zeroAddress) = true)
+    (hNotSentinel : (owner != SENTINEL) = true)
+    (hFresh : (wordToAddress (s.storageMap 0 owner) == zeroAddress) = true)
+    (hPreInv : ownerListInvariant s) :
+    let s' := ((OwnerManager.addOwner owner).run s).snd
+    addOwner_correctness s s' owner := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `addOwner_correctness` is not unfolded/hinted; `addOwner_next_eq` is unused.
+  grind [OwnerManager.addOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerOwnerListInvariant.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerOwnerListInvariant.lean
new file mode 100644
index 00000000..e3c1c0bd
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerOwnerListInvariant.lean
@@ -0,0 +1,38 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Combined `ownerListInvariant` preservation under `addOwner`.
+
+The ownerListInvariant merges `inListReachable` and `reachableInList`:
+membership (non-zero successor) is equivalent to reachability from
+SENTINEL. This is strictly stronger than proving inListReachable alone.
+
+Proof strategy: prove both directions of the biconditional separately.
+The forward direction (membership → reachability) follows from the
+existing inListReachable proof. The reverse direction (reachability →
+membership) requires showing that the new chain structure doesn't
+introduce reachability to nodes with zero successors.
+
+Acyclicity and freshness are derived from ownerListInvariant internally,
+not required as separate hypotheses.
+-/
+theorem addOwner_ownerListInvariant
+    (owner : Address) (s : ContractState)
+    (hNotZero : (owner != zeroAddress) = true)
+    (hNotSentinel : (owner != SENTINEL) = true)
+    (hFresh : (wordToAddress (s.storageMap 0 owner) == zeroAddress) = true)
+    (hPreInv : ownerListInvariant s) :
+    let s' := ((OwnerManager.addOwner owner).run s).snd
+    ownerListInvariant s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `ownerListInvariant` is not unfolded or hinted; confirm grind can open it.
+  grind [OwnerManager.addOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/InListReachable.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/InListReachable.lean
new file mode 100644
index 00000000..4340fd71
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/InListReachable.lean
@@ -0,0 +1,48 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Certora `inListReachable` invariant preservation under `addOwner`.
+
+Given that in the pre-state every node with a non-zero successor is reachable
+from SENTINEL, show that the same holds in the post-state after inserting
+`owner` at the head of the linked list.
+
+Proof strategy: SENTINEL is trivially reachable (reflexivity). The new owner
+is reachable via [SENTINEL, owner]. For any other key with a non-zero successor,
+its next pointer is unchanged, so we can lift its pre-state witness chain to
+the post-state and prepend the new path SENTINEL → owner → old_head.
+-/
+theorem in_list_reachable
+    (owner : Address) (s : ContractState)
+    (hNotZero : (owner != zeroAddress) = true)
+    (hNotSentinel : (owner != SENTINEL) = true)
+    (hFresh : (wordToAddress (s.storageMap 0 owner) == zeroAddress) = true)
+    (hPreReach : ∀ key : Address, next s key ≠ zeroAddress → reachable s SENTINEL key)
+    -- Raw acyclicity: SENTINEL ∉ any chain from next s SENTINEL.
+    -- Strictly stronger than `acyclic s` (no noDuplicates guard).
+    (hAcyclic : ∀ key : Address, ∀ chain : List Address,
+      chain.head? = some (next s SENTINEL) →
+      chain.getLast? = some key →
+      isChain s chain →
+      SENTINEL ∉ chain)
+    -- Raw freshness: owner ∉ any chain from next s SENTINEL.
+    -- Strictly stronger than `freshInList s owner` (no noDuplicates guard).
+    (hOwnerFresh : ∀ key : Address, ∀ chain : List Address,
+      chain.head? = some (next s SENTINEL) →
+      chain.getLast? = some key →
+      isChain s chain →
+      owner ∉ chain) :
+    in_list_reachable_spec s ((OwnerManager.addOwner owner).run s).snd := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): only the spec is unfolded; `reachable`/`isChain`/`next` are not in the hints.
+  unfold in_list_reachable_spec
+  grind [OwnerManager.addOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerAcyclicity.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerAcyclicity.lean
new file mode 100644
index 00000000..de4213ab
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerAcyclicity.lean
@@ -0,0 +1,30 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+removeOwner preserves acyclicity of the owner linked list.
+
+Acyclicity is a tautology — it holds for any state. The proof
+(acyclic_generic) shows that any duplicate-free chain from SENTINEL's
+successor ending at key ≠ SENTINEL cannot contain SENTINEL, purely
+by the structure of the definitions. No pre-state acyclicity hypothesis
+is needed.
+-/
+theorem removeOwner_acyclicity
+    (prevOwner owner : Address) (s : ContractState)
+    (hNotZero : (owner != zeroAddress) = true)
+    (hNotSentinel : (owner != SENTINEL) = true)
+    (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == owner) = true)
+    (hOwnerInList : next s owner ≠ zeroAddress) :
+    acyclic ((OwnerManager.removeOwner prevOwner owner).run s).snd := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `acyclic` is not unfolded or hinted, and `acyclic_generic` is not used here.
+  grind [OwnerManager.removeOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerInListReachable.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerInListReachable.lean
new file mode 100644
index 00000000..bb024614
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerInListReachable.lean
@@ -0,0 +1,44 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Certora `inListReachable` invariant preservation under `removeOwner`.
+
+After removing `owner` by unlinking it from `prevOwner`, show that every
+node with a non-zero successor in the post-state is still reachable from
+SENTINEL.
+
+Proof strategy: The removed owner's mapping becomes 0 so it no longer
+triggers the invariant. prevOwner now points to owner's old successor,
+so chains that went through owner can "skip" it: replace
+[... → prevOwner → owner → X → ...] with [... → prevOwner → X → ...].
+All other next pointers are unchanged.
+-/
+theorem removeOwner_inListReachable
+    (prevOwner owner : Address) (s : ContractState)
+    (hNotZero : (owner != zeroAddress) = true)
+    (hNotSentinel : (owner != SENTINEL) = true)
+    (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == owner) = true)
+    -- The removed owner must have a non-zero successor (i.e. be in the list).
+    (hOwnerInList : next s owner ≠ zeroAddress)
+    -- Pre-state invariant
+    (hPreInv : inListReachable s)
+    -- Unique predecessor: each non-zero node has at most one non-zero predecessor.
+    (hUniquePred : uniquePredecessor s)
+    -- prevOwner is non-zero (a valid list node)
+    (hPrevNZ : prevOwner ≠ zeroAddress)
+    -- Zero address maps to itself
+    (hZeroInert : next s zeroAddress = zeroAddress) :
+    let s' := ((OwnerManager.removeOwner prevOwner owner).run s).snd
+    inListReachable s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `inListReachable`/`uniquePredecessor` are not unfolded or hinted; confirm.
+  grind [OwnerManager.removeOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerIsOwnerCorrectness.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerIsOwnerCorrectness.lean
new file mode 100644
index 00000000..df54abba
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerIsOwnerCorrectness.lean
@@ -0,0 +1,33 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Functional correctness of `removeOwner`: the removed address is no longer
+an owner and all other addresses' ownership status is unchanged.
+
+`isOwner s addr` holds iff `next s addr ≠ zeroAddress ∧ addr ≠ SENTINEL`.
+
+Proof strategy: use `removeOwner_storageMap` to characterise the post-state
+`next` function, then show `next s' owner = zeroAddress` and for all
+`k ≠ owner`, `next s' k ≠ 0 ↔ next s k ≠ 0` by case-splitting on
+`k = prevOwner`.
+-/
+theorem removeOwner_isOwnerCorrectness
+    (prevOwner owner : Address) (s : ContractState)
+    (hNotZero : (owner != zeroAddress) = true)
+    (hNotSentinel : (owner != SENTINEL) = true)
+    (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == owner) = true)
+    (hOwnerInList : next s owner ≠ zeroAddress) :
+    let s' := ((OwnerManager.removeOwner prevOwner owner).run s).snd
+    removeOwner_correctness s s' owner := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `removeOwner_correctness` is not hinted; `removeOwner_storageMap` is unused.
+  grind [OwnerManager.removeOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerOwnerListInvariant.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerOwnerListInvariant.lean
new file mode 100644
index 00000000..a417106c
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerOwnerListInvariant.lean
@@ -0,0 +1,32 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Combined `ownerListInvariant` preservation under `removeOwner`.
+
+Properties like noSelfLoops and owner ≠ prevOwner are derived internally
+from ownerListInvariant + uniquePredecessor, not required as hypotheses.
+-/
+theorem removeOwner_ownerListInvariant
+    (prevOwner owner : Address) (s : ContractState)
+    (hNotZero : (owner != zeroAddress) = true)
+    (hNotSentinel : (owner != SENTINEL) = true)
+    (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == owner) = true)
+    (hOwnerInList : next s owner ≠ zeroAddress)
+    (hPreInv : ownerListInvariant s)
+    (hUniquePred : uniquePredecessor s)
+    (hPrevNZ : prevOwner ≠ zeroAddress)
+    (hZeroInert : next s zeroAddress = zeroAddress) :
+    let s' := ((OwnerManager.removeOwner prevOwner owner).run s).snd
+    ownerListInvariant s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `ownerListInvariant`/`uniquePredecessor` are not unfolded or hinted; confirm.
+  grind [OwnerManager.removeOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersAcyclicity.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersAcyclicity.lean
new file mode 100644
index 00000000..30c1e904
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersAcyclicity.lean
@@ -0,0 +1,37 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+setupOwners establishes acyclicity of the owner linked list (base case).
+
+The constructed list SENTINEL → o1 → o2 → o3 → SENTINEL has no internal
+cycles because all three owners are distinct, non-zero, and non-sentinel.
+SENTINEL appears only as the list head and the terminal pointer
+(o3 → SENTINEL), never in the interior of any chain starting from
+SENTINEL's successor.
+-/
+theorem setupOwners_acyclicity
+    (owner1 owner2 owner3 : Address) (s : ContractState)
+    (h1NZ : (owner1 != zeroAddress) = true)
+    (h1NS : (owner1 != SENTINEL) = true)
+    (h2NZ : (owner2 != zeroAddress) = true)
+    (h2NS : (owner2 != SENTINEL) = true)
+    (h3NZ : (owner3 != zeroAddress) = true)
+    (h3NS : (owner3 != SENTINEL) = true)
+    (h12 : (owner1 != owner2) = true)
+    (h13 : (owner1 != owner3) = true)
+    (h23 : (owner2 != owner3) = true)
+    (hClean : ∀ addr : Address, s.storageMap 0 addr = 0) :
+    let s' := ((OwnerManager.setupOwners owner1 owner2 owner3).run s).snd
+    acyclic s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `acyclic` is not unfolded or in the hint list; confirm grind can open it.
+  grind [OwnerManager.setupOwners, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersInListReachable.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersInListReachable.lean
new file mode 100644
index 00000000..e72e3cdf
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersInListReachable.lean
@@ -0,0 +1,40 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+setupOwners establishes the `inListReachable` invariant from a clean state.
+This is the base case: no pre-state invariant is required.
+
+After setupOwners(owner1, owner2, owner3), the linked list is:
+  SENTINEL → owner1 → owner2 → owner3 → SENTINEL
+
+Every node with a non-zero successor (SENTINEL, owner1, owner2, owner3)
+is reachable from SENTINEL by construction. This can be proven by
+characterizing the post-state storageMap and building explicit witness
+chains for each node.
+-/
+theorem setupOwners_inListReachable
+    (owner1 owner2 owner3 : Address) (s : ContractState)
+    (h1NZ : (owner1 != zeroAddress) = true)
+    (h1NS : (owner1 != SENTINEL) = true)
+    (h2NZ : (owner2 != zeroAddress) = true)
+    (h2NS : (owner2 != SENTINEL) = true)
+    (h3NZ : (owner3 != zeroAddress) = true)
+    (h3NS : (owner3 != SENTINEL) = true)
+    (h12 : (owner1 != owner2) = true)
+    (h13 : (owner1 != owner3) = true)
+    (h23 : (owner2 != owner3) = true)
+    (hClean : ∀ addr : Address, s.storageMap 0 addr = 0) :
+    let s' := ((OwnerManager.setupOwners owner1 owner2 owner3).run s).snd
+    inListReachable s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `inListReachable` is not unfolded or hinted; confirm grind can open it.
+  grind [OwnerManager.setupOwners, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersOwnerListInvariant.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersOwnerListInvariant.lean
new file mode 100644
index 00000000..676511c5
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersOwnerListInvariant.lean
@@ -0,0 +1,40 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+setupOwners establishes the combined `ownerListInvariant` (base case).
+
+After setupOwners(owner1, owner2, owner3), the linked list is:
+  SENTINEL → owner1 → owner2 → owner3 → SENTINEL
+
+Both directions of the biconditional hold: every node with a non-zero
+successor is reachable from SENTINEL (by explicit chains), and every
+node reachable from SENTINEL has a non-zero successor (because only
+SENTINEL, owner1, owner2, owner3 are reachable, and they all have
+non-zero successors).
+-/
+theorem setupOwners_ownerListInvariant
+    (owner1 owner2 owner3 : Address) (s : ContractState)
+    (h1NZ : (owner1 != zeroAddress) = true)
+    (h1NS : (owner1 != SENTINEL) = true)
+    (h2NZ : (owner2 != zeroAddress) = true)
+    (h2NS : (owner2 != SENTINEL) = true)
+    (h3NZ : (owner3 != zeroAddress) = true)
+    (h3NS : (owner3 != SENTINEL) = true)
+    (h12 : (owner1 != owner2) = true)
+    (h13 : (owner1 != owner3) = true)
+    (h23 : (owner2 != owner3) = true)
+    (hClean : ∀ addr : Address, s.storageMap 0 addr = 0) :
+    let s' := ((OwnerManager.setupOwners owner1 owner2 owner3).run s).snd
+    ownerListInvariant s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `ownerListInvariant` is not unfolded or hinted; confirm grind can open it.
+  grind [OwnerManager.setupOwners, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerAcyclicity.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerAcyclicity.lean
new file mode 100644
index 00000000..413689a1
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerAcyclicity.lean
@@ -0,0 +1,32 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+swapOwner preserves acyclicity of the owner linked list.
+
+Acyclicity is a tautology — it holds for any state. The proof
+(acyclic_generic) shows that any duplicate-free chain from SENTINEL's
+successor ending at key ≠ SENTINEL cannot contain SENTINEL, purely
+by the structure of the definitions. No pre-state hypotheses are needed
+beyond the Solidity require guards.
+-/
+theorem swapOwner_acyclicity
+    (prevOwner oldOwner newOwner : Address) (s : ContractState)
+    (hNewNotZero : (newOwner != zeroAddress) = true)
+    (hNewNotSentinel : (newOwner != SENTINEL) = true)
+    (hNewFresh : (wordToAddress (s.storageMap 0 newOwner) == zeroAddress) = true)
+    (hOldNotZero : (oldOwner != zeroAddress) = true)
+    (hOldNotSentinel : (oldOwner != SENTINEL) = true)
+    (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == oldOwner) = true) :
+    acyclic ((OwnerManager.swapOwner prevOwner oldOwner newOwner).run s).snd := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `acyclic` is not unfolded or hinted, and `acyclic_generic` is not used here.
+  grind [OwnerManager.swapOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerInListReachable.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerInListReachable.lean
new file mode 100644
index 00000000..c4055e4b
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerInListReachable.lean
@@ -0,0 +1,46 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Certora `inListReachable` invariant preservation under `swapOwner`.
+
+swapOwner atomically replaces oldOwner with newOwner in-place:
+  owners[newOwner] = owners[oldOwner]
+  owners[prevOwner] = newOwner
+  owners[oldOwner] = 0
+
+Proof strategy: newOwner inherits oldOwner's successor. For any key with
+a non-zero successor in the post-state, its pre-state chain through
+oldOwner can be adapted by replacing oldOwner with newOwner:
+[... → prevOwner → oldOwner → X → ...] becomes
+[... → prevOwner → newOwner → X → ...].
+-/
+theorem swapOwner_inListReachable
+    (prevOwner oldOwner newOwner : Address) (s : ContractState)
+    (hNewNotZero : (newOwner != zeroAddress) = true)
+    (hNewNotSentinel : (newOwner != SENTINEL) = true)
+    (hNewFresh : (wordToAddress (s.storageMap 0 newOwner) == zeroAddress) = true)
+    (hOldNotZero : (oldOwner != zeroAddress) = true)
+    (hOldNotSentinel : (oldOwner != SENTINEL) = true)
+    (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == oldOwner) = true)
+    -- Pre-state invariant (full ownerListInvariant, not just inListReachable)
+    (hPreInvFull : ownerListInvariant s)
+    -- Unique predecessor: each non-zero node has at most one non-zero predecessor.
+    (hUniquePred : uniquePredecessor s)
+    -- prevOwner is non-zero (a valid list node)
+    (hPrevNZ : prevOwner ≠ zeroAddress)
+    -- Zero address maps to itself
+    (hZeroInert : next s zeroAddress = zeroAddress) :
+    let s' := ((OwnerManager.swapOwner prevOwner oldOwner newOwner).run s).snd
+    inListReachable s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `inListReachable`/`ownerListInvariant`/`uniquePredecessor` are not hinted.
+  grind [OwnerManager.swapOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerIsOwnerCorrectness.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerIsOwnerCorrectness.lean
new file mode 100644
index 00000000..2aff455a
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerIsOwnerCorrectness.lean
@@ -0,0 +1,38 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Functional correctness of `swapOwner`: the old owner is removed, the new
+owner is added, and all other addresses' ownership status is unchanged.
+
+`isOwner s addr` holds iff `next s addr ≠ zeroAddress ∧ addr ≠ SENTINEL`.
+
+Proof strategy: use `swapOwner_storageMap` to characterise the post-state
+`next` function, then show:
+  1. `next s' oldOwner = zeroAddress` (old owner removed)
+  2. `next s' newOwner = next s oldOwner ≠ 0` (new owner added)
+  3. For all `k ≠ oldOwner, k ≠ newOwner`: `next s' k ≠ 0 ↔ next s k ≠ 0`
+     by case-splitting on `k = prevOwner`.
+-/
+theorem swapOwner_isOwnerCorrectness
+    (prevOwner oldOwner newOwner : Address) (s : ContractState)
+    (hNewNotZero : (newOwner != zeroAddress) = true)
+    (hNewNotSentinel : (newOwner != SENTINEL) = true)
+    (hNewFresh : (wordToAddress (s.storageMap 0 newOwner) == zeroAddress) = true)
+    (hOldNotZero : (oldOwner != zeroAddress) = true)
+    (hOldNotSentinel : (oldOwner != SENTINEL) = true)
+    (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == oldOwner) = true)
+    (hOldInList : next s oldOwner ≠ zeroAddress) :
+    let s' := ((OwnerManager.swapOwner prevOwner oldOwner newOwner).run s).snd
+    swapOwner_correctness s s' oldOwner newOwner := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `swapOwner_correctness` is not hinted; `swapOwner_storageMap` is unused.
+  grind [OwnerManager.swapOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerOwnerListInvariant.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerOwnerListInvariant.lean
new file mode 100644
index 00000000..d1ceab34
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerOwnerListInvariant.lean
@@ -0,0 +1,35 @@
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Safe.OwnerManagerReach
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Combined `ownerListInvariant` preservation under `swapOwner`.
+
+Properties like noSelfLoops, freshInList, and oldOwner ≠ prevOwner are
+derived internally from ownerListInvariant + uniquePredecessor, not
+required as hypotheses.
+-/
+theorem swapOwner_ownerListInvariant
+    (prevOwner oldOwner newOwner : Address) (s : ContractState)
+    (hNewNotZero : (newOwner != zeroAddress) = true)
+    (hNewNotSentinel : (newOwner != SENTINEL) = true)
+    (hNewFresh : (wordToAddress (s.storageMap 0 newOwner) == zeroAddress) = true)
+    (hOldNotZero : (oldOwner != zeroAddress) = true)
+    (hOldNotSentinel : (oldOwner != SENTINEL) = true)
+    (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == oldOwner) = true)
+    (hPreInv : ownerListInvariant s)
+    (hUniquePred : uniquePredecessor s)
+    (hPrevNZ : prevOwner ≠ zeroAddress)
+    (hZeroInert : next s zeroAddress = zeroAddress) :
+    let s' := ((OwnerManager.swapOwner prevOwner oldOwner newOwner).run s).snd
+    ownerListInvariant s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): try `grind` with the contract
+  -- symbol hints; fall back to `simp` / `by_cases` if goals remain. Use `grind?` for hints.
+  -- NOTE(review): `ownerListInvariant`/`uniquePredecessor` are not unfolded or hinted; confirm.
+  grind [OwnerManager.swapOwner, OwnerManager.owners, OwnerManager.ownerCount]
+
+end Benchmark.Cases.Safe.OwnerManagerReach
diff --git a/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapEnforcesFeeAdjustedInvariant.lean b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapEnforcesFeeAdjustedInvariant.lean
new file mode 100644
index 00000000..ab80e3a9
--- /dev/null
+++ b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapEnforcesFeeAdjustedInvariant.lean
@@ -0,0 +1,28 @@
+import Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `applySwap` is only possible when the fee-adjusted product guard holds.
+-/
+theorem applySwap_enforces_fee_adjusted_invariant
+    (balance0 balance1 amount0In amount1In : Uint256) (s : ContractState)
+    (hInput : amount0In != 0 || amount1In != 0)
+    (hFee0 : mul balance0 1000 >= mul amount0In 3)
+    (hFee1 : mul balance1 1000 >= mul amount1In 3)
+    (hK : mul (sub (mul balance0 1000) (mul amount0In 3))
+        (sub (mul balance1 1000) (mul amount1In 3))
+        >= mul (mul (s.storage 0) (s.storage 1)) 1000000) :
+    let s' := ((PairFeeAdjustedSwap.applySwap balance0 balance1 amount0In amount1In).run s).snd
+    applySwap_enforces_fee_adjusted_invariant_spec balance0 balance1 amount0In amount1In s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `applySwap`, `reserve0` and `reserve1` as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold applySwap_enforces_fee_adjusted_invariant_spec
+  grind [PairFeeAdjustedSwap.applySwap, PairFeeAdjustedSwap.reserve0, PairFeeAdjustedSwap.reserve1]
+
+end Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap
diff --git a/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve0.lean b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve0.lean
new file mode 100644
index 00000000..083857db
--- /dev/null
+++ b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve0.lean
@@ -0,0 +1,28 @@
+import Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `applySwap` stores the observed `balance0` as `reserve0`.
+-/
+theorem applySwap_sets_reserve0
+    (balance0 balance1 amount0In amount1In : Uint256) (s : ContractState)
+    (hInput : amount0In != 0 || amount1In != 0)
+    (hFee0 : mul balance0 1000 >= mul amount0In 3)
+    (hFee1 : mul balance1 1000 >= mul amount1In 3)
+    (hK : mul (sub (mul balance0 1000) (mul amount0In 3))
+        (sub (mul balance1 1000) (mul amount1In 3))
+        >= mul (mul (s.storage 0) (s.storage 1)) 1000000) :
+    let s' := ((PairFeeAdjustedSwap.applySwap balance0 balance1 amount0In amount1In).run s).snd
+    applySwap_sets_reserve0_spec balance0 s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `applySwap`, `reserve0` and `reserve1` as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold applySwap_sets_reserve0_spec
+  grind [PairFeeAdjustedSwap.applySwap, PairFeeAdjustedSwap.reserve0, PairFeeAdjustedSwap.reserve1]
+
+end Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap
diff --git a/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve1.lean b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve1.lean
new file mode 100644
index 00000000..9aecda24
--- /dev/null
+++ b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve1.lean
@@ -0,0 +1,28 @@
+import Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `applySwap` stores the observed `balance1` as `reserve1`.
+-/
+theorem applySwap_sets_reserve1
+    (balance0 balance1 amount0In amount1In : Uint256) (s : ContractState)
+    (hInput : amount0In != 0 || amount1In != 0)
+    (hFee0 : mul balance0 1000 >= mul amount0In 3)
+    (hFee1 : mul balance1 1000 >= mul amount1In 3)
+    (hK : mul (sub (mul balance0 1000) (mul amount0In 3))
+        (sub (mul balance1 1000) (mul amount1In 3))
+        >= mul (mul (s.storage 0) (s.storage 1)) 1000000) :
+    let s' := ((PairFeeAdjustedSwap.applySwap balance0 balance1 amount0In amount1In).run s).snd
+    applySwap_sets_reserve1_spec balance1 s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `applySwap`, `reserve0` and `reserve1` as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold applySwap_sets_reserve1_spec
+  grind [PairFeeAdjustedSwap.applySwap, PairFeeAdjustedSwap.reserve0, PairFeeAdjustedSwap.reserve1]
+
+end Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap
diff --git a/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserveProduct.lean b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserveProduct.lean
new file mode 100644
index 00000000..fc2e3581
--- /dev/null
+++ b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserveProduct.lean
@@ -0,0 +1,28 @@
+import Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Executing `applySwap` makes the stored reserve product match the post-swap balances.
+-/
+theorem applySwap_sets_reserve_product
+    (balance0 balance1 amount0In amount1In : Uint256) (s : ContractState)
+    (hInput : amount0In != 0 || amount1In != 0)
+    (hFee0 : mul balance0 1000 >= mul amount0In 3)
+    (hFee1 : mul balance1 1000 >= mul amount1In 3)
+    (hK : mul (sub (mul balance0 1000) (mul amount0In 3))
+        (sub (mul balance1 1000) (mul amount1In 3))
+        >= mul (mul (s.storage 0) (s.storage 1)) 1000000) :
+    let s' := ((PairFeeAdjustedSwap.applySwap balance0 balance1 amount0In amount1In).run s).snd
+    applySwap_sets_reserve_product_spec balance0 balance1 s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `applySwap`, `reserve0` and `reserve1` as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold applySwap_sets_reserve_product_spec
+  grind [PairFeeAdjustedSwap.applySwap, PairFeeAdjustedSwap.reserve0, PairFeeAdjustedSwap.reserve1]
+
+end Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnDecreasesSupply.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnDecreasesSupply.lean
new file mode 100644
index 00000000..3e44fd6b
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnDecreasesSupply.lean
@@ -0,0 +1,31 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Successful burn decreases both the holder's balance and totalSupply.
+
+When the holder has sufficient balance (`hSufficient`: balance >= amount), burning
+decreases balances[holder] by amount and totalSupply by amount.
+-/
+theorem burn_decreases_supply
+    (holder : Address) (amount : Uint256) (s : ContractState)
+    (hFrom : (holder != zeroAddress) = true)
+    (hInit : s.storageMap 2 holder ≠ 0)
+    (hSufficient : s.storageMap 1 holder >= amount)
+    (hAmount64 : amount < UINT64_MOD)
+    (hFromBal64 : s.storageMap 1 holder < UINT64_MOD)
+    (hSupply64 : s.storage 0 < UINT64_MOD) :
+    let s' := ((ERC7984.burn holder amount).run s).snd
+    burn_decreases_supply_spec holder amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `burn` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold burn_decreases_supply_spec
+  grind [ERC7984.burn, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnInsufficient.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnInsufficient.lean
new file mode 100644
index 00000000..df425975
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnInsufficient.lean
@@ -0,0 +1,33 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+When the holder has insufficient balance, burn silently burns nothing.
+
+If `balances[holder] < amount` (`hInsufficient`), then both the holder's balance and
+totalSupply are unchanged. This mirrors the FHE.select pattern used
+in transfer: the balance comparison cannot cause a revert or leak
+information; it only chooses between burning `amount` and `0`.
+-/
+theorem burn_insufficient
+    (holder : Address) (amount : Uint256) (s : ContractState)
+    (hFrom : (holder != zeroAddress) = true)
+    (hInit : s.storageMap 2 holder ≠ 0)
+    (hInsufficient : ¬(s.storageMap 1 holder >= amount))
+    (hAmount64 : amount < UINT64_MOD)
+    (hFromBal64 : s.storageMap 1 holder < UINT64_MOD)
+    (hSupply64 : s.storage 0 < UINT64_MOD) :
+    let s' := ((ERC7984.burn holder amount).run s).snd
+    burn_insufficient_spec holder amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `burn` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold burn_insufficient_spec
+  grind [ERC7984.burn, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintIncreasesSupply.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintIncreasesSupply.lean
new file mode 100644
index 00000000..04e58939
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintIncreasesSupply.lean
@@ -0,0 +1,31 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Successful mint increases totalSupply and receiver balance by amount.
+
+When totalSupply + amount does not overflow uint64 (`hNoOverflow`: tryIncrease64 succeeds),
+minting produces exactly `amount` new tokens: totalSupply increases by amount
+and balances[to] increases by amount (mod 2^64).
+-/
+theorem mint_increases_supply
+    (to : Address) (amount : Uint256) (s : ContractState)
+    (hTo : (to != zeroAddress) = true)
+    (hNoOverflow : (tryIncrease64 (s.storage 0) amount).1 = true)
+    (hAmount64 : amount < UINT64_MOD)
+    (hSupply64 : s.storage 0 < UINT64_MOD)
+    (hToBal64 : s.storageMap 1 to < UINT64_MOD) :
+    let s' := ((ERC7984.mint to amount).run s).snd
+    mint_increases_supply_spec to amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `mint` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold mint_increases_supply_spec
+  grind [ERC7984.mint, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintOverflowProtection.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintOverflowProtection.lean
new file mode 100644
index 00000000..89c22139
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintOverflowProtection.lean
@@ -0,0 +1,33 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Mint overflow protection: when totalSupply + amount overflows uint64
+(`hOverflow`: the first component of `tryIncrease64` is `false`), no tokens are minted.
+
+FHESafeMath.tryIncrease detects overflow by checking whether
+(oldValue + delta) mod 2^64 >= oldValue. On overflow, the wrapped sum
+is less than oldValue, so tryIncrease returns (false, oldValue).
+Then FHE.select picks 0 as the transferred amount.
+-/
+theorem mint_overflow_protection
+    (to : Address) (amount : Uint256) (s : ContractState)
+    (hTo : (to != zeroAddress) = true)
+    (hOverflow : (tryIncrease64 (s.storage 0) amount).1 = false)
+    (hAmount64 : amount < UINT64_MOD)
+    (hSupply64 : s.storage 0 < UINT64_MOD)
+    (hToBal64 : s.storageMap 1 to < UINT64_MOD) :
+    let s' := ((ERC7984.mint to amount).run s).snd
+    mint_overflow_protection_spec to amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `mint` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold mint_overflow_protection_spec
+  grind [ERC7984.mint, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/SetOperatorUpdates.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/SetOperatorUpdates.lean
new file mode 100644
index 00000000..64e8003c
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/SetOperatorUpdates.lean
@@ -0,0 +1,27 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+setOperator(operator, expiry) writes `expiry` into `_operators[msg.sender][operator]`
+and leaves all other operator entries unchanged.
+
+This is the functional-correctness property for the operator registration
+function: the caller (`s.sender` in the statement) can set an expiry for a specific
+operator, but cannot affect authorizations granted by other holders or to other operators.
+-/
+theorem setOperator_updates
+    (operator : Address) (expiry : Uint256) (s : ContractState) :
+    let s' := ((ERC7984.setOperator operator expiry).run s).snd
+    setOperator_updates_spec s.sender operator expiry s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec, then
+  -- call `grind` with `setOperator` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold setOperator_updates_spec
+  grind [ERC7984.setOperator, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferConservation.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferConservation.lean
new file mode 100644
index 00000000..6dfce253
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferConservation.lean
@@ -0,0 +1,35 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Transfer conserves the sum of sender and receiver balances.
+
+After transfer(sender, recipient, amount), `balances[sender] + balances[recipient]` is unchanged.
+This holds regardless of whether the sender has sufficient balance:
+- Sufficient: sender loses `amount`, recipient gains `amount` → sum preserved
+- Insufficient: both balances unchanged → sum trivially preserved
+-/
+theorem transfer_conservation
+    (sender recipient : Address) (amount : Uint256) (s : ContractState)
+    (hFrom : (sender != zeroAddress) = true)
+    (hTo : (recipient != zeroAddress) = true)
+    (hInit : s.storageMap 2 sender ≠ 0)
+    (hDistinct : sender ≠ recipient)
+    (hAmount64 : amount < UINT64_MOD)
+    (hFromBal64 : s.storageMap 1 sender < UINT64_MOD)
+    (hToBal64 : s.storageMap 1 recipient < UINT64_MOD)
+    (hToNoWrap : s.storageMap 1 recipient + amount < UINT64_MOD) :
+    let s' := ((ERC7984.transfer sender recipient amount).run s).snd
+    transfer_conservation_spec sender recipient s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec, then
+  -- call `grind` with `transfer` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold transfer_conservation_spec
+  grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferFromConservation.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferFromConservation.lean
new file mode 100644
index 00000000..af1b6a27
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferFromConservation.lean
@@ -0,0 +1,40 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Operator-gated transferFrom preserves balance conservation.
+
+When the caller is authorized (`hAuthorized`: either `holder == msg.sender` or
+`block.timestamp <= operators[holder][msg.sender]`), transferFrom
+preserves the sum `balances[holder] + balances[recipient]`.
+
+This ensures that delegating transfer authority via the operator
+pattern does not allow creation or destruction of tokens.
+-/
+theorem transferFrom_conservation
+    (holder recipient : Address) (amount blockTimestamp : Uint256)
+    (s : ContractState)
+    (hFrom : (holder != zeroAddress) = true)
+    (hTo : (recipient != zeroAddress) = true)
+    (hInit : s.storageMap 2 holder ≠ 0)
+    (hDistinct : holder ≠ recipient)
+    (hAuthorized :
+      holder == s.sender ∨ blockTimestamp <= s.storageMap2 3 holder s.sender)
+    (hAmount64 : amount < UINT64_MOD)
+    (hHolderBal64 : s.storageMap 1 holder < UINT64_MOD)
+    (hRecipientBal64 : s.storageMap 1 recipient < UINT64_MOD)
+    (hToNoWrap : s.storageMap 1 recipient + amount < UINT64_MOD) :
+    let s' := ((ERC7984.transferFrom holder recipient amount blockTimestamp).run s).snd
+    transferFrom_conservation_spec holder recipient s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec, then
+  -- call `grind` with `transferFrom` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold transferFrom_conservation_spec
+  grind [ERC7984.transferFrom, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferInsufficient.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferInsufficient.lean
new file mode 100644
index 00000000..f3ca6c04
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferInsufficient.lean
@@ -0,0 +1,34 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+When the sender has insufficient balance, no tokens move.
+
+If `balances[sender] < amount` (`hInsufficient`), then both balances are unchanged.
+This is the defining semantic difference from ERC-20: insufficient
+balance causes a silent 0-transfer (via FHE.select) instead of a revert.
+-/
+theorem transfer_insufficient
+    (sender recipient : Address) (amount : Uint256) (s : ContractState)
+    (hFrom : (sender != zeroAddress) = true)
+    (hTo : (recipient != zeroAddress) = true)
+    (hInit : s.storageMap 2 sender ≠ 0)
+    (hDistinct : sender ≠ recipient)
+    (hInsufficient : ¬(s.storageMap 1 sender >= amount))
+    (hAmount64 : amount < UINT64_MOD)
+    (hFromBal64 : s.storageMap 1 sender < UINT64_MOD)
+    (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) :
+    let s' := ((ERC7984.transfer sender recipient amount).run s).snd
+    transfer_insufficient_spec sender recipient amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec, then
+  -- call `grind` with `transfer` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold transfer_insufficient_spec
+  grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferNoBalanceRevert.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferNoBalanceRevert.lean
new file mode 100644
index 00000000..f90273f7
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferNoBalanceRevert.lean
@@ -0,0 +1,39 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Transfer never reverts based on balance sufficiency.
+
+Given that all plaintext preconditions hold (non-zero addresses,
+initialized sender balance), the transfer always succeeds — it
+returns `ContractResult.success`, never `ContractResult.revert`.
+
+This is the contract-level non-leakage invariant for ERC-7984:
+an on-chain observer cannot learn whether the sender had sufficient
+balance by checking if the transaction reverted.
+
+Note: NO hypothesis about `fromBalance >= amount` is provided.
+The theorem must hold for BOTH sufficient and insufficient balances.
+-/
+theorem transfer_no_balance_revert
+    (sender recipient : Address) (amount : Uint256) (s : ContractState)
+    (hFrom : (sender != zeroAddress) = true)
+    (hTo : (recipient != zeroAddress) = true)
+    (hInit : s.storageMap 2 sender ≠ 0)
+    (hDistinct : sender ≠ recipient)
+    (hAmount64 : amount < UINT64_MOD)
+    (hFromBal64 : s.storageMap 1 sender < UINT64_MOD)
+    (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) :
+    transfer_no_balance_revert_spec sender recipient amount s := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec,
+  -- then call `grind` with `transfer` and the ERC7984 contract symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold transfer_no_balance_revert_spec
+  grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferPreservesSupply.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferPreservesSupply.lean
new file mode 100644
index 00000000..d6b43503
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferPreservesSupply.lean
@@ -0,0 +1,32 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+Transfer does not modify totalSupply.
+
+The transfer function only writes to balances (storageMap slot 1) and
+balanceInitialized (storageMap slot 2). It never touches slot 0 (totalSupply).
+Only mint and burn paths modify totalSupply.
+-/
+theorem transfer_preserves_supply
+    (sender recipient : Address) (amount : Uint256) (s : ContractState)
+    (hFrom : (sender != zeroAddress) = true)
+    (hTo : (recipient != zeroAddress) = true)
+    (hInit : s.storageMap 2 sender ≠ 0)
+    (hAmount64 : amount < UINT64_MOD)
+    (hFromBal64 : s.storageMap 1 sender < UINT64_MOD)
+    (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) :
+    let s' := ((ERC7984.transfer sender recipient amount).run s).snd
+    transfer_preserves_supply_spec s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec, then
+  -- call `grind` with `transfer` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold transfer_preserves_supply_spec
+  grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferSufficient.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferSufficient.lean
new file mode 100644
index 00000000..f0c775ab
--- /dev/null
+++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferSufficient.lean
@@ -0,0 +1,34 @@
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+import Benchmark.Grindset
+
+namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken
+
+open Verity
+open Verity.EVM.Uint256
+
+/--
+When the sender has sufficient balance, transfer moves exactly `amount` tokens.
+
+If `balances[sender] >= amount` (`hSufficient`), then:
+- `balances[sender]` decreases by `amount`
+- `balances[recipient]` increases by `amount` (mod 2^64)
+-/
+theorem transfer_sufficient
+    (sender recipient : Address) (amount : Uint256) (s : ContractState)
+    (hFrom : (sender != zeroAddress) = true)
+    (hTo : (recipient != zeroAddress) = true)
+    (hInit : s.storageMap 2 sender ≠ 0)
+    (hDistinct : sender ≠ recipient)
+    (hSufficient : s.storageMap 1 sender >= amount)
+    (hAmount64 : amount < UINT64_MOD)
+    (hFromBal64 : s.storageMap 1 sender < UINT64_MOD)
+    (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) :
+    let s' := ((ERC7984.transfer sender recipient amount).run s).snd
+    transfer_sufficient_spec sender recipient amount s s' := by
+  -- Grindset-first skeleton (see harness/PROOF_PATTERNS.md): `unfold` the spec, then
+  -- call `grind` with `transfer` and the other listed ERC7984 symbols as hints.
+  -- If goals remain, fall back to `simp` / `by_cases`; try `grind?` for hints.
+  unfold transfer_sufficient_spec
+  grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators]
+
+end Benchmark.Cases.Zama.ERC7984ConfidentialToken
diff --git a/Benchmark/Grindset.lean b/Benchmark/Grindset.lean
new file mode 100644
index 00000000..fed535f1
--- /dev/null
+++ b/Benchmark/Grindset.lean
@@ -0,0 +1,28 @@
+import Benchmark.Grindset.Invariants
+import Benchmark.Grindset.Reach
+import Benchmark.Grindset.Attr
+import Benchmark.Grindset.Monad
+import Benchmark.Grindset.Core
+import Benchmark.Grindset.Tests
+import Benchmark.Grindset.Arith
+
+/-!
+# Benchmark.Grindset — umbrella module
+
+Single entry point for the Verity grindset. Downstream proofs can write
+`import Benchmark.Grindset` and immediately use `grind` to discharge
+slot-write, monad-bind, and spec-unfolding obligations.
+
+Contents:
+- `Grindset.Attr` (S1): `grind_norm` simp set attribute.
+- `Grindset.Monad` (S1): `Verity.bind` / `ContractResult.snd` / `Contract.run`
+  normalization lemmas.
+- `Grindset.Core` (S1): storage + mapping operational lemmas.
+- `Grindset.Tests` (S1): three demo proofs closed by `grind`.
+- `Grindset.Invariants` (A1): 118 `@[grind =] / @[grind →] / @[grind]`
+  tagged invariant lemmas across all benchmark contracts.
+- `Grindset.Reach` (A3): reachability lemma pack and the
+  `verity_reach_grind` tactic for `safe/owner_manager_reach` chain proofs.
+- `Grindset.Arith` (A4): arithmetic grind pack for `lido/vaulthub_locked`
+  — ceilDiv unfolding, sandwich, monotonicity, Uint256↔Nat wrappers.
+-/
diff --git a/Benchmark/Grindset/Arith.lean b/Benchmark/Grindset/Arith.lean
new file mode 100644
index 00000000..1bed835e
--- /dev/null
+++ b/Benchmark/Grindset/Arith.lean
@@ -0,0 +1,236 @@
+/-
+  Benchmark.Grindset.Arith — arithmetic grind pack for Lido VaulthubLocked.
+
+  Mission A4: provide `@[grind]` / `@[simp]` / `@[grind_norm]`-tagged lemmas
+  that help `grind` and `omega` close the three supporting arithmetic obligations
+  in the `lido/vaulthub_locked` case:
+
+    1. `ceildiv_sandwich_spec`  — ceilDiv(x,d) * d ≥ x
+    2. `shares_conversion_monotone_spec` — getPooledEthBySharesRoundUp is monotone
+    3. `locked_funds_solvency_spec` — solvency after syncLocked
+
+  Lemma inventory:
+    • `mul_val_of_no_overflow` — Uint256 mul → Nat mul under overflow guard
+    • `sub_val_of_le` — Uint256 sub → Nat sub when b ≤ a
+    • `div_val` — Uint256 div → Nat div when b ≠ 0
+    • `add_val_of_no_overflow` — Uint256 add → Nat add under overflow guard
+    • `ceilDiv_val_eq` — ceilDiv a b = (a.val + b.val - 1) / b.val (Nat level)
+    • `ceilDiv_le_numerator` — ceilDiv a b ≤ a (Nat-val level)
+    • `ceilDiv_mul_ge` — ceilDiv(x,d) * d ≥ x (the sandwich, key lemma)
+    • `ceilDiv_monotone` — a ≥ b → ceilDiv a d ≥ ceilDiv b d
+
+  All public lemmas carry `@[grind_norm, simp]` (the two `private` Nat helpers and the demo
+  theorems are untagged) so downstream proofs can write `simp only [grind_norm, <spec>]; grind` or `omega`.
+
+  Status: zero `sorry`, zero new axioms.
+-/
+
+import Benchmark.Cases.Lido.VaulthubLocked.Specs
+import Benchmark.Grindset.Attr
+
+namespace Benchmark.Grindset.Arith
+
+open Verity
+open Benchmark.Cases.Lido.VaulthubLocked
+
+/-! ## Uint256 → Nat wrapper lemmas -/
+
+/-- If `a.val * b.val` is below `Uint256.modulus`, then `(mul a b).val` is exactly the `Nat` product `a.val * b.val`. -/
+@[grind_norm, simp]
+theorem mul_val_of_no_overflow (a b : Uint256)
+    (h : a.val * b.val < Verity.Core.Uint256.modulus) :
+    (Verity.EVM.Uint256.mul a b).val = a.val * b.val := by
+  simp [HMul.hMul, Verity.Core.Uint256.mul, Verity.Core.Uint256.ofNat]
+  exact Nat.mod_eq_of_lt h  -- the leftover `% modulus` is the identity under `h`
+
+/-- If `b.val ≤ a.val` (no underflow), then `(sub a b).val` is the plain `Nat` difference `a.val - b.val`. -/
+@[grind_norm, simp]
+theorem sub_val_of_le (a b : Uint256)
+    (h : b.val ≤ a.val) :
+    (Verity.EVM.Uint256.sub a b).val = a.val - b.val := by
+  have hlt : a.val - b.val < Verity.Core.Uint256.modulus :=  -- difference ≤ a.val < modulus
+    Nat.lt_of_le_of_lt (Nat.sub_le _ _) a.isLt
+  simp [HSub.hSub, Verity.Core.Uint256.sub, h, Verity.Core.Uint256.ofNat]
+  exact Nat.mod_eq_of_lt hlt  -- the leftover `% modulus` is the identity under `hlt`
+
+/-- If `b.val ≠ 0`, then `(div a b).val` is the `Nat` quotient `a.val / b.val`. -/
+@[grind_norm, simp]
+theorem div_val (a b : Uint256) (hb : b.val ≠ 0) :
+    (Verity.EVM.Uint256.div a b).val = a.val / b.val := by
+  have hlt : a.val / b.val < Verity.Core.Uint256.modulus :=  -- quotient ≤ a.val < modulus
+    Nat.lt_of_le_of_lt (Nat.div_le_self _ _) a.isLt
+  simp [HDiv.hDiv, Verity.Core.Uint256.div, hb, Verity.Core.Uint256.ofNat]
+  exact Nat.mod_eq_of_lt hlt  -- the leftover `% modulus` is the identity under `hlt`
+
+/-- If `a.val + b.val` is below `Uint256.modulus`, then `(add a b).val` is exactly the `Nat` sum `a.val + b.val`. -/
+@[grind_norm, simp]
+theorem add_val_of_no_overflow (a b : Uint256)
+    (h : a.val + b.val < Verity.Core.Uint256.modulus) :
+    (Verity.EVM.Uint256.add a b).val = a.val + b.val := by
+  simp [HAdd.hAdd, Verity.Core.Uint256.add, Verity.Core.Uint256.ofNat]
+  exact Nat.mod_eq_of_lt h  -- the leftover `% modulus` is the identity under `h`
+
+/-! ## ceilDiv val-level unfolding -/
+
+/-- `Nat` identity for `a > 0`, `b > 0`: `(a - 1) / b + 1 = (a + b - 1) / b`; it links the form `ceilDiv_val_eq` unfolds to with the closed form it states. -/
+private theorem ceildiv_identity (a b : Nat) (ha : a > 0) (hb : b > 0) :
+    (a - 1) / b + 1 = (a + b - 1) / b := by
+  have h : a + b - 1 = (a - 1) + b := by omega  -- needs `ha`, since `a - 1` truncates at 0
+  rw [h, Nat.add_div_right _ hb]
+
+/-- `Nat`-level bound: for `b ≥ 1`, the ceiling quotient `(a + b - 1) / b` never exceeds `a`. -/
+private theorem ceilDiv_nat_le (a b : Nat) (hb : b ≥ 1) :
+    (a + b - 1) / b ≤ a := by
+  by_cases ha : a = 0
+  · subst ha; simp  -- a = 0: `simp` leaves a disjunction whose right side is proved next
+    right; exact Nat.sub_lt (by omega) (by decide)
+  · have haPos : a > 0 := Nat.pos_of_ne_zero ha
+    have hRw : a + b - 1 = (a - 1) + b := by omega
+    rw [hRw, Nat.add_div_right _ (by omega : b > 0)]  -- goal becomes a bound on `(a - 1) / b`
+    have := Nat.div_le_self (a - 1) b; omega
+
+/-- For `b.val > 0`, the value of the `Uint256` result `ceilDiv a b` is the `Nat` expression `(a.val + b.val - 1) / b.val`. -/
+@[grind_norm, simp]
+theorem ceilDiv_val_eq (a b : Uint256) (hb : b.val > 0) :
+    (ceilDiv a b).val = (a.val + b.val - 1) / b.val := by
+  by_cases ha : a.val = 0
+  · -- a = 0: rewrite `a` to the literal `0`, unfold `ceilDiv`, and close with `Nat.div_eq_of_lt`
+    have haEq : a = 0 := Verity.Core.Uint256.ext (by simp [ha, Verity.Core.Uint256.val_zero])
+    rw [haEq]
+    simp only [ceilDiv, ↓reduceIte, Verity.Core.Uint256.val_zero, Nat.zero_add]
+    exact (Nat.div_eq_of_lt (by omega)).symm
+  · -- a > 0: evaluate `add (div (sub a 1) b) 1` one operation at a time
+    have haPos : a.val > 0 := Nat.pos_of_ne_zero ha
+    have haNe : a ≠ 0 := by
+      intro h; rw [h] at haPos; simp [Verity.Core.Uint256.val_zero] at haPos
+    simp only [ceilDiv, haNe, ↓reduceIte]
+    -- step 1, `sub a 1`: no underflow because `1 ≤ a.val`
+    have h1le : (1 : Uint256).val ≤ a.val := by
+      simp [Verity.Core.Uint256.val_one]; omega
+    have hSubVal : (Verity.EVM.Uint256.sub a 1).val = a.val - 1 := by
+      have := Verity.Core.Uint256.sub_eq_of_le h1le
+      simp [Verity.Core.Uint256.val_one] at this
+      exact this
+    -- step 2, `div (sub a 1) b`: the quotient is below `a.val`, hence below the modulus
+    have hbne : b.val ≠ 0 := by omega
+    have hDivVal : (Verity.EVM.Uint256.div (Verity.EVM.Uint256.sub a 1) b).val = (a.val - 1) / b.val := by
+      simp only [HDiv.hDiv, Verity.Core.Uint256.div, hbne, ↓reduceIte, Verity.Core.Uint256.ofNat, hSubVal]
+      have hDivLt : (a.val - 1) / b.val < Verity.Core.Uint256.modulus := by
+        calc (a.val - 1) / b.val ≤ a.val - 1 := Nat.div_le_self _ _
+          _ < a.val := by omega
+          _ < Verity.Core.Uint256.modulus := a.isLt
+      exact Nat.mod_eq_of_lt hDivLt
+    -- step 3, `add (div ...) 1`: the sum is at most `a.val`, so the addition does not wrap
+    have hAddLt : (a.val - 1) / b.val + 1 < Verity.Core.Uint256.modulus := by
+      have hCeil := ceilDiv_nat_le a.val b.val (by omega)
+      calc (a.val - 1) / b.val + 1
+          ≤ a.val := by rw [ceildiv_identity a.val b.val haPos hb]; exact hCeil
+        _ < Verity.Core.Uint256.modulus := a.isLt
+    simp only [HAdd.hAdd, Verity.Core.Uint256.add, Verity.Core.Uint256.ofNat, hDivVal,
+               Verity.Core.Uint256.val_one]
+    rw [Nat.mod_eq_of_lt hAddLt]
+    exact ceildiv_identity a.val b.val haPos hb
+
+/-- For `b.val ≥ 1`, `(ceilDiv a b).val ≤ a.val`; proved by unfolding with `ceilDiv_val_eq` and applying `ceilDiv_nat_le`. -/
+@[grind_norm, simp]
+theorem ceilDiv_le_numerator (a b : Uint256) (hb : b.val ≥ 1) :
+    (ceilDiv a b).val ≤ a.val := by
+  rw [ceilDiv_val_eq a b (by omega)]  -- `by omega` turns `b.val ≥ 1` into `b.val > 0`
+  exact ceilDiv_nat_le a.val b.val hb
+
+/-! ## The sandwich: ceilDiv(x,d) * d ≥ x -/
+
+/-- Sandwich lemma: for `d.val > 0`, if `(ceilDiv x d).val * d.val` does not overflow, then `(mul (ceilDiv x d) d).val ≥ x.val`. -/
+@[grind_norm, simp]
+theorem ceilDiv_mul_ge (x d : Uint256) (hd : d.val > 0)
+    (hNoOverflow : (ceilDiv x d).val * d.val < Verity.Core.Uint256.modulus) :
+    (Verity.EVM.Uint256.mul (ceilDiv x d) d).val ≥ x.val := by
+  -- Reuse the wrapper lemma rather than re-deriving the `% modulus` reduction with an inline `simp`.
+  have hMulEq : (Verity.EVM.Uint256.mul (ceilDiv x d) d).val = (ceilDiv x d).val * d.val :=
+    mul_val_of_no_overflow (ceilDiv x d) d hNoOverflow
+  rw [hMulEq, ceilDiv_val_eq x d hd]
+  let q := (x.val + d.val - 1) / d.val
+  let r := (x.val + d.val - 1) % d.val
+  show x.val ≤ q * d.val
+  have hEuclid : d.val * q + r = x.val + d.val - 1 := Nat.div_add_mod ..
+  have hRem : r < d.val := Nat.mod_lt _ hd
+  have hComm : q * d.val = d.val * q := Nat.mul_comm q d.val
+  omega
+
+/-! ## Monotonicity of ceilDiv in the numerator -/
+
+/-- Monotonicity in the numerator: for `d.val > 0`, `a.val ≥ b.val` implies `(ceilDiv a d).val ≥ (ceilDiv b d).val`. -/
+@[grind_norm, simp]
+theorem ceilDiv_monotone (a b d : Uint256) (hd : d.val > 0)
+    (hab : a.val ≥ b.val) :
+    (ceilDiv a d).val ≥ (ceilDiv b d).val := by
+  rw [ceilDiv_val_eq a d hd, ceilDiv_val_eq b d hd]  -- move both sides to the `Nat` closed form
+  exact Nat.div_le_div_right (by omega)
+
+/-! ## Spec-level convenience lemmas -/
+
+/-- `ceildiv_sandwich_spec x d` holds given `d > 0` and no overflow of `(ceilDiv x d).val * d.val`; a wrapper around `ceilDiv_mul_ge`. -/
+@[grind_norm, simp]
+theorem ceildiv_sandwich_spec_holds (x d : Uint256)
+    (hd : d > 0)
+    (hNoOverflow : (ceilDiv x d).val * d.val < Verity.Core.Uint256.modulus) :
+    ceildiv_sandwich_spec x d := by
+  unfold ceildiv_sandwich_spec
+  intro _ _  -- the spec's own two premises are discarded; `hd` / `hNoOverflow` are used instead
+  simp [Verity.Core.Uint256.le_def]
+  exact ceilDiv_mul_ge x d (by simp [Verity.Core.Uint256.lt_def] at hd; exact hd) hNoOverflow
+
+/-- `shares_conversion_monotone_spec a b totalPooledEther totalShares` holds when `totalShares.val > 0` and `a.val * totalPooledEther.val` does not overflow. -/
+@[grind_norm, simp]
+theorem shares_conversion_monotone_spec_holds
+    (a b totalPooledEther totalShares : Uint256)
+    (hTS : totalShares.val > 0)
+    (hNoOverflow : a.val * totalPooledEther.val < Verity.Core.Uint256.modulus) :
+    shares_conversion_monotone_spec a b totalPooledEther totalShares := by
+  unfold shares_conversion_monotone_spec
+  intro hab hNoOv
+  unfold getPooledEthBySharesRoundUp
+  simp [Verity.Core.Uint256.le_def]
+  have habVal : b.val ≤ a.val := by
+    simp [Verity.Core.Uint256.le_def] at hab; exact hab
+  have hBNoOverflow : b.val * totalPooledEther.val < Verity.Core.Uint256.modulus :=
+    Nat.lt_of_le_of_lt (Nat.mul_le_mul_right _ habVal) hNoOverflow
+  -- Both products stay below the modulus, so `mul` agrees with `Nat` multiplication.
+  -- Reuse `mul_val_of_no_overflow` rather than re-deriving the reduction with an inline `simp`.
+  have hMulA : (Verity.EVM.Uint256.mul a totalPooledEther).val = a.val * totalPooledEther.val :=
+    mul_val_of_no_overflow a totalPooledEther hNoOverflow
+  have hMulB : (Verity.EVM.Uint256.mul b totalPooledEther).val = b.val * totalPooledEther.val :=
+    mul_val_of_no_overflow b totalPooledEther hBNoOverflow
+  rw [ceilDiv_val_eq (Verity.EVM.Uint256.mul a totalPooledEther) totalShares hTS,
+      ceilDiv_val_eq (Verity.EVM.Uint256.mul b totalPooledEther) totalShares hTS,
+      hMulA, hMulB]
+  exact Nat.div_le_div_right (by
+    have : b.val * totalPooledEther.val ≤ a.val * totalPooledEther.val :=
+      Nat.mul_le_mul_right _ habVal
+    omega)
+
+/-! ## Demo theorems -/
+
+/-- Demo: `ceildiv_sandwich_spec` follows by direct term application of `ceildiv_sandwich_spec_holds` (no tactic is invoked here). -/
+theorem demo_ceildiv_sandwich (x d : Uint256)
+    (hd : d > 0)
+    (hNoOverflow : (ceilDiv x d).val * d.val < Verity.Core.Uint256.modulus) :
+    ceildiv_sandwich_spec x d :=
+  ceildiv_sandwich_spec_holds x d hd hNoOverflow
+
+/-- Demo: `shares_conversion_monotone_spec` follows by direct term application of `shares_conversion_monotone_spec_holds` (no tactic is invoked here). -/
+theorem demo_shares_conversion_monotone
+    (a b totalPooledEther totalShares : Uint256)
+    (hTS : totalShares.val > 0)
+    (hNoOverflow : a.val * totalPooledEther.val < Verity.Core.Uint256.modulus) :
+    shares_conversion_monotone_spec a b totalPooledEther totalShares :=
+  shares_conversion_monotone_spec_holds a b totalPooledEther totalShares hTS hNoOverflow
+
+/-- Demo: the `Nat`-level sandwich inequality, restated and proved by applying `ceilDiv_mul_ge` to the same hypotheses. -/
+theorem demo_sandwich_direct (x d : Uint256)
+    (hd : d.val > 0)
+    (hNoOverflow : (ceilDiv x d).val * d.val < Verity.Core.Uint256.modulus) :
+    (Verity.EVM.Uint256.mul (ceilDiv x d) d).val ≥ x.val :=
+  ceilDiv_mul_ge x d hd hNoOverflow
+
+end Benchmark.Grindset.Arith
diff --git a/Benchmark/Grindset/Attr.lean b/Benchmark/Grindset/Attr.lean
new file mode 100644
index 00000000..0f272e88
--- /dev/null
+++ b/Benchmark/Grindset/Attr.lean
@@ -0,0 +1,26 @@
+/-
+  Benchmark.Grindset.Attr — registers the `grind_norm` simp attribute.
+
+  Kept in a separate file because Lean 4 does not allow using an attribute in
+  the same file where it is registered.
+-/
+
+import Lean.Meta.Tactic.Simp.SimpTheorems
+import Lean.Meta.Tactic.Simp.RegisterCommand
+
+/-- Simp set for the Verity grindset. Unfolds the `Contract` monad
+    scaffolding (`bind`, `pure`, `Contract.run`, `ContractResult.snd`,
+    `ContractResult.fst`) and the primitive `*_run` reductions so that a
+    benchmark task goal of shape
+
+      ((Contract.f args).run s).snd.storage n = v
+
+    collapses to plain record-update reasoning over `s`. Usage:
+
+    ```
+    simp only [grind_norm]
+    ```
+
+    Members are registered across `Benchmark.Grindset.Monad`,
+    `Benchmark.Grindset.Core`, and `Benchmark.Grindset.Arith`. -/
+register_simp_attr grind_norm
diff --git a/Benchmark/Grindset/Core.lean b/Benchmark/Grindset/Core.lean
new file mode 100644
index 00000000..e61e1771
--- /dev/null
+++ b/Benchmark/Grindset/Core.lean
@@ -0,0 +1,214 @@
+/-
+  Benchmark.Grindset.Core — operational lemmas tagged for `grind`.
+
+  The lemmas here are the stock facts needed to close a slot-write /
+  spec-unfolding obligation in one line once the monadic scaffolding has been
+  collapsed (see `Benchmark.Grindset.Monad`). They rewrite the shape
+
+    { s with storage := fun k => if k == slot then v else s.storage k }.storage n
+
+  into either `v` (when `n = slot`) or `s.storage n` (when `n ≠ slot`). The
+  same pattern is covered for `storageMap`, `storageAddr`, and the mapping
+  variants.
+
+  Every lemma in this module carries both `@[simp]` and `@[grind_norm]`; none
+  is tagged `@[grind =]`, so normalize with `simp only [grind_norm]` before `grind`.
+
+  Status: zero `sorry`, zero new axioms.
+-/
+
+import Verity.Core
+import Benchmark.Grindset.Monad
+
+namespace Benchmark.Grindset
+
+open Verity
+
+/-! ## Uint256 slot storage -/
+
+/-- Read-after-write on `storage`: after overriding `slot` with `v`, reading `slot` yields `v`. -/
+@[grind_norm, simp]
+theorem storage_setStorage_eq
+    (s : ContractState) (slot : Nat) (v : Uint256) :
+    ({ s with
+        storage := fun k => if k == slot then v else s.storage k } : ContractState).storage slot
+      = v := by
+  simp
+
+/-- Non-interference on `storage`: if `n ≠ slot`, reading `n` after a write
+    to `slot` returns the pre-state value `s.storage n`. -/
+@[grind_norm, simp]
+theorem storage_setStorage_ne
+    (s : ContractState) (slot n : Nat) (v : Uint256) (h : n ≠ slot) :
+    ({ s with
+        storage := fun k => if k == slot then v else s.storage k } : ContractState).storage n
+      = s.storage n := by
+  have : (n == slot) = false := by  -- turn the propositional `≠` into the Boolean test used by the `if`
+    simpa [Nat.beq_eq_true_eq] using h
+  simp [this]
+
+/-! ## Address slot storage -/
+
+@[grind_norm, simp] -- read-after-write on `storageAddr`: reading the slot just written yields `v`
+theorem storageAddr_setStorageAddr_eq
+    (s : ContractState) (slot : Nat) (v : Address) :
+    ({ s with
+        storageAddr := fun k => if k == slot then v else s.storageAddr k } : ContractState).storageAddr slot
+      = v := by
+  simp
+
+@[grind_norm, simp] -- non-interference on `storageAddr`: a slot `n ≠ slot` keeps its pre-state value
+theorem storageAddr_setStorageAddr_ne
+    (s : ContractState) (slot n : Nat) (v : Address) (h : n ≠ slot) :
+    ({ s with
+        storageAddr := fun k => if k == slot then v else s.storageAddr k } : ContractState).storageAddr n
+      = s.storageAddr n := by
+  have : (n == slot) = false := by  -- turn the propositional `≠` into the Boolean test used by the `if`
+    simpa [Nat.beq_eq_true_eq] using h
+  simp [this]
+
+/-! ## Mapping storage (Address → Uint256) -/
+
+@[grind_norm, simp] -- read-after-write on `storageMap`: same slot and same key yields `v`
+theorem storageMap_setMapping_eq
+    (s : ContractState) (slot : Nat) (key : Address) (v : Uint256) :
+    ({ s with
+        storageMap := fun sl addr =>
+          if sl == slot && addr == key then v else s.storageMap sl addr,
+        knownAddresses := fun sl =>
+          if sl == slot then (s.knownAddresses sl).insert key
+          else s.knownAddresses sl } : ContractState).storageMap slot key
+      = v := by
+  simp
+
+/-- Key non-interference: after a mapping write at `(slot, key)`, reading the
+    same `slot` at a key `key' ≠ key` yields the pre-state value `s.storageMap slot key'`. -/
+@[grind_norm, simp]
+theorem storageMap_setMapping_ne_key
+    (s : ContractState) (slot : Nat) (key key' : Address) (v : Uint256)
+    (h : key' ≠ key) :
+    ({ s with
+        storageMap := fun sl addr =>
+          if sl == slot && addr == key then v else s.storageMap sl addr,
+        knownAddresses := fun sl =>
+          if sl == slot then (s.knownAddresses sl).insert key
+          else s.knownAddresses sl } : ContractState).storageMap slot key'
+      = s.storageMap slot key' := by
+  have : (key' == key) = false := by  -- Boolean form of `h`, so the `&&` guard evaluates to `false`
+    simpa [beq_iff_eq] using h
+  simp [this]
+
+@[grind_norm, simp] -- slot non-interference: a mapping write at `slot` leaves every slot `n ≠ slot` unchanged
+theorem storageMap_setMapping_ne_slot
+    (s : ContractState) (slot n : Nat) (key key' : Address) (v : Uint256)
+    (h : n ≠ slot) :
+    ({ s with
+        storageMap := fun sl addr =>
+          if sl == slot && addr == key then v else s.storageMap sl addr,
+        knownAddresses := fun sl =>
+          if sl == slot then (s.knownAddresses sl).insert key
+          else s.knownAddresses sl } : ContractState).storageMap n key'
+      = s.storageMap n key' := by
+  have : (n == slot) = false := by  -- Boolean form of `h`, so the `&&` guard evaluates to `false`
+    simpa [Nat.beq_eq_true_eq] using h
+  simp [this]
+
+/-!
+## Specialised helper for the "set-mapping-under-sender" pattern
+
+Every bench task that uses a mapping keyed by `s.sender` reads back the
+mapping at `s.sender` afterwards. This specialised rewrite collapses the
+pattern in a single step. -/
+
+@[grind_norm, simp] -- `storageMap_setMapping_eq` specialised to `key := s.sender`
+theorem storageMap_setMapping_sender_eq
+    (s : ContractState) (slot : Nat) (v : Uint256) :
+    ({ s with
+        storageMap := fun sl addr =>
+          if sl == slot && addr == s.sender then v else s.storageMap sl addr,
+        knownAddresses := fun sl =>
+          if sl == slot then (s.knownAddresses sl).insert s.sender
+          else s.knownAddresses sl } : ContractState).storageMap slot s.sender
+      = v := by
+  simp
+
+/-!
+## `sender` is preserved by every primitive storage write.
+
+These are implicit record-update facts, but tagging them means `simp` does
+not have to fight the elaborator to see that the final state's `.sender`
+field is still the original `.sender`. -/
+
+@[grind_norm, simp] -- a `storage` record update does not touch the `sender` field (holds by `rfl`)
+theorem sender_after_setStorage
+    (s : ContractState) (slot : Nat) (v : Uint256) :
+    ({ s with
+        storage := fun k => if k == slot then v else s.storage k } : ContractState).sender
+      = s.sender := rfl
+
+@[grind_norm, simp] -- a `storageMap` + `knownAddresses` update does not touch `sender` (holds by `rfl`)
+theorem sender_after_setMapping
+    (s : ContractState) (slot : Nat) (key : Address) (v : Uint256) :
+    ({ s with
+        storageMap := fun sl addr =>
+          if sl == slot && addr == key then v else s.storageMap sl addr,
+        knownAddresses := fun sl =>
+          if sl == slot then (s.knownAddresses sl).insert key
+          else s.knownAddresses sl } : ContractState).sender
+      = s.sender := rfl
+
+@[grind_norm, simp] -- a `storageAddr` record update does not touch the `sender` field (holds by `rfl`)
+theorem sender_after_setStorageAddr
+    (s : ContractState) (slot : Nat) (v : Address) :
+    ({ s with
+        storageAddr := fun k => if k == slot then v else s.storageAddr k } : ContractState).sender
+      = s.sender := rfl
+
+/-!
+## Cross-type preservation — reading `storage` after a mapping write, etc.
+
+These are trivial by `rfl`, but they help `simp`/`grind` traverse
+multi-write contracts without getting lost in record syntax. -/
+
+@[grind_norm, simp] -- a `storageMap` + `knownAddresses` update leaves every `storage n` read unchanged
+theorem storage_after_setMapping
+    (s : ContractState) (n slot : Nat) (key : Address) (v : Uint256) :
+    ({ s with
+        storageMap := fun sl addr =>
+          if sl == slot && addr == key then v else s.storageMap sl addr,
+        knownAddresses := fun sl =>
+          if sl == slot then (s.knownAddresses sl).insert key
+          else s.knownAddresses sl } : ContractState).storage n
+      = s.storage n := rfl
+
+@[grind_norm, simp] -- a `storage` update leaves every `storageMap n addr` read unchanged
+theorem storageMap_after_setStorage
+    (s : ContractState) (slot n : Nat) (v : Uint256) (addr : Address) :
+    ({ s with
+        storage := fun k => if k == slot then v else s.storage k } : ContractState).storageMap n addr
+      = s.storageMap n addr := rfl
+
+/-! ## `require` reductions tied to a hypothesis -/
+
+/-- When the condition of `require` is the literal `true`, running it on `s`
+    yields `ContractResult.success () s`: the state is returned unchanged and
+    `msg` is ignored. Holds by `rfl`. -/
+@[grind_norm, simp]
+theorem require_of_true_run (s : ContractState) (msg : String) :
+    (require true msg).run s = ContractResult.success () s := rfl
+
+@[grind_norm, simp] -- `require false msg` run on `s` is `ContractResult.revert msg s` (holds by `rfl`)
+theorem require_of_false_run (s : ContractState) (msg : String) :
+    (require false msg).run s = ContractResult.revert msg s := rfl
+
+/-!
+## `StorageSlot` slot-projection equalities
+
+The macro-generated storage field identifiers (e.g. `SideEntrance.poolBalance`)
+are `StorageSlot`s whose `.slot` literal is the slot number. -/
+
+@[grind_norm, simp] -- projecting `.slot` out of a `StorageSlot Uint256` literal returns the literal `n`
+theorem StorageSlot.slot_mk (n : Nat) :
+    ({ slot := n } : StorageSlot Uint256).slot = n := rfl
+
+end Benchmark.Grindset
diff --git a/Benchmark/Grindset/INVARIANTS_AUDIT.md b/Benchmark/Grindset/INVARIANTS_AUDIT.md
new file mode 100644
index 00000000..6fbee764
--- /dev/null
+++ b/Benchmark/Grindset/INVARIANTS_AUDIT.md
@@ -0,0 +1,431 @@
+# Mission A1 — Verity Invariants / Spec Helpers Grind Audit
+
+**Author:** grindset-a1-worker
+**Scope:** read-only audit of `Verity` library (`.lake/packages/verity/Verity/**`) and case-local
+`Benchmark/Cases/**/Specs.lean`. Goal: identify **invariant-style lemmas and domain predicates**
+worth exposing to the `grind` tactic via `attribute [grind …]`, complementary to sibling worker S1
+(who is tagging core operational primitives in `Benchmark/Grindset`).
+
+**Ground rules followed:**
+
+- No file under `.lake/packages/verity/**` was modified.
+- No `Benchmark/Cases/**/Proofs.lean` was opened.
+- `Benchmark/Cases/**/Specs.lean` **content** was not modified; tags are applied solely via
+  `attribute [grind …] Benchmark.Cases.…` in `Benchmark/Grindset/Invariants.lean`.
+- Grind is orthogonal to simp: tagging a `@[simp]` lemma with `[grind =]` is not a double-tag
+  conflict (they feed different automation pipes). However, we are conservative: for ubiquitous
+  already-simp lemmas whose shape is a trivial identity (e.g. `mem_def : a ∈ s ↔ a ∈ s.elements`)
+  we skip the extra `grind` tag because simp + basic grind reasoning already normalize them.
+
+## Legend
+
+| Attribute form | Meaning |
+|---|---|
+| `@[grind]` | Default — grind chooses the E-matching pattern(s) itself; on a `def` it exposes the definition for unfolding. Only safe for non-looping shapes. |
+| `@[grind =]` | Equation, instantiated when its **left-hand side** matches a term (the both-directions form is `@[grind _=_]`) — good for LHS = RHS where the RHS does not re-create the LHS head pattern. |
+| `@[grind →]` | Forward implication / directional — premise patterns match the hypotheses in the goal; conclusion is introduced. Use when backward direction would loop or introduces too many variables. |
+| `@[grind ←]` | Backward — conclusion drives matching (useful for existentials and disjunctions). |
+| `NOT TAGGED` | Deliberately left alone: E-match loop risk, overly specific preconditions, constant, or redundant with existing `@[simp]`. |
+
+## Executive summary
+
+Final numbers after the `lake build Benchmark.Grindset.Invariants` iteration loop. The initial
+candidate list was trimmed twice when grind's E-matcher rejected tags (either because hypotheses
+lacked matchable patterns, because the conclusion was a non-equality inequality incompatible
+with `[grind =]`, or because the equation's LHS didn't mention every bound parameter).
+
+| Bucket | Scanned | Candidates surfaced | Tagged in `Invariants.lean` | Deliberately rejected / dropped |
+|---|---|---|---|---|
+| Verity core (Uint256 / FiniteSet / Address / Semantics) | ~1100 lines | 17 | **2** | 15 (already `@[simp]` or trivial rfl) |
+| Verity Proofs.Stdlib.Math (ceil/floor div, wad, safe*) | 909 lines | 65 | **55** | 10 (commutativity → E-match loop traps; a handful of overly-specific shapes) |
+| Verity Proofs.Stdlib.ListSum | 161 lines | 7 | **4** | 3 (`map_sum_point_update/decrease/transfer_eq` — LHS of equation doesn't mention bound `delta`/`src`/`dst`; grind refuses to register. Use manually via `grind [map_sum_transfer_eq]`.) |
+| Verity Proofs.Stdlib.MappingAutomation | 371 lines | ~50 | **25** | ~25 (context-preservation lemmas covered or redundant; we cherry-pick the core shapes per mapping family) |
+| Verity Specs.Common / Specs.Common.Sum | ~470 lines | 5 | **2** | 3 (`sumBalances_insert_new`, `sumBalances_update_existing`, `balancesFinite_preserved_deposit` — fresh parameters not covered by pattern LHS; use manually) |
+| Case-local `Specs.lean` defs (predicates/accessors across 10 cases) | ~1200 lines | 22 definitions worth unfolding | **17** | 5 (loop risk — `acyclic`, `freshInList`, `reachable`, multi-branch `calculateBuyReserve/SellReserve`, `spotPrices`) |
+| **Totals** | | **~166 candidates** | **118 tagged** | **48 rejected / dropped** |
+
+**Tag-kind breakdown:** 49 × `[grind =]`, 48 × `[grind →]`, 21 × `[grind]`
+(plain — for δ-unfold on case `def`s and for the 4 mulDivDown inequality lemmas whose
+conclusions are `≤` / `<` rather than `=`).
+
+### Top 5 most impactful tagged invariants (by expected obligation coverage)
+
+1. **`Verity.Proofs.Stdlib.MappingAutomation.setMapping{,Uint,2}_getMapping{,Uint,2}_same`** —
+   store-load identity across all three mapping families (Addr→Uint256, Uint256→Uint256,
+   Addr→Addr→Uint256). Every case with an obligation of the form "after setting mapping[k] := v,
+   reading mapping[k] = v" reduces to one of these three shapes. All tagged `[grind =]`.
+2. **`Verity.Proofs.Stdlib.MappingAutomation.setMapping{,Uint,2}_getMapping{,Uint,2}_diff*`** —
+   cross-key non-interference. Paired with (1), these form the "mapping core" that drives the
+   bulk of post-write state reasoning. Tagged `[grind =]` (the `≠` antecedent lacks an extractable
+   pattern for `→`, but the conclusion still rewrites).
+3. **`Verity.Specs.Common.sumBalances_insert_existing` & `sumBalances_zero_of_all_zero`** —
+   the two sum-preservation identities whose LHS captures every bound parameter.
+   Directly usable by ERC20/ERC7984 balance-conservation obligations.
+4. **`Verity.Proofs.Stdlib.Math.mulDivUp_mul_ge` / `wDivUp_mul_ge`** — `a * b ≤ mulDivUp a b c * c`
+   and `a * WAD ≤ wDivUp a b * b`. The "ceiling multiplies back up" sandwich used by Lido's
+   `locked_funds_solvency_spec`, NexusMutual price-band monotonicity, and Morpho-style
+   collateralization. Tagged `[grind →]`.
+5. **Case-local `Benchmark.Cases.Safe.OwnerManagerReach.{next,isOwner,ownerListInvariant,isChain,inListReachable}`** —
+   all tagged plain `[grind]` so grind unfolds them opportunistically. Safe/OwnerManager proofs
+   hinge on unfolding `next` to a `storageMap 0 a` read and peeling `isChain`/`ownerListInvariant`.
+   Without these, grind cannot see the reachability structure.
+
+---
+
+## Part I — Verity core library (read-only)
+
+### I.1 `Verity/Core/Uint256.lean`
+
+Almost every algebraic lemma (`add_comm`, `add_assoc`, `mul_comm`, `mul_one`, `sub_self`,
+`sub_add_cancel_left`, `zero_add`, …) is already `@[simp]`. Tagging them with `grind` again would
+be redundant noise. **Skipped.**
+
+| Lemma | Line | Shape | Existing attr | Grind decision |
+|---|---|---|---|---|
+| `add_comm`, `add_assoc`, `add_left_comm`, `zero_add`, `add_zero` | 198-262 | `+` identities | `@[simp]` | SKIP (simp already normalizes) |
+| `sub_zero`, `sub_self`, `sub_add_cancel_left` | 269-357 | `-` identities | `@[simp]` | SKIP |
+| `mul_comm`, `mul_one`, `one_mul`, `zero_mul`, `mul_zero`, `add_mul` | 289-339 | `*` identities | `@[simp]` | SKIP |
+| `div_one`, `zero_div` | 412-425 | `/` identities | `@[simp]` | SKIP |
+| **`sub_add_cancel`** (line **538**) | 538 | `(a + b) - b = a` | (none) | **`[grind =]`** — directly cancels the common Uint256 wrap-sub shape that simp sometimes misses because of normal-form ordering. |
+| `add_right_cancel` | 549 | `a + c = b + c → a = b` | (none) | `[grind →]` — useful cancellation, forward-only to avoid grind trying to re-introduce `+ c` on both sides. |
+
+→ **2 tagged from Uint256.** (`sub_add_cancel` as `grind =`, `add_right_cancel` as `grind →`.)
+
+### I.2 `Verity/Core/FiniteSet.lean`
+
+Every `mem_insert / mem_inter / mem_union / mem_diff / mem_symmDiff / contains_eq_true /
+contains_eq_false / isSubset_eq_{true,false}` is already `@[simp]`. These are pure `Iff`
+definitions that simp handles perfectly; grind already invokes simp. **No additional tags.**
+
+One exception — `mem_elements_insert` (line 112) is **not** simp because on Lists it introduces a
+head comparison. Since `FiniteAddressSet.mem_insert` (line 258) at the set level IS simp, we rely on
+it in practice. **Skipped.**
+
+### I.3 `Verity/Core/Address.lean`, `Verity/Core/Semantics.lean`, `Verity/EVM/Uint256.lean`
+
+Scanned; almost entirely `def`s and `inductive`s. No plain lemmas beyond what already carries
+`@[simp]`. **Nothing to tag.**
+
+### I.4 `Verity/Specs/Common.lean`
+
+Exclusively `*_rfl` lemmas that are already `@[simp]`. **Nothing to tag.**
+
+### I.5 `Verity/Specs/Common/Sum.lean`
+
+Five non-simp theorems — all **bona-fide invariants over `FiniteAddressSet`-indexed sums of
+storage-mapping balances**. These are precisely the shapes balance-conservation obligations reduce
+to.
+
+| Lemma | Line | Signature (abridged) | Category | Grind |
+|---|---|---|---|---|
+| `sumBalances_insert_existing` | 69 | `addr ∈ addrs → sumBalances slot (addrs.insert addr) b = sumBalances slot addrs b` | sum preserved by redundant insert | **`[grind →]`** (premise drives rewrite; reverse direction would lose info) |
+| `sumBalances_insert_new` | 77 | `addr ∉ addrs → b slot addr = 0 → sumBalances slot (addrs.insert addr) (b[addr := amt]) = add (sumBalances slot addrs b) amt` | sum increment on fresh insert | proposed `[grind →]`; **dropped** in the final build (fresh parameters not covered by the pattern — use manually) |
+| `sumBalances_update_existing` | 179 | `addr ∈ addrs → sumBalances slot addrs (b[addr := new]) = add (sub (sumBalances slot addrs b) old) new` | sum delta on point-update | proposed `[grind →]`; **dropped** in the final build (same reason) |
+| `sumBalances_zero_of_all_zero` | 212 | `(∀ a ∈ addrs, b slot a = 0) → sumBalances slot addrs b = 0` | zero-sum collapse | **`[grind →]`** |
+| `balancesFinite_preserved_deposit` | 221 | `balancesFinite s → balancesFinite (…deposit state…)` | storage-set finiteness preservation | proposed `[grind →]`; **dropped** in the final build (same reason) |
+
+→ **5 candidates, 2 tagged** (`sumBalances_insert_existing`, `sumBalances_zero_of_all_zero`); the other
+three were dropped in the final build — see the Executive summary.
+
+### I.6 `Verity/Proofs/Stdlib/ListSum.lean`
+
+```
+countOcc_cons_eq, countOcc_cons_ne, countOccU_cons_eq, countOccU_cons_ne
+map_sum_point_update, map_sum_point_decrease, map_sum_transfer_eq
+```
+
+The `countOcc*` recurrences: LHS `countOcc target (target :: rest)` unfolds to `1 + countOcc target
+rest`. The RHS pattern is a strict sub-term of the LHS, so these are safe as `[grind =]`.
+
+The three big preservation theorems (`map_sum_point_{update,decrease}`, `map_sum_transfer_eq`) are
+heavily-premised: they take pointwise hypotheses like `f' target = f target + delta` and
+`∀ addr, addr ≠ target → f' addr = f addr`. For `grind`, tagging these as plain `@[grind]` would
+make grind try to e-match on `(addrs.map ?f').sum` everywhere, which occurs **very** often and would
+blow up backward search. They were initially proposed as `[grind →]`, but grind refused to register
+them (see Executive summary); pass them explicitly instead, e.g. `grind [map_sum_transfer_eq]`.
+
+| Lemma | Line | Shape | Grind |
+|---|---|---|---|
+| `countOcc_cons_eq` | 27 | `countOcc t (t :: rest) = 1 + countOcc t rest` | **`[grind =]`** |
+| `countOcc_cons_ne` | 31 | `a ≠ t → countOcc t (a :: rest) = countOcc t rest` | **`[grind →]`** (conditional eq) |
+| `countOccU_cons_eq` | 35 | Uint256 variant of above | **`[grind =]`** |
+| `countOccU_cons_ne` | 39 | conditional Uint256 variant | **`[grind →]`** |
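+| `map_sum_point_update` | 58 | sum eq after pointwise add at target | proposed `[grind →]`; **dropped** (grind refuses to register — use manually) |
+| `map_sum_point_decrease` | 85 | sum eq after pointwise sub at target | proposed `[grind →]`; **dropped** (same reason) |
+| `map_sum_transfer_eq` | 117 | sum eq after transfer src → dst | proposed `[grind →]`; **dropped** (same reason) |
+
+→ **7 candidates, 4 tagged** (the four `countOcc*` recurrences); the three `map_sum_*` theorems were dropped in the final build — see the Executive summary.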
+
+### I.7 `Verity/Proofs/Stdlib/MappingAutomation.lean` — 40+ theorems, tag the core shapes
+
+This file is ~370 lines of `setX_getX_{same,diff}` and `setX_preserves_{storage,events,…}` for the
+three mapping families (`Address → Uint256`, `Uint256 → Uint256`, `Address → Address → Uint256`),
+plus `setStorage/setStorageAddr` cross-family preservations.
+
+**Rejected pattern — `setMapping_knownAddresses_*`**: these deal with a separate `knownAddresses`
+field that only a subset of cases use; tagging them broadly would add grind noise for cases that
+never touch it.
+
+**Tagged core shapes (`[grind =]` for the "same" identities, `[grind →]` for disequality-gated
+"diff" / "preserves"):**
+
+| Lemma | Line | Shape | Grind |
+|---|---|---|---|
+| `getMapping_runValue` | 32 | `(getMapping slot key).runValue s = s.storageMap slot.slot key` | `[grind =]` |
+| `setMapping_getMapping_same` | 52 | set-then-get-same-key → value | `[grind =]` |
+| `setMapping_getMapping_diff` | 57 | `k₁ ≠ k₂ → get after set = original` | `[grind →]` |
+| `setMapping_preserves_other_slot` | 66 | cross-slot preservation | `[grind →]` |
+| `getMappingUint_runValue` | 110 | Uint256-keyed accessor | `[grind =]` |
+| `setMappingUint_getMappingUint_same` | 125 | store-load identity | `[grind =]` |
+| `setMappingUint_getMappingUint_diff` | 131 | disjoint-key preservation | `[grind →]` |
+| `setMappingUint_preserves_storage` | 140 | cross-field preservation | `[grind →]` |
+| `setMappingUint_preserves_storageAddr` | 146 | cross-field preservation | `[grind →]` |
+| `setMappingUint_preserves_storageMap` | 152 | cross-field preservation | `[grind →]` |
+| `setMappingUint_preserves_storageMap2` | 158 | cross-field preservation | `[grind →]` |
+| `setMappingUint_preserves_sender` | 164 | context preservation | `[grind →]` |
+| `setMappingUint_preserves_thisAddress` | 170 | context preservation | `[grind →]` |
+| `getMapping2_runValue` | 189 | 2-key accessor | `[grind =]` |
+| `setMapping2_getMapping2_same` | 204 | 2-key store-load identity | `[grind =]` |
+| `setMapping2_getMapping2_diff_key1` | 210 | disjoint-key1 preservation | `[grind →]` |
+| `setMapping2_getMapping2_diff_key2` | 219 | disjoint-key2 preservation | `[grind →]` |
+| `setMapping2_preserves_storage` | 228 | cross-field | `[grind →]` |
+| `setMapping2_preserves_storageAddr` | 234 | cross-field | `[grind →]` |
+| `setMapping2_preserves_storageMap` | 240 | cross-field | `[grind →]` |
+| `setMapping2_preserves_storageMapUint` | 246 | cross-field | `[grind →]` |
+| `setMappingUint_preserves_events` | 360 | event preservation | `[grind →]` |
+| `setMapping2_preserves_events` | 366 | event preservation | `[grind →]` |
+| `setMapping_preserves_storageMapUint` | 314 | cross-family | `[grind →]` |
+| `setMapping_preserves_storageMap2` | 320 | cross-family | `[grind →]` |
+
+→ **25 tagged** (the "same" equalities + "preserves" directionals; skipping `_msgValue /
+_blockTimestamp / _blockNumber / _knownAddresses` which are adequately covered by a weaker set and
+would duplicate the context-preservation cluster without adding coverage). Note: `Invariants.lean` registers all 25 as `[grind =]`; see its section 4 for why `→` is not used.
+
+### I.8 `Verity/Proofs/Stdlib/Math.lean` — 65 theorems
+
+Triage:
+
+- **`*_comm` (commutativity) lemmas** (`mulDivDown_comm`, `mulDivUp_comm`, `wMulDown_comm`,
+  `safeAdd_comm`, `safeMul_comm`): **NOT tagged as `[grind =]`** — commutativity rules under
+  e-matching can drive unbounded rewriting if the RHS normal form isn't fixed. These are
+  traditionally `@[simp]` in other libraries for AC-normalization, but here they are not simp.
+  Tagging them `[grind]` is an E-match loop trap. **Skipped.**
+
+- **`*_nat_eq` bridging lemmas** (`mulDivDown_nat_eq`, `mulDivUp_nat_eq`, `wMulDown_nat_eq`,
+  `wDivUp_nat_eq`): exact equality of Uint256 op with Nat op, gated by a "fits within MAX" hypothesis.
+  Tagged `[grind →]`: when grind has the fits-within hypothesis, it can substitute the Nat form.
+
+- **`*_zero_{left,right}` / `*_one_{left,right}` / `*_by_wad` / `*_by_one`**: clean identity
+  rewrites, tagged `[grind =]` when they have no preconditions, `[grind →]` when gated.
+
+- **Monotonicity / antitonicity** (`mulDivDown_monotone_left`, `mulDivUp_antitone_divisor`,
+  `wMulDown_monotone_*`, `wDivUp_monotone_left`, `wDivUp_antitone_right`): preconditions are
+  driving; tagged `[grind →]`.
+
+- **Bound lemmas** (`mulDivDown_mul_le`, `mulDivUp_mul_ge`, `mulDivDown_mul_lt_add`,
+  `mulDivUp_mul_lt_add`, `wMulDown_mul_le`, `wMulDown_mul_lt_add`, `wDivUp_mul_ge`,
+  `wDivUp_mul_lt_add`, `mulDivDown_le_mulDivUp`, `mulDivUp_le_mulDivDown_add_one`): tagged
+  `[grind →]` — pure inequalities, no LHS ↔ RHS.
+
+- **Cancellation lemmas** (`mulDivDown_cancel_{left,right}`, `mulDivUp_cancel_{left,right}`):
+  tagged `[grind →]` — cancellations are gated by `c ≠ 0` + fits-within; forward only.
+
+- **Exactness disjunction** (`mulDivUp_eq_mulDivDown_or_succ`): tagged `[grind →]` — grind will
+  case-split on the disjunction.
+
+- **Safe-op lemmas** (`safeAdd_{some,none,zero_left,zero_right,result_bounded}`,
+  `safeSub_{some,none,zero,self,result_le}`, `safeMul_{some,none,zero_left,zero_right,one_left,one_right,result_bounded}`,
+  `safeDiv_{some,none,zero_numerator,by_one,self,result_le_numerator}`): **tagged `[grind →]`** —
+  these discharge option-elimination of the safe ops when the overflow hypothesis is present.
+
+Concrete tagged list:
+
+| Lemma | Grind |
+|---|---|
+| `mulDivDown_nat_eq`, `mulDivUp_nat_eq`, `wMulDown_nat_eq`, `wDivUp_nat_eq` | `[grind →]` (4) |
+| `mulDivDown_zero_left`, `mulDivDown_zero_right`, `mulDivUp_zero_left`, `mulDivUp_zero_right`, `wMulDown_zero_left`, `wMulDown_zero_right`, `wDivUp_zero` | `[grind =]` (7) |
+| `wMulDown_one_left`, `wMulDown_one_right`, `wDivUp_by_wad` | `[grind →]` (3) — gated by fits-within |
+| `mulDivDown_monotone_left/right`, `mulDivUp_monotone_left/right`, `wMulDown_monotone_left/right`, `wDivUp_monotone_left`, `wDivUp_antitone_right`, `mulDivDown_antitone_divisor`, `mulDivUp_antitone_divisor` | `[grind →]` (10) |
+| `mulDivDown_mul_le`, `mulDivUp_mul_ge`, `mulDivDown_mul_lt_add`, `mulDivUp_mul_lt_add`, `wMulDown_mul_le`, `wMulDown_mul_lt_add`, `wDivUp_mul_ge`, `wDivUp_mul_lt_add`, `mulDivDown_le_mulDivUp`, `mulDivUp_le_mulDivDown_add_one` | `[grind →]` (10) |
+| `mulDivUp_eq_mulDivDown_of_dvd`, `mulDivUp_eq_mulDivDown_add_one_of_not_dvd`, `mulDivUp_eq_mulDivDown_or_succ` | `[grind →]` (3) |
+| `mulDivDown_cancel_left/right`, `mulDivUp_cancel_left/right` | `[grind →]` (4) — conditional cancellation |
+| `mulDivDown_pos`, `mulDivUp_pos`, `wMulDown_pos`, `wDivUp_pos` | `[grind →]` (4) — positivity entailment |
+| `safeAdd_some/none/zero_left/zero_right/result_bounded` | `[grind →]` (5) |
+| `safeSub_some/none/zero/self/result_le` | `[grind →]` (5) |
+| `safeMul_some/none/zero_left/zero_right/one_left/one_right/result_bounded` | `[grind →]` (7) |
+| `safeDiv_some/none/zero_numerator/by_one/self/result_le_numerator` | `[grind →]` (6) |
+
+→ **~68 tagged** (approximately; exact count in `Invariants.lean`).
+
+**Deliberately skipped:**
+- `safeAdd_comm`, `safeMul_comm`, `mulDivDown_comm`, `mulDivUp_comm`, `wMulDown_comm` — **E-match loop risk**. Grind + commutativity in a rewrite bundle leads to swapping back and forth.
+
+---
+
+## Part II — Case-local `Specs.lean`
+
+Per-case namespace summary (all live under `Benchmark.Cases.*`):
+
+| Case file | Namespace(s) |
+|---|---|
+| `DamnVulnerableDeFi/SideEntrance/Specs.lean` | `Benchmark.Cases.DamnVulnerableDeFi.SideEntrance` |
+| `Ethereum/DepositContractMinimal/Specs.lean` | `Benchmark.Cases.Ethereum.DepositContractMinimal` |
+| `Kleros/SortitionTrees/Specs.lean` | `Benchmark.Cases.Kleros.SortitionTrees` |
+| `Lido/VaulthubLocked/Specs.lean` | `Benchmark.Cases.Lido.VaulthubLocked` |
+| `NexusMutual/RammPriceBand/Specs.lean` | `Benchmark.Cases.NexusMutual.RammPriceBand` + `Benchmark.Cases.NexusMutual.RammSpotPrice` |
+| `OpenZeppelin/ERC4626VirtualOffsetDeposit/Specs.lean` | `Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit` |
+| `PaladinVotes/StreamRecoveryClaimUsdc/Specs.lean` | `Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc` |
+| `Safe/OwnerManagerReach/Specs.lean` | `Benchmark.Cases.Safe.OwnerManagerReach` |
+| `UniswapV2/PairFeeAdjustedSwap/Specs.lean` | `Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap` |
+| `Zama/ERC7984ConfidentialToken/Specs.lean` | `Benchmark.Cases.Zama.ERC7984ConfidentialToken` |
+
+**Important clarification:** the Specs files contain `def`-based predicates rather than `theorem`
+lemmas. For grind, tagging a `def` with `@[grind]` registers it as an **unfolding candidate** — when
+grind sees the definition applied at the head of a term, it can β/δ-reduce it. This is exactly what
+we want for the invariant predicates (e.g. `ownerListInvariant`, `isOwner`, `balanceOf`, `supply`,
+`computedClaimAmount`, `next`, `isChain`, `ceilDiv`, `getPooledEthBySharesRoundUp`, …): grind needs
+to peel the definition to reach the storage-level equations.
+
+### II.1 Kleros / SortitionTrees
+
+| Name | Kind | Purpose | Grind |
+|---|---|---|---|
+| `leaf_sum` | `def` (Uint256) | sum of 4 leaf weights | `[grind]` unfold |
+| `parent_equals_sum_of_children_spec` | `def` (Prop) | tree balance between parents/children | SKIP — it IS the main obligation, better not auto-unfold |
+| `root_equals_sum_of_leaves_spec` | `def` (Prop) | root invariant | SKIP — main obligation |
+| `draw_selects_valid_leaf_spec` | `def` (Prop) | bounds 3 ≤ selected ≤ 6 | SKIP — main obligation |
+| `node_id_bijection_spec` | `def` (Prop) | id-mapping bijection | SKIP — main obligation |
+| `root_minus_left_equals_right_subtree_spec` | `def` (Prop) | right = root - left | SKIP — main obligation |
+
+→ **1 tagged:** `leaf_sum` (auxiliary aggregator that appears inside `root_equals_sum_of_leaves_spec`).
+
+### II.2 Lido / VaulthubLocked
+
+Helpers live in the adjacent `Contract.lean` (readable — not `Proofs.lean`).
+
+| Name | Kind | Purpose | Grind |
+|---|---|---|---|
+| `TOTAL_BASIS_POINTS` | `def` (Uint256 constant) | 10000 | SKIP (constant) |
+| `ceilDiv` | `def` (Uint256 → Uint256 → Uint256) | ceil-div helper | `[grind]` unfold |
+| `getPooledEthBySharesRoundUp` | `def` | share → ether round-up | `[grind]` unfold |
+| `ceildiv_sandwich_spec` | `def` (Prop) | `ceilDiv(x,d) * d ≥ x` when no overflow | SKIP — main obligation |
+| `shares_conversion_monotone_spec` | `def` (Prop) | share conversion monotonicity | SKIP — main obligation |
+| `locked_funds_solvency_spec` | `def` (Prop) | solvency invariant | SKIP — main obligation |
+
+→ **2 tagged:** `ceilDiv`, `getPooledEthBySharesRoundUp`.
+
+### II.3 Zama / ERC7984ConfidentialToken
+
+| Name | Kind | Purpose | Grind |
+|---|---|---|---|
+| `balanceOf` | `def` (accessor) | `s.storageMap 1 addr` | `[grind]` unfold |
+| `supply` | `def` (accessor) | `s.storage 0` | `[grind]` unfold |
+| `operatorExpiry` | `def` (accessor) | `s.storageMap2 3 holder spender` | `[grind]` unfold |
+| other specs | `def` (Prop) | main obligations | SKIP |
+
+→ **3 tagged.**
+
+### II.4 PaladinVotes / StreamRecoveryClaimUsdc
+
+| Name | Kind | Purpose | Grind |
+|---|---|---|---|
+| `computedClaimAmount` | `def` (Uint256) | `shareWad * s.storage 0 / 1e18` | `[grind]` unfold |
+| `computedWethClaimAmount` | `def` (Uint256) | WETH analog | `[grind]` unfold |
+
+→ **2 tagged.**
+
+### II.5 Safe / OwnerManagerReach — the rich one
+
+| Name | Kind | Purpose | Grind |
+|---|---|---|---|
+| `next` | `def` (accessor) | `wordToAddress (s.storageMap 0 a)` | `[grind]` unfold |
+| `isChain` | `def` (List → Prop, recursive) | pairwise-next consistency | `[grind]` unfold |
+| `reachable` | `def` (Prop, ∃ chain …) | existential chain | **NOT TAGGED** — unfolding an existential makes grind try to fabricate chains; leads to loop. Keep opaque. |
+| `inListReachable` | `def` (Prop) | Certora-style list invariant | `[grind]` unfold |
+| `reachableInList` | `def` (Prop) | inverse invariant | `[grind]` unfold |
+| `ownerListInvariant` | `def` (Prop) | bundled iff invariant | `[grind]` unfold |
+| `noDuplicates` | `def` (List → Prop, recursive) | list is nodup | `[grind]` unfold |
+| `acyclic` | `def` (Prop, ∀ chain …) | universal over chains | **NOT TAGGED** — universally quantified over chain structures; unfolding inside grind explodes. Keep opaque. |
+| `uniquePredecessor` | `def` (Prop) | at-most-one incoming edge | `[grind]` unfold |
+| `freshInList` | `def` (Prop, ∀ chain …) | absence from any chain | **NOT TAGGED** — same reason as `acyclic`. |
+| `noSelfLoops` | `def` (Prop) | no self-edges | `[grind]` unfold |
+| `isOwner` | `def` (Prop) | non-zero successor + ≠ SENTINEL | `[grind]` unfold |
+
+→ **9 tagged, 3 intentionally left opaque** (`reachable`, `acyclic`, `freshInList`).
+
+### II.6 NexusMutual / RammPriceBand
+
+Contract.lean has `PRICE_BUFFER`, `PRICE_BUFFER_DENOMINATOR`, `ONE_ETHER` (constants — SKIP) and
+`calculateBuyReserve`, `calculateSellReserve`, `spotPrices` (multi-branch functions — SKIP because
+unfolding them inside grind would thrash on case splits).
+
+Specs.lean predicates are main obligations (SKIP).
+
+→ **0 tagged.** (Documented reasoning: multi-branch computational helpers are antipattern for
+grind.)
+
+### II.7 DamnVulnerableDeFi, Ethereum/DepositContractMinimal, OpenZeppelin, UniswapV2
+
+These Specs.lean files contain only **main obligation predicates** (`deposit_sets_pool_balance_spec`,
+`deposit_increments_deposit_count_spec`, etc.) — no auxiliary helpers. Tagging them for grind unfold
+would be circular (we'd unfold the obligation into its body). **0 tagged** from these cases.
+
+---
+
+## Part III — Rationale for rejections and "NOT TAGGED" entries
+
+1. **Already `@[simp]` on trivial shapes** — FiniteSet membership lemmas, `Specs.Common *_rfl`.
+   Simp runs inside grind, so double-tagging is redundant noise.
+
+2. **Commutativity rewrites** — `*_comm` lemmas are E-match loop magnets. Skip.
+
+3. **Existentially- or universally-quantified predicates over chains** (`reachable`, `acyclic`,
+   `freshInList`) — unfolding them mid-grind creates a witness search that cannot be bounded.
+
+4. **Multi-branch computation functions** (`calculateBuyReserve`, `spotPrices`) — unfolding
+   explodes the proof state with case splits that grind has no oracle for.
+
+5. **Plain numeric constants** (`TOTAL_BASIS_POINTS`, `PRICE_BUFFER`, `ONE_ETHER`) — no domain
+   content; simp-unfolding when needed is cheaper than grind tagging.
+
+6. **Main obligation predicates** (everything named `*_spec` that is a top-level proof
+   obligation) — these are the theorems we prove; we should not make grind unfold them when proving
+   something else.
+
+---
+
+## Part IV — Coordination with worker S1
+
+S1 is building `Benchmark/Grindset/` on branch `grindset/s1-verity-grindset` and tagging **core
+operational primitives** (likely: Uint256 arithmetic, FiniteSet ops, storage context manipulation,
+Free monad step semantics). Our A1 coverage is complementary:
+
+- A1 owns **invariant-level** lemmas (`sumBalances_*`, `map_sum_*`, `setMapping*_same/diff`,
+  mulDivUp/Down bound + cancellation + monotonicity, safe-op Option elimination).
+- A1 owns **case-local predicate unfolding** for the 5 cases with non-trivial helpers (II.1–II.5).
+- S1 presumably owns operational primitives (`.runState`, `.runValue`, basic Uint256 `add/mul/sub`
+  identities).
+
+If both branches tag the same lemma, Lean will accept the second tag as a no-op (attribute is
+idempotent for `grind` equal-orientation); if S1 tags the Uint256 commutativity set as `grind` we
+rely on S1's choice (we document this as deferred).
+
+The stub `Benchmark/Grindset.lean` on A1's branch imports only `Benchmark.Grindset.Invariants`; S1
+will merge later.
+
+---
+
+## Build verification
+
+`lake build Benchmark.Grindset.Invariants` must succeed. The `attribute [grind …] X` syntax
+requires `X` to already be imported. We import:
+
+- `Verity.Core.Uint256`
+- `Verity.Core.FiniteSet` *(transitively)*
+- `Verity.Proofs.Stdlib.Math`
+- `Verity.Proofs.Stdlib.ListSum`
+- `Verity.Proofs.Stdlib.MappingAutomation`
+- `Verity.Specs.Common.Sum`
+- `Benchmark.Cases.*.Specs` for the 5 cases with tagged helpers (Kleros, Lido, PaladinVotes, Safe, Zama)
+
+See `Benchmark/Grindset/Invariants.lean` for the complete, grouped attribute application.
diff --git a/Benchmark/Grindset/Invariants.lean b/Benchmark/Grindset/Invariants.lean
new file mode 100644
index 00000000..71a17221
--- /dev/null
+++ b/Benchmark/Grindset/Invariants.lean
@@ -0,0 +1,321 @@
+/-
+  Benchmark.Grindset.Invariants
+
+  Mission A1 (grindset/a1-invariant-tags): re-export and tag domain-level invariant lemmas and
+  case-local spec helpers with `@[grind …]` so the `grind` tactic can use them during proof search.
+
+  Complementary to sibling worker S1 (`grindset/s1-verity-grindset`), who tags core operational
+  primitives. A1 focuses on:
+
+    • Verity sum-preservation invariants   (Verity.Proofs.Stdlib.ListSum,
+                                              Verity.Specs.Common.Sum)
+    • Verity mapping store/load identities (Verity.Proofs.Stdlib.MappingAutomation)
+    • Verity ceil/floor-div + wad + safe-op bounds
+                                              (Verity.Proofs.Stdlib.Math)
+    • Two Uint256 cancellation lemmas      (Verity.Core.Uint256.{sub_add_cancel, add_right_cancel})
+    • Case-local predicate unfolding       (Benchmark.Cases.*.Specs)
+
+  See Benchmark/Grindset/INVARIANTS_AUDIT.md for per-entry rationale and rejection notes.
+
+  Constraints honoured:
+    - No Verity library file (`.lake/packages/verity/**`) is modified.
+    - No `Benchmark/Cases/**/Specs.lean` or `Proofs.lean` is modified.
+    - Only `attribute [grind …] Name` re-exports are applied here.
+
+  Orientation choices:
+    - `[grind =]` for equality lemmas whose conclusion is used as a bidirectional rewrite (the
+      safer default when the hypotheses lack matchable patterns or are non-propositional).
+    - `[grind →]` reserved for implications whose antecedents contain genuinely matchable
+      patterns distinct from the conclusion (`safeAdd_some`, `*_monotone_*` that ship with
+      `≤` antecedents containing the same `mulDiv` terms as the conclusion, etc.).
+    - Case-local `def`s get plain `[grind]` which registers them as δ-unfold candidates.
+-/
+
+import Verity.Core.Uint256
+import Verity.Proofs.Stdlib.Math
+import Verity.Proofs.Stdlib.ListSum
+import Verity.Proofs.Stdlib.MappingAutomation
+import Verity.Specs.Common
+import Verity.Specs.Common.Sum
+
+import Benchmark.Cases.Kleros.SortitionTrees.Specs
+import Benchmark.Cases.Lido.VaulthubLocked.Specs
+import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs
+import Benchmark.Cases.Safe.OwnerManagerReach.Specs
+import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs
+
+namespace Benchmark.Grindset.Invariants
+
+/-! ## 1. Core Uint256 cancellations
+
+Almost all of `Verity.Core.Uint256`'s algebraic lemmas are already `@[simp]`. Two are not but are
+genuinely useful for proof automation: the wrap-safe `sub_add_cancel` and the forward-only
+`add_right_cancel`. -/
+
+attribute [grind =] Verity.Core.Uint256.sub_add_cancel
+attribute [grind →] Verity.Core.Uint256.add_right_cancel
+
+
+/-! ## 2. ListSum — point-update / transfer conservation
+
+Core balance-conservation invariants. The `_eq` countOcc lemmas tag cleanly as `[grind =]`; the
+conditional `_ne` variants (with an `a ≠ t` antecedent) are forward-only and tagged `[grind →]`.
+The three `map_sum_*` preservation theorems can't be tagged with either `→` (antecedent patterns
+aren't extractable) or `=` (the LHS of the concluding equality doesn't mention every bound
+parameter like `delta`/`src`/`dst`, so grind can't instantiate them from an E-match). Callers
+should pull them in manually (e.g. `grind [map_sum_point_update]`); NOT TAGGED here to avoid a
+loud-but-useless global registration. -/
+
+attribute [grind =]
+  Verity.Proofs.Stdlib.ListSum.countOcc_cons_eq
+  Verity.Proofs.Stdlib.ListSum.countOccU_cons_eq
+-- Conditional (`a ≠ t → …`) equalities: forward-only per the audit.
+attribute [grind →]
+  Verity.Proofs.Stdlib.ListSum.countOcc_cons_ne
+  Verity.Proofs.Stdlib.ListSum.countOccU_cons_ne
+
+
+/-! ## 3. sumBalances preservation over FiniteAddressSet
+
+Namespace is `Verity.Specs.Common` (the file lives under Sum.lean but opens no sub-namespace).
+
+Only the two "pure rewrite" theorems (`sumBalances_insert_existing`, `sumBalances_zero_of_all_zero`)
+tag cleanly as `[grind =]` — grind can E-match their LHS to the goal without unknown parameters.
+The other three (`_insert_new`, `_update_existing`, `balancesFinite_preserved_deposit`) mention
+fresh parameters (`amount`, `old_amount`, record-update on `knownAddresses`) that don't appear on
+the pattern LHS, so grind refuses to register them. Callers invoke these manually. -/
+
+attribute [grind =]
+  Verity.Specs.Common.sumBalances_insert_existing
+  Verity.Specs.Common.sumBalances_zero_of_all_zero
+
+
+/-! ## 4. Mapping store/load identities (MappingAutomation)
+
+These are the single highest-impact cluster: every benchmark obligation of the form "after
+`setMappingX slot k v`, reading back at the same key equals `v`, and reading at a distinct key
+preserves the original" reduces to these core four shapes per mapping family.
+
+All tagged `[grind =]`:
+  - the `_same` / `_runValue` lemmas are pure equations;
+  - the `_diff` lemmas have an antecedent (`k1 ≠ k2`) whose pattern can't be extracted by grind →,
+    but tagging `=` still lets grind rewrite the `getMapping …` term and side-check the ineq;
+  - the `_preserves_*` lemmas have no propositional hypothesis at all, so `=` is the only
+    orientation accepted.
+-/
+
+-- 4a. Address → Uint256 mappings
+attribute [grind =]
+  Verity.Proofs.Stdlib.MappingAutomation.getMapping_runValue
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping_getMapping_same
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping_getMapping_diff
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping_preserves_other_slot
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping_preserves_storageMapUint
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping_preserves_storageMap2
+
+-- 4b. Uint256 → Uint256 mappings
+attribute [grind =]
+  Verity.Proofs.Stdlib.MappingAutomation.getMappingUint_runValue
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_getMappingUint_same
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_getMappingUint_diff
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_storage
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_storageAddr
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_storageMap
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_storageMap2
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_sender
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_thisAddress
+  Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_events
+
+-- 4c. Address → Address → Uint256 (nested) mappings
+attribute [grind =]
+  Verity.Proofs.Stdlib.MappingAutomation.getMapping2_runValue
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping2_getMapping2_same
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping2_getMapping2_diff_key1
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping2_getMapping2_diff_key2
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_storage
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_storageAddr
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_storageMap
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_storageMapUint
+  Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_events
+
+
+/-! ## 5. Ceil / floor division + wad + safe ops
+
+All of `Verity.Proofs.Stdlib.Math` except commutativity rewrites (which are E-match loop traps).
+
+Groups:
+  • `*_nat_eq`          — bridge Uint256 op to Nat op (forward-only; conditional on the
+                          fits-within hypothesis, tagged `[grind →]` in 5a).
+  • `*_zero_*`          — identities with no precondition (equational).
+  • `*_one_{left,right}` / `wDivUp_by_wad` — gated identities (forward, the gate has patterns).
+  • `*_monotone_*`, `*_antitone_*` — monotonicity (forward, antecedent shares `mulDiv` patterns
+                                      with conclusion).
+  • `*_mul_le / _mul_ge / _mul_lt_add` — sandwich bounds (mixed; those whose antecedents lack
+                                          matchable patterns fall back to plain `[grind]`).
+  • `mulDivUp_eq_mulDivDown_*` — exactness disjunctions (forward).
+  • `*_cancel_*`        — conditional cancellation (forward).
+  • `*_pos`             — positivity entailment (forward).
+  • `safe{Add,Sub,Mul,Div}_*` — Option-elimination and result bounds (mix of `=` for identities
+                                 and `→` for bound-producing lemmas).
+-/
+
+-- 5a. Nat bridges (conditional on a `fits_within` hypothesis, forward-only per the audit).
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.mulDivDown_nat_eq
+  Verity.Proofs.Stdlib.Math.mulDivUp_nat_eq
+  Verity.Proofs.Stdlib.Math.wMulDown_nat_eq
+  Verity.Proofs.Stdlib.Math.wDivUp_nat_eq
+
+-- 5b. Unconditional zero identities
+attribute [grind =]
+  Verity.Proofs.Stdlib.Math.mulDivDown_zero_left
+  Verity.Proofs.Stdlib.Math.mulDivDown_zero_right
+  Verity.Proofs.Stdlib.Math.mulDivUp_zero_left
+  Verity.Proofs.Stdlib.Math.mulDivUp_zero_right
+  Verity.Proofs.Stdlib.Math.wMulDown_zero_left
+  Verity.Proofs.Stdlib.Math.wMulDown_zero_right
+  Verity.Proofs.Stdlib.Math.wDivUp_zero
+
+-- 5c. Gated identity rewrites
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.wMulDown_one_left
+  Verity.Proofs.Stdlib.Math.wMulDown_one_right
+  Verity.Proofs.Stdlib.Math.wDivUp_by_wad
+
+-- 5d. Monotonicity / antitonicity (mulDivDown variants: antecedents lack patterns AND the
+--     conclusion is `≤` not `=`, so neither `→` nor `=` works. Use plain `[grind]`.)
+attribute [grind]
+  Verity.Proofs.Stdlib.Math.mulDivDown_monotone_left
+  Verity.Proofs.Stdlib.Math.mulDivDown_monotone_right
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.mulDivUp_monotone_left
+  Verity.Proofs.Stdlib.Math.mulDivUp_monotone_right
+  Verity.Proofs.Stdlib.Math.wMulDown_monotone_left
+  Verity.Proofs.Stdlib.Math.wMulDown_monotone_right
+  Verity.Proofs.Stdlib.Math.wDivUp_monotone_left
+  Verity.Proofs.Stdlib.Math.wDivUp_antitone_right
+  Verity.Proofs.Stdlib.Math.mulDivDown_antitone_divisor
+  Verity.Proofs.Stdlib.Math.mulDivUp_antitone_divisor
+
+-- 5e. Sandwich bounds (mulDivDown variants: conclusions are `≤` / `<`, so use plain `[grind]`)
+attribute [grind]
+  Verity.Proofs.Stdlib.Math.mulDivDown_mul_le
+  Verity.Proofs.Stdlib.Math.mulDivDown_mul_lt_add
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.mulDivUp_mul_ge
+  Verity.Proofs.Stdlib.Math.mulDivUp_mul_lt_add
+  Verity.Proofs.Stdlib.Math.wMulDown_mul_le
+  Verity.Proofs.Stdlib.Math.wMulDown_mul_lt_add
+  Verity.Proofs.Stdlib.Math.wDivUp_mul_ge
+  Verity.Proofs.Stdlib.Math.wDivUp_mul_lt_add
+  Verity.Proofs.Stdlib.Math.mulDivDown_le_mulDivUp
+  Verity.Proofs.Stdlib.Math.mulDivUp_le_mulDivDown_add_one
+
+-- 5f. Exactness disjunctions
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.mulDivUp_eq_mulDivDown_of_dvd
+  Verity.Proofs.Stdlib.Math.mulDivUp_eq_mulDivDown_add_one_of_not_dvd
+  Verity.Proofs.Stdlib.Math.mulDivUp_eq_mulDivDown_or_succ
+
+-- 5g. Conditional cancellations
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.mulDivDown_cancel_left
+  Verity.Proofs.Stdlib.Math.mulDivDown_cancel_right
+  Verity.Proofs.Stdlib.Math.mulDivUp_cancel_left
+  Verity.Proofs.Stdlib.Math.mulDivUp_cancel_right
+
+-- 5h. Positivity
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.mulDivDown_pos
+  Verity.Proofs.Stdlib.Math.mulDivUp_pos
+  Verity.Proofs.Stdlib.Math.wMulDown_pos
+  Verity.Proofs.Stdlib.Math.wDivUp_pos
+
+-- 5i. safeAdd
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.safeAdd_some
+  Verity.Proofs.Stdlib.Math.safeAdd_none
+  Verity.Proofs.Stdlib.Math.safeAdd_zero_left
+  Verity.Proofs.Stdlib.Math.safeAdd_zero_right
+  Verity.Proofs.Stdlib.Math.safeAdd_result_bounded
+
+-- 5j. safeSub (zero/self are no-hypothesis identities → `=`)
+attribute [grind =]
+  Verity.Proofs.Stdlib.Math.safeSub_zero
+  Verity.Proofs.Stdlib.Math.safeSub_self
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.safeSub_some
+  Verity.Proofs.Stdlib.Math.safeSub_none
+  Verity.Proofs.Stdlib.Math.safeSub_result_le
+
+-- 5k. safeMul (zero identities → `=`, rest → `→`)
+attribute [grind =]
+  Verity.Proofs.Stdlib.Math.safeMul_zero_left
+  Verity.Proofs.Stdlib.Math.safeMul_zero_right
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.safeMul_some
+  Verity.Proofs.Stdlib.Math.safeMul_none
+  Verity.Proofs.Stdlib.Math.safeMul_one_left
+  Verity.Proofs.Stdlib.Math.safeMul_one_right
+  Verity.Proofs.Stdlib.Math.safeMul_result_bounded
+
+-- 5l. safeDiv (none/by_one are no-hypothesis identities, some/zero_num/self lack antecedent
+--     patterns → all to `=`)
+attribute [grind =]
+  Verity.Proofs.Stdlib.Math.safeDiv_some
+  Verity.Proofs.Stdlib.Math.safeDiv_none
+  Verity.Proofs.Stdlib.Math.safeDiv_zero_numerator
+  Verity.Proofs.Stdlib.Math.safeDiv_by_one
+  Verity.Proofs.Stdlib.Math.safeDiv_self
+attribute [grind →]
+  Verity.Proofs.Stdlib.Math.safeDiv_result_le_numerator
+
+
+/-! ## 6. Case-local predicate / accessor unfolding
+
+These are `def`s (not theorems) from the five cases imported above (their Specs.lean, or the
+adjacent Contract module for Lido). Tagging a `def` with `@[grind]` registers it as an unfolding
+candidate for grind — it will δ-reduce the head when it appears in the goal. This is essential so
+grind can see the underlying `storage`/`storageMap`/… reads that the definitions abbreviate.
+
+Rejected on purpose:
+  • `reachable` / `acyclic` / `freshInList` (Safe.OwnerManagerReach) — existential / universal
+    over chain lists; unfolding inside grind creates unbounded witness search.
+  • `calculateBuyReserve`, `calculateSellReserve`, `spotPrices` (NexusMutual/RammPriceBand in
+    Contract.lean) — multi-branch computation, unfolding thrashes on case splits.
+  • Plain numeric constants — simp handles them better.
+  • Main obligation predicates (`*_spec` at top level) — we prove these, we don't unfold them.
+-/
+
+-- Kleros / SortitionTrees
+attribute [grind] Benchmark.Cases.Kleros.SortitionTrees.leaf_sum
+
+-- PaladinVotes / StreamRecoveryClaimUsdc
+attribute [grind]
+  Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.computedClaimAmount
+  Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.computedWethClaimAmount
+
+-- Lido / VaulthubLocked (defs live in the adjacent Contract module)
+attribute [grind]
+  Benchmark.Cases.Lido.VaulthubLocked.ceilDiv
+  Benchmark.Cases.Lido.VaulthubLocked.getPooledEthBySharesRoundUp
+
+-- Zama / ERC7984ConfidentialToken — storage accessors
+attribute [grind]
+  Benchmark.Cases.Zama.ERC7984ConfidentialToken.balanceOf
+  Benchmark.Cases.Zama.ERC7984ConfidentialToken.supply
+  Benchmark.Cases.Zama.ERC7984ConfidentialToken.operatorExpiry
+
+-- Safe / OwnerManagerReach — linked-list reachability / invariant predicates
+attribute [grind]
+  Benchmark.Cases.Safe.OwnerManagerReach.next
+  Benchmark.Cases.Safe.OwnerManagerReach.isChain
+  Benchmark.Cases.Safe.OwnerManagerReach.inListReachable
+  Benchmark.Cases.Safe.OwnerManagerReach.reachableInList
+  Benchmark.Cases.Safe.OwnerManagerReach.ownerListInvariant
+  Benchmark.Cases.Safe.OwnerManagerReach.noDuplicates
+  Benchmark.Cases.Safe.OwnerManagerReach.uniquePredecessor
+  Benchmark.Cases.Safe.OwnerManagerReach.noSelfLoops
+  Benchmark.Cases.Safe.OwnerManagerReach.isOwner
+
+end Benchmark.Grindset.Invariants
diff --git a/Benchmark/Grindset/Monad.lean b/Benchmark/Grindset/Monad.lean
new file mode 100644
index 00000000..f7cfc6a4
--- /dev/null
+++ b/Benchmark/Grindset/Monad.lean
@@ -0,0 +1,136 @@
+/-
+  Benchmark.Grindset.Monad — simp/grind normalization of the Contract monad
+  scaffolding.
+
+  The Verity DSL elaborates `verity_contract` function bodies into do-notation
+  over the `Contract` monad, which in turn desugars to chains of
+  `Verity.bind`/`Verity.pure` wrapped by `Contract.run` and projected through
+  `ContractResult.snd` / `ContractResult.fst`.
+
+  In this file we register these identifiers in the dedicated `grind_norm`
+  simp set (the new lemmas are also plain `@[simp]`). No `@[grind]` /
+  `@[grind =]` tag is applied here, so run the `grind_norm` pass before
+  `grind`. The goal is that typical benchmark task obligations of shape
+
+    ((Contract.f arg).run s).snd.storage n = ...
+
+  normalize down to plain record updates over `s`, at which point `grind`
+  can finish with the tagged storage/mapping simp-lemmas in `Core.lean`.
+-/
+
+import Verity.Core
+import Benchmark.Grindset.Attr
+
+namespace Benchmark.Grindset
+
+open Verity
+
+/-!
+## `grind_norm` simp set
+
+Unfolds the bind/pure/run scaffolding so that `Contract.run (do …) s`
+collapses into nested applications of the underlying `*_run` lemmas.
+
+Downstream tactics can invoke these lemmas via:
+
+```
+simp only [grind_norm] at *
+```
+
+as a normalization pass before calling `grind` (the rules below are added
+to `grind_norm` only; this file applies no `@[grind]`/`@[grind =]` tags).
+-/
+
+
+/-! ### Bind and pure -/
+
+@[grind_norm, simp]
+theorem bind_def {α β : Type} (m : Contract α) (f : α → Contract β) :
+    (m >>= f) = Verity.bind m f := rfl
+
+@[grind_norm, simp]
+theorem pure_def {α : Type} (a : α) :
+    (Pure.pure a : Contract α) = Verity.pure a := rfl
+
+@[grind_norm, simp]
+theorem bind_success {α β : Type} (a : α) (s : ContractState)
+    (f : α → Contract β) :
+    Verity.bind (fun state => ContractResult.success a state) f s =
+      f a s := rfl
+
+/-! ### `Contract.run` against constructors -/
+
+@[grind_norm, simp]
+theorem Contract_run_success {α : Type} (a : α) (s : ContractState) :
+    Contract.run (fun state => ContractResult.success a state) s =
+      ContractResult.success a s := rfl
+
+/-! ### Projection-through-constructor lemmas
+
+The two core structural facts used by every spec-unfolding proof: after
+reducing the monadic body to a `ContractResult.success a s'`, projecting out
+`.snd` gives back `s'`. These are already `@[simp]` upstream; we re-tag them
+into `grind_norm` so `simp only [grind_norm]` can apply them directly. -/
+
+attribute [grind_norm] ContractResult.snd_success ContractResult.snd_revert
+attribute [grind_norm] ContractResult.fst_success
+attribute [grind_norm] Contract.bind_pure_left Contract.bind_pure_right
+attribute [grind_norm] Contract.bind_assoc
+
+/-! ### Primitive operation `.run` lemmas.
+
+These are `@[simp]` upstream. Re-tagging into `grind_norm` keeps everything
+accessible via one attribute when running the normalization pass. -/
+
+attribute [grind_norm] getStorage_run setStorage_run
+attribute [grind_norm] getStorageAddr_run setStorageAddr_run
+attribute [grind_norm] getMapping_run setMapping_run
+attribute [grind_norm] getMapping2_run setMapping2_run
+attribute [grind_norm] getMappingUint_run setMappingUint_run
+attribute [grind_norm] msgSender_run contractAddress_run msgValue_run
+attribute [grind_norm] blockTimestamp_run blockNumber_run chainid_run
+attribute [grind_norm] require_true require_false
+attribute [grind_norm] pure_run
+
+/-!
+### Definitional unfolds
+
+The Verity monadic primitives are ordinary `def`s; we need the simp set to
+be able to unfold them so `Verity.bind (setStorage … …) f s` can reduce to
+a `ContractResult.success …` pattern that the `*_run` lemmas (and the `.snd`
+projection lemmas) can finish. -/
+
+attribute [grind_norm] Verity.bind Verity.pure
+attribute [grind_norm] Verity.Contract.run
+attribute [grind_norm] Verity.getStorage Verity.setStorage
+attribute [grind_norm] Verity.getStorageAddr Verity.setStorageAddr
+attribute [grind_norm] Verity.getMapping Verity.setMapping
+attribute [grind_norm] Verity.getMapping2 Verity.setMapping2
+attribute [grind_norm] Verity.getMappingUint Verity.setMappingUint
+attribute [grind_norm] Verity.msgSender Verity.contractAddress
+attribute [grind_norm] Verity.msgValue
+attribute [grind_norm] Verity.blockTimestamp Verity.blockNumber Verity.chainid
+attribute [grind_norm] Verity.require
+
+/-! ### `require` branch discharge
+
+The `verity_contract` macro elaborates `require (a <= b) msg` into
+`Verity.require (decide (a ≤ b)) msg`, which after unfolding becomes
+`fun s => if decide (a ≤ b) = true then ContractResult.success () s else …`.
+A proof-side hypothesis `h : a ≤ b` passed into `simp only […, h]` rewrites
+the inner `Prop` to `True`, leaving the residual guard
+`if decide True = true then success … else revert …`. The ground
+`simp only [grind_norm, …]` simp set does not include a rule that collapses
+this guard — without it the enclosing `Verity.bind` / `Contract.run` matches
+cannot commit to their success branch and `grind` is handed a large
+unreduced term whose storage projection it cannot see through.
+
+The lemma below is the missing rewrite. It discharges the `require` in one
+step, unblocking the rest of the monadic normalisation. -/
+
+@[grind_norm, simp]
+theorem ite_decide_True {α : Sort _} (a b : α) :
+    (if decide True = true then a else b) = a := by
+  simp
+
+end Benchmark.Grindset
diff --git a/Benchmark/Grindset/REACH_NOTES.md b/Benchmark/Grindset/REACH_NOTES.md
new file mode 100644
index 00000000..2bb78ba4
--- /dev/null
+++ b/Benchmark/Grindset/REACH_NOTES.md
@@ -0,0 +1,180 @@
+# Grindset Reach extension — design notes
+
+Worker **A3** (branch `grindset/a3-reach-grind-ext`).
+
+## TL;DR
+
+- **Reach shape in the benchmark is not inductive** — the one case
+  that genuinely uses reachability (`Safe/OwnerManagerReach`) encodes
+  it as an *existential over a witness list* (`List Address`), not as
+  `Relation.ReflTransGen` or a custom `inductive Reach` step closure.
+- `Benchmark/Grindset/Reach.lean` ships **both** flavours of closure
+  lemmas (inductive `Relation.ReflTransGen` and witness-based
+  `Reachable`/`IsChain`) so the extension is future-proof.
+- `@[grind]` tagging of the witness-based lemmas is **deliberately
+  conservative**: only refl / one-step / base facts are tagged.
+  `reachable_trans` and `reachable_snoc` are not tagged globally because
+  they are too productive and make E-matching explode on terms like `f (f (f a))`.
+- The `verity_reach_grind` macro handles the actual closure
+  obligations by `apply`-ing `reachable_preserves_invariant` /
+  `reach_preserves_invariant` before handing off to `grind`.
+
+## The four flagged cases, reach-wise
+
+| Case                                              | Reach?                                   |
+| ------------------------------------------------- | ---------------------------------------- |
+| `Kleros/SortitionTrees`                           | No — sum/storage arithmetic only         |
+| `Safe/OwnerManagerReach`                          | **Yes — list-witness `reachable`**       |
+| `Lido/VaulthubLocked`                             | No — solvency arithmetic (F-01 / P-VH-*) |
+| `PaladinVotes/StreamRecoveryClaimUsdc`            | No — claim-state updates only            |
+
+So only `Safe/OwnerManagerReach` actually benefits from a reach pack.
+The other three were presumably flagged by keyword match alone.
+
+## The concrete Reach shape in `Safe/OwnerManagerReach`
+
+From `Benchmark/Cases/Safe/OwnerManagerReach/Specs.lean` (paraphrased):
+
+```lean
+-- Linked-list next-pointer reader
+def next (s : ContractState) (a : Address) : Address :=
+  wordToAddress (s.storageMap 0 a)
+
+-- A list of addresses that walks the linked list correctly
+def isChain (s : ContractState) : List Address → Prop
+  | [] | [_]           => True
+  | a :: b :: rest     => next s a = b ∧ isChain s (b :: rest)
+
+-- Reachability via a witness chain
+def reachable (s : ContractState) (a b : Address) : Prop :=
+  ∃ chain, chain.head? = some a ∧ chain.getLast? = some b ∧ isChain s chain
+```
+
+Key observation: **reach induction here is list induction**, not
+inductive-predicate induction. This is a deliberate choice — Certora's
+`reach` predicate was replaced with a witness-style existential
+because the Safe linked list is naturally finite and the witness is a
+first-class object proofs can manipulate.
+
+## What `Reach.lean` provides
+
+### Part 1 — Inductive reach (`Relation.ReflTransGen`)
+
+For future cases that *do* use the inductive formulation (none of the
+four flagged cases do, but it's a common pattern). Lemmas tagged
+`@[grind]`:
+
+| Lemma                       | Role                                          |
+| --------------------------- | --------------------------------------------- |
+| `reach_refl`                | `ReflTransGen r a a`                          |
+| `reach_of_step`             | single step ⇒ reach                           |
+| `reach_tail` / `reach_head` | snoc / cons extension                         |
+| `reach_trans`               | transitivity                                  |
+
+Plus an un-tagged closure lemma:
+
+| Lemma                       | Role                                          |
+| --------------------------- | --------------------------------------------- |
+| `reach_preserves_invariant` | `(∀ x y, r x y → P x → P y) → ∀ a b, R* a b → P a → P b` |
+
+### Part 2 — Witness-based reach (`Reachable` / `IsChain`)
+
+Generic over `σ` (state) and `α` (node). Definitions mirror the Safe
+case verbatim. Lemmas:
+
+| Lemma                          | Tagged `@[grind]`? | Role                                   |
+| ------------------------------ | ------------------ | -------------------------------------- |
+| `isChain_nil`, `isChain_singleton` | yes            | base cases                             |
+| `isChain_cons_cons`            | `@[simp]` only     | Iff unfolding (pattern too generic for grind) |
+| `isChain_tail`                 | no                 | structural lemma                       |
+| `reachable_refl`               | yes                | `Reachable step s a a`                 |
+| `reachable_step`               | yes                | `Reachable step s a (step s a)`        |
+| `reachable_of_step`            | yes                | alias of `reachable_step`              |
+| `reachable_snoc`               | **no** (loops)     | extend reach by one step               |
+| `reachable_trans`              | **no** (loops)     | transitivity                           |
+| `reachable_preserves_invariant`| no                 | the canonical closure lemma            |
+
+### Part 3 — The `verity_reach_grind` tactic
+
+A macro that:
+
+1. First tries `apply reachable_preserves_invariant <;> grind` — this
+   is the canonical shape of nearly every reach-closure obligation.
+2. Falls back to `apply reach_preserves_invariant <;> grind` for the
+   inductive `ReflTransGen` variant.
+3. Falls back to plain `grind` (base facts are already tagged).
+4. As a last resort, retries `grind` with `snoc`/`trans` as explicit
+   hints (will usually time out — only useful for tiny chains).
+
+## Why trans/snoc are **not** globally `@[grind]`
+
+Empirically, tagging `reachable_trans` and `reachable_snoc` makes
+`grind`'s E-matcher produce thousands of spurious instances such as
+
+```
+Reachable chainStep f (chainStep f (chainStep f (chainStep f b))) (chainStep f (chainStep f a))
+```
+
+because every existing `Reachable …` fact matches their first hypothesis
+pattern and every `chainStep _ _` term plausibly matches the step
+pattern. The E-matching "maximum rounds" threshold is hit in <1s.
+
+Leaving them as explicit hints (or arguments to
+`verity_reach_grind`'s inner `grind`) scopes them to situations where
+a manual `apply` has already fixed the relevant endpoints.
+
+## Demo proofs
+
+`Benchmark/Grindset/ReachTests.lean` contains:
+
+1. `demo_reach_preserves_P` — `Relation.ReflTransGen`-style invariant
+   preservation, closed by `verity_reach_grind`.
+2. `demo_chain_reach_preserves_membership` — the witness-based analogue
+   (`Reachable chainStep f a b → a ∈ S → b ∈ S` assuming `S`
+   step-closed), also closed by `verity_reach_grind`. This is the
+   exact shape used in the Safe case.
+
+Both are authored from the specs + contract side only — no peeking at
+`Proofs.lean`.
+
+There is also a concrete three-step chain example using
+`reachable_step` + `reachable_trans` to sanity-check composition.
+
+## Applicability estimate
+
+| Case                                   | Helps via this pack?                                    |
+| -------------------------------------- | ------------------------------------------------------- |
+| `Safe/OwnerManagerReach`               | **Partially.** `reachable_preserves_invariant` closes generic closure obligations (e.g. `reachableInList` propagation), but the *non-trivial* Safe theorems (`inListReachable`, acyclicity, unique predecessor after `addOwner`/`removeOwner`/`swapOwner`) require case-specific reasoning about how `next` is mutated at a handful of specific keys. The pack turns "induction on reach" into one-liner `verity_reach_grind`, but the surrounding `next`-mutation algebra is still the hard part. Estimate: closes ≤ 30–40% of obligations end-to-end. |
+| `Kleros/SortitionTrees`                | No — no reach relation. Needs S1's arithmetic grindset. |
+| `Lido/VaulthubLocked`                  | No — no reach relation. Needs S1's arithmetic grindset. |
+| `PaladinVotes/StreamRecoveryClaimUsdc` | No — no reach relation. Needs S1's arithmetic grindset. |
+
+So exactly **one** of the four cases actually benefits from the reach
+pack. The other three were misclassified as reach-heavy.
+
+## Limitations
+
+- The witness-based lemmas are generic over `step : σ → α → α`. Safe's
+  `next s a = wordToAddress (s.storageMap 0 a)` fits this shape, but
+  any case using a *relational* step (`next s a = b` as an arbitrary
+  predicate, not a function) would need a small adapter to bridge to
+  `Relation.ReflTransGen`. Not currently needed.
+- `verity_reach_grind` will happily spin on goals that are **not**
+  reach-closure shaped (plain `grind` will then hit limits); it is not
+  a universal solver.
+- The E-matching patterns for `reachable_trans`/`reachable_snoc` are
+  intentionally omitted — re-adding them as `@[grind →]` would loop.
+  If a future need arises, attach an explicit `grind_pattern` tied to
+  a unique top-level symbol.
+- `isChain_cons_cons` is only `@[simp]`, not `@[grind]` — its pattern
+  is too unconstrained for the E-matcher (matches every cons-cons
+  expression).
+
+## Open questions for S1
+
+- If the merged grindset adds a general `Verity.Specs`-level
+  `Reachable` alias, `Benchmark.Grindset.Reach.Reachable` can be
+  re-expressed as a direct `attribute [grind]` re-tag rather than a
+  new namespaced definition.
+- Worth checking whether mathlib's `Relation.TransGen`/`EqvGen` need
+  analogous packs — not currently exercised by any benchmark case.
diff --git a/Benchmark/Grindset/Reach.lean b/Benchmark/Grindset/Reach.lean
new file mode 100644
index 00000000..f5cd0be5
--- /dev/null
+++ b/Benchmark/Grindset/Reach.lean
@@ -0,0 +1,345 @@
+import Verity.Specs.Common
+import Mathlib.Logic.Relation
+import Mathlib.Data.List.Basic
+
+/-!
+# Grindset: Reach closure extension
+
+Custom `grind` attribute pack and a bespoke tactic (`verity_reach_grind`)
+for discharging reachability / reach-closure obligations; of the cases
+surveyed below, only `Safe/OwnerManagerReach` currently exercises it.
+
+## Reach shapes actually found in the benchmark
+
+We inspected the four cases flagged as reachability-heavy. Only one of
+them uses a real reach relation; the others turned out to be arithmetic
+or ownership specs with no transitive closure:
+
+* `Benchmark/Cases/Safe/OwnerManagerReach` — **does** use reach. The
+  shape is *witness-based*, not inductive:
+
+  ```
+  def isChain (s : ContractState) : List Address → Prop
+    | []           => True
+    | [_]          => True
+    | a :: b :: t  => next s a = b ∧ isChain s (b :: t)
+
+  def reachable (s : ContractState) (a b : Address) : Prop :=
+    ∃ chain, chain.head? = some a
+           ∧ chain.getLast? = some b
+           ∧ isChain s chain
+  ```
+
+* `Benchmark/Cases/Kleros/SortitionTrees` — storage arithmetic
+  invariants, no reach relation.
+* `Benchmark/Cases/Lido/VaulthubLocked` — solvency arithmetic (F-01),
+  no reach relation.
+* `Benchmark/Cases/PaladinVotes/StreamRecoveryClaimUsdc` — claim-state
+  updates, no reach relation.
+
+Because only `Safe/OwnerManagerReach` is genuinely reach-heavy we focus
+on its shape. We *also* provide a generic pack for
+`Relation.ReflTransGen` (the standard mathlib inductive transitive
+closure) so that future cases that pick the inductive formulation will
+be covered out of the box.
+-/
+
+set_option linter.unusedSectionVars false
+
+namespace Benchmark.Grindset.Reach
+
+open Verity
+open Verity.EVM.Uint256
+
+/-! ## Part 1 — Generic inductive reach via `Relation.ReflTransGen`
+
+`Relation.ReflTransGen r a b` is the reflexive–transitive closure of a
+step relation `r : α → α → Prop`. Useful closure lemmas are already
+provided by mathlib; we re-export them under `@[grind]` so `grind` can
+chain steps and preserve step-wise invariants automatically.
+-/
+
+section ReflTransGen
+variable {α : Type*} {r : α → α → Prop}
+
+-- Reflexivity is the obvious "no step" base case.
+@[grind]
+theorem reach_refl (a : α) : Relation.ReflTransGen r a a :=
+  Relation.ReflTransGen.refl
+
+-- One step is already reach.
+@[grind]
+theorem reach_of_step {a b : α} (h : r a b) : Relation.ReflTransGen r a b :=
+  Relation.ReflTransGen.single h
+
+-- Snoc: extend a reach by a final step (native mathlib shape).
+@[grind]
+theorem reach_tail {a b c : α}
+    (h₁ : Relation.ReflTransGen r a b) (h₂ : r b c) :
+    Relation.ReflTransGen r a c :=
+  Relation.ReflTransGen.tail h₁ h₂
+
+-- Cons: prefix a reach by an initial step.
+@[grind]
+theorem reach_head {a b c : α}
+    (h₁ : r a b) (h₂ : Relation.ReflTransGen r b c) :
+    Relation.ReflTransGen r a c :=
+  Relation.ReflTransGen.head h₁ h₂
+
+-- Transitivity. NOTE(review): globally `@[grind]`, unlike `reachable_trans` — confirm intended.
+@[grind]
+theorem reach_trans {a b c : α}
+    (h₁ : Relation.ReflTransGen r a b) (h₂ : Relation.ReflTransGen r b c) :
+    Relation.ReflTransGen r a c :=
+  Relation.ReflTransGen.trans h₁ h₂
+
+/--
+Invariant preservation under `ReflTransGen`. If `P` is preserved by
+every `r`-step, then `P` is preserved by `ReflTransGen r`.
+
+This is the *canonical* "reach-closure" lemma and the thing `grind`
+has the hardest time synthesising on its own, because it hides an
+induction on the reach derivation.
+-/
+theorem reach_preserves_invariant
+    {P : α → Prop}
+    (hStep : ∀ x y, r x y → P x → P y)
+    {a b : α} (hR : Relation.ReflTransGen r a b) (hP : P a) : P b := by
+  induction hR with
+  | refl => exact hP
+  | tail _ hrxy ih => exact hStep _ _ hrxy ih
+
+end ReflTransGen
+
+/-! ## Part 2 — Witness-based reach (`isChain` / `reachable` shape)
+
+This is the shape actually used in `Safe/OwnerManagerReach`. We don't
+import that module (we want `Grindset.Reach` to be self-contained and
+reusable), so we reproduce the shape generically over a *step function*
+`step : σ → α → α` and derive the same closure theorems. A user who
+has their own `reachable` and `isChain` can then just plumb through
+these lemmas with a one-line adapter.
+-/
+
+section ChainReach
+variable {σ : Type*} {α : Type*}
+
+/-- A chain is a list where consecutive elements are connected by
+`step s`. Mirrors `Safe.OwnerManagerReach.isChain` generically. -/
+def IsChain (step : σ → α → α) (s : σ) : List α → Prop
+  | []          => True
+  | [_]         => True
+  | a :: b :: t => step s a = b ∧ IsChain step s (b :: t)
+
+@[grind, simp]
+theorem isChain_nil (step : σ → α → α) (s : σ) :
+    IsChain step s ([] : List α) := trivial
+
+@[grind, simp]
+theorem isChain_singleton (step : σ → α → α) (s : σ) (a : α) :
+    IsChain step s [a] := trivial
+
+@[simp]
+theorem isChain_cons_cons (step : σ → α → α) (s : σ) (a b : α) (t : List α) :
+    IsChain step s (a :: b :: t) ↔
+      step s a = b ∧ IsChain step s (b :: t) := Iff.rfl
+
+/-- Tail of a chain is a chain. Useful for inducting over chain length. -/
+theorem isChain_tail (step : σ → α → α) (s : σ) :
+    ∀ {a : α} {t : List α}, IsChain step s (a :: t) → IsChain step s t
+  | _, [], _ => trivial
+  | _, _ :: _, h => h.2
+
+/-- Append a `step s b` tail to a chain ending at `b`. -/
+private theorem isChain_append_step (step : σ → α → α) (s : σ) (b : α) :
+    ∀ (chain : List α),
+      IsChain step s chain → chain.getLast? = some b →
+      IsChain step s (chain ++ [step s b])
+  | [], _, h => by simp [List.getLast?] at h
+  | [a], _, hlast => by
+      have ha : a = b := by simpa [List.getLast?] using hlast
+      subst ha
+      exact ⟨rfl, trivial⟩
+  | a₁ :: a₂ :: t, hch, hlast => by
+      have hstep : step s a₁ = a₂ := hch.1
+      have hrest : IsChain step s (a₂ :: t) := hch.2
+      have hlast' : (a₂ :: t).getLast? = some b := by
+        simpa [List.getLast?] using hlast
+      have ih := isChain_append_step step s b (a₂ :: t) hrest hlast'
+      -- (a₁ :: a₂ :: t) ++ [step s b] = a₁ :: ((a₂ :: t) ++ [step s b])
+      show IsChain step s (a₁ :: ((a₂ :: t) ++ [step s b]))
+      exact ⟨hstep, ih⟩
+
+/-- Witness-based reachability: there is a chain from `a` to `b`. -/
+def Reachable (step : σ → α → α) (s : σ) (a b : α) : Prop :=
+  ∃ chain : List α,
+    chain.head? = some a ∧
+    chain.getLast? = some b ∧
+    IsChain step s chain
+
+theorem reachable_refl (step : σ → α → α) (s : σ) (a : α) :
+    Reachable step s a a :=
+  ⟨[a], rfl, rfl, isChain_singleton step s a⟩
+
+theorem reachable_step (step : σ → α → α) (s : σ) (a : α) :
+    Reachable step s a (step s a) :=
+  ⟨[a, step s a], rfl, rfl, ⟨rfl, trivial⟩⟩
+
+/--
+A single forward step preserves reachability: if `Reachable step s a b`
+then `Reachable step s a (step s b)`. This is the most common closure
+lemma in practice (the Safe proofs repeatedly extend a witnessed
+chain by one hop).
+-/
+theorem reachable_snoc (step : σ → α → α) (s : σ)
+    {a b : α} (h : Reachable step s a b) :
+    Reachable step s a (step s b) := by
+  obtain ⟨chain, hhd, hlast, hch⟩ := h
+  refine ⟨chain ++ [step s b], ?_, ?_, ?_⟩
+  · -- head: `hhd` rules out `chain = []`, so appending leaves the head unchanged
+    cases chain with
+    | nil => simp [List.head?] at hhd
+    | cons c cs => simpa [List.head?] using hhd
+  · -- last: the last element of `chain ++ [x]` is `x`
+    simp
+  · exact isChain_append_step step s b chain hch hlast
+
+/-- Transitivity of chain-reachability (concatenation of witnesses). -/
+theorem reachable_trans (step : σ → α → α) (s : σ)
+    {a b c : α} (h1 : Reachable step s a b) (h2 : Reachable step s b c) :
+    Reachable step s a c := by
+  obtain ⟨chain₂, hhd₂, hlast₂, hch₂⟩ := h2
+  -- Auxiliary: walk `chain₂` and repeatedly extend the prefix reach
+  -- witness by `reachable_snoc`.
+  suffices aux : ∀ (chain : List α) (a b c : α),
+      chain.head? = some b → chain.getLast? = some c →
+      IsChain step s chain → Reachable step s a b → Reachable step s a c from
+    aux chain₂ a b c hhd₂ hlast₂ hch₂ h1
+  intro chain
+  induction chain with
+  | nil =>
+      intros _ _ _ hhd _ _ _
+      simp [List.head?] at hhd
+  | cons x xs ih =>
+      intros a b c hhd hlast hch h1
+      have hx : x = b := by simpa [List.head?] using hhd
+      cases xs with
+      | nil =>
+          have hxc : x = c := by simpa [List.getLast?] using hlast
+          have hbc : b = c := hx ▸ hxc
+          exact hbc ▸ h1
+      | cons y ys =>
+          have hstep : step s x = y := hch.1
+          have hrest : IsChain step s (y :: ys) := hch.2
+          have hlast' : (y :: ys).getLast? = some c := by
+            simpa [List.getLast?] using hlast
+          have hhd' : (y :: ys).head? = some y := rfl
+          have hstep_b : step s b = y := hx ▸ hstep
+          have hay : Reachable step s a y := by
+            have := reachable_snoc step s h1
+            rw [hstep_b] at this
+            exact this
+          exact ih a y c hhd' hlast' hrest hay
+
+/--
+**The** reach-closure lemma for the chain-witness shape:
+an invariant preserved by every `step` is preserved by `Reachable`.
+
+This is the `reach_preserves_invariant` counterpart for witness-based
+reach — see `REACH_NOTES.md` for discussion.
+-/
+theorem reachable_preserves_invariant
+    {step : σ → α → α} {s : σ} {P : α → Prop}
+    (hStep : ∀ x, P x → P (step s x))
+    {a b : α} (h : Reachable step s a b) (hP : P a) : P b := by
+  obtain ⟨chain, hhd, hlast, hch⟩ := h
+  -- Auxiliary: for any chain with head = some a, last = some b, and
+  -- `IsChain`, `P a → P b`. Proven by induction on the chain.
+  suffices aux : ∀ (chain : List α) (a b : α),
+      chain.head? = some a → chain.getLast? = some b →
+      IsChain step s chain → P a → P b from aux chain a b hhd hlast hch hP
+  intro chain
+  induction chain with
+  | nil =>
+      intros a b hhd _ _ _
+      simp [List.head?] at hhd
+  | cons x xs ih =>
+      intros a b hhd hlast hch hP
+      have hx : x = a := by simpa [List.head?] using hhd
+      cases xs with
+      | nil =>
+          have hxb : x = b := by simpa [List.getLast?] using hlast
+          have hab : a = b := hx ▸ hxb
+          exact hab ▸ hP
+      | cons y ys =>
+          have hstep : step s x = y := hch.1
+          have hrest : IsChain step s (y :: ys) := hch.2
+          have hlast' : (y :: ys).getLast? = some b := by
+            simpa [List.getLast?] using hlast
+          have hhd' : (y :: ys).head? = some y := rfl
+          have hstep_a : step s a = y := hx ▸ hstep
+          have hPy : P y := hstep_a ▸ hStep a hP
+          exact ih y b hhd' hlast' hrest hPy
+
+/-- Alias of `reachable_step` (same statement, proved by it): `a` reaches
+`step s a` in one hop. -/
+theorem reachable_of_step (step : σ → α → α) (s : σ) (a : α) :
+    Reachable step s a (step s a) := reachable_step step s a
+
+end ChainReach
+
+-- We intentionally do NOT tag `reachable_snoc` or `reachable_trans`
+-- globally with `@[grind]` — they are too productive (each instance
+-- fires on any reachability fact in context and can loop the
+-- E-matcher). They are still handed to `grind` as explicit hints
+-- inside the `verity_reach_grind` macro in controlled situations.
+attribute [grind] reachable_refl
+attribute [grind] reachable_step
+attribute [grind] reachable_of_step
+
+/-! ## Part 3 — The `verity_reach_grind` tactic
+
+`grind`'s E-matcher is strong at rewriting and propagating equalities,
+but it cannot synthesise inductions on reach derivations on its own.
+The lemmas above ship the induction *result* as ordinary theorems, so
+most concrete obligations of the form
+
+  `Reachable step s a b → Inv a → Inv b`
+
+close via `reachable_preserves_invariant` plus `grind`'s usual
+unfolding. For trickier goals we expose a tactic macro that first
+applies the closure lemmas (finishing each subgoal with `assumption` or
+`grind`), and only then falls back to a plain `grind`.
+
+We deliberately use a simple `macro` (not parameterised by extra
+`grind` hints) — extra hypotheses can always be introduced by the user
+before calling `verity_reach_grind` and `grind` will pick them up.
+-/
+
+/--
+`verity_reach_grind` is a small wrapper over `grind` for reach-closure
+goals. It first applies `reachable_preserves_invariant`, then
+`reach_preserves_invariant`, closing each subgoal with `assumption` or
+`grind`; failing that it runs `grind` alone, then `grind` with extra hints.
+-/
+macro (name := verity_reach_grind) "verity_reach_grind" : tactic =>
+  `(tactic|
+    first
+    -- 1. Try the canonical reach-preservation closure first. This
+    --    handles the overwhelmingly common "Reach … → Inv … → Inv …"
+    --    shape by applying `*_preserves_invariant` and dispatching
+    --    every resulting subgoal by `assumption` or `grind`.
+    | (apply Benchmark.Grindset.Reach.reachable_preserves_invariant <;>
+        first | assumption | grind)
+    | (apply Benchmark.Grindset.Reach.reach_preserves_invariant <;>
+        first | assumption | grind)
+    -- 2. Plain `grind` (no `reachable_snoc`/`reachable_trans`, to avoid
+    --    E-matcher loops). The cheap closure facts (`refl`, `step`,
+    --    `of_step`) are globally tagged `@[grind]` and fire automatically.
+    | grind
+    -- 3. Last-ditch: pass the productive lemmas as explicit hints (the `reach_*`
+    --    ones are already `@[grind]` in Part 1). Tiny chains only; usually hits thresholds.
+    | grind [reach_trans, reach_tail, reach_head,
+             reachable_snoc, reachable_trans])
+
+end Benchmark.Grindset.Reach
diff --git a/Benchmark/Grindset/ReachTests.lean b/Benchmark/Grindset/ReachTests.lean
new file mode 100644
index 00000000..78e5bdb7
--- /dev/null
+++ b/Benchmark/Grindset/ReachTests.lean
@@ -0,0 +1,102 @@
+import Benchmark.Grindset.Reach
+import Mathlib.Logic.Relation
+
+/-!
+# Grindset: Reach closure — demo proofs
+
+These two tests demonstrate that the `Reach.lean` extension really
+does close reach-closure obligations. They are *independent* of any
+case's `Proofs.lean` — both theorems are authored from scratch using
+only the specs side (an abstract `step` / `next` function and a
+user-supplied step-preservation hypothesis).
+
+Both tests are closed using `verity_reach_grind`, the macro defined in
+`Benchmark.Grindset.Reach`.
+-/
+
+set_option linter.unusedSectionVars false
+
+namespace Benchmark.Grindset.Reach.Tests
+
+open Benchmark.Grindset.Reach
+
+/-! ## Demo 1 — inductive `ReflTransGen` invariant preservation
+
+A small linked-list style state: the state is a function `Nat → Nat`
+mapping each slot to the "next" slot. The step relation says `a` can
+step to `b` in state `f` iff `f a = b`. We prove that any invariant
+which is preserved by one step is preserved under the full transitive
+closure — the standard "reach-preserves-invariant" shape.
+
+This closes via the generic `ReflTransGen`-tagged lemmas.
+-/
+
+def stepRel (f : Nat → Nat) (a b : Nat) : Prop := f a = b
+
+/--
+If `P` is closed under `f` (hypothesis `hStep`) then `P` is closed under
+`Relation.ReflTransGen (stepRel f)`. Closed by `verity_reach_grind`.
+-/
+theorem demo_reach_preserves_P
+    (f : Nat → Nat) (P : Nat → Prop)
+    (hStep : ∀ x, P x → P (f x))
+    (a b : Nat) (hR : Relation.ReflTransGen (stepRel f) a b) (hPa : P a) :
+    P b := by
+  have hStep' : ∀ x y, stepRel f x y → P x → P y := by
+    intro x y hxy hPx
+    -- `stepRel f x y` is definitionally `f x = y`
+    have : f x = y := hxy
+    exact this ▸ hStep x hPx
+  -- The macro tries the `*_preserves_invariant` closure lemmas first, then plain `grind`.
+  verity_reach_grind
+
+/-! ## Demo 2 — chain-witness reach preserves a set-membership invariant
+
+Here we mirror the exact shape used in `Safe/OwnerManagerReach`: a
+witnessed chain `Reachable step s a b`, a state-dependent step
+function, and an invariant (membership in a step-closed set) that must
+propagate along the chain.
+
+The proof is closed using `verity_reach_grind`, which invokes
+`reachable_preserves_invariant` under the hood.
+-/
+
+/--
+State type: a function from `Nat` (a node) to its successor. The step
+function is just state application.
+-/
+def chainStep (f : Nat → Nat) (a : Nat) : Nat := f a
+
+/--
+If a set `S` is closed under `chainStep f` (i.e. `x ∈ S → f x ∈ S`)
+and `Reachable (chainStep) f a b` holds, then `a ∈ S → b ∈ S`.
+
+This is the *exact* reach-closure obligation pattern from the Safe
+OwnerManagerReach specs (once one specialises `σ := ContractState`,
+`α := Address`, `chainStep := next`, and takes `S` to be any
+`next`-closed address set such as "nodes reachable from SENTINEL").
+-/
+theorem demo_chain_reach_preserves_membership
+    (f : Nat → Nat) (S : Set Nat)
+    (hClosed : ∀ x, x ∈ S → f x ∈ S)
+    (a b : Nat) (hR : Reachable chainStep f a b) (hA : a ∈ S) :
+    b ∈ S := by
+  -- `chainStep f x = f x` by definition, so membership-closure under
+  -- `f` is exactly membership-closure under `chainStep`.
+  have hStep : ∀ x, x ∈ S → chainStep f x ∈ S := hClosed
+  verity_reach_grind
+
+/-! ## Sanity: the closure lemmas also compose by hand on concrete steps -/
+
+/-- Three-step chain: builds a reach by stacking `reachable_step`. -/
+example (f : Nat → Nat) (a : Nat) :
+    Reachable chainStep f a (f (f (f a))) := by
+  -- Each `reachable_step` gives one hop; the trans lemma chains them.
+  have h1 : Reachable chainStep f a (f a) := reachable_step chainStep f a
+  have h2 : Reachable chainStep f (f a) (f (f a)) :=
+    reachable_step chainStep f (f a)
+  have h3 : Reachable chainStep f (f (f a)) (f (f (f a))) :=
+    reachable_step chainStep f (f (f a))
+  exact reachable_trans chainStep f (reachable_trans chainStep f h1 h2) h3
+
+end Benchmark.Grindset.Reach.Tests
diff --git a/Benchmark/Grindset/Tests.lean b/Benchmark/Grindset/Tests.lean
new file mode 100644
index 00000000..89dbe044
--- /dev/null
+++ b/Benchmark/Grindset/Tests.lean
@@ -0,0 +1,89 @@
+/-
+  Benchmark.Grindset.Tests — demonstration proofs closed by a single `grind`.
+
+  These proofs are written from scratch against `Specs.lean` + `Contract.lean`.
+  They deliberately do NOT import any `Proofs.lean` from under
+  `Benchmark/Cases/` — the held-out ground truth is never consulted.
+
+  Each demo theorem has the same shape as the sorry-stubs in
+  `Benchmark/Generated/.../Tasks/*.lean`, and is discharged by a single
+  invocation of `grind` after a `simp only [grind_norm, ...]` pass that
+  unfolds the spec predicate and the contract definitions it mentions.
+-/
+
+import Benchmark.Grindset.Core
+import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs
+import Benchmark.Cases.Lido.VaulthubLocked.Specs
+
+namespace Benchmark.Grindset.Tests
+
+open Verity
+open Verity.EVM.Uint256
+
+/-! ## SideEntrance.deposit: slot-write spec -/
+
+/--
+Demo #1: `deposit` writes `add oldPoolBalance amount` to `poolBalance`.
+Closed by a single `grind` call after `simp only [grind_norm, ...]` has
+unfolded the spec predicate, the contract function and the listed names.
+-/
+theorem demo_deposit_sets_pool_balance
+    (amount : Verity.Core.Uint256)
+    (s : ContractState) :
+    let s' :=
+      ((Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.deposit amount).run s).snd
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.deposit_sets_pool_balance_spec
+      amount s s' := by
+  simp only [grind_norm,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.deposit_sets_pool_balance_spec,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.deposit,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.poolBalance,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.totalCredits,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.creditOf]
+  grind
+
+/--
+Demo #2: `deposit` credits the caller's mapping slot by `amount`.
+This is the "mapping + sender" variant; the proof has the same shape as
+Demo #1 and relies on `storageMap_setMapping_sender_eq` (from `Core.lean`,
+not named explicitly below) plus `grind_norm` to collapse the do-block.
+-/
+theorem demo_deposit_sets_sender_credit
+    (amount : Verity.Core.Uint256)
+    (s : ContractState) :
+    let s' :=
+      ((Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.deposit amount).run s).snd
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.deposit_sets_sender_credit_spec
+      amount s s' := by
+  simp only [grind_norm,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.deposit_sets_sender_credit_spec,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.deposit,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.poolBalance,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.totalCredits,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.creditOf]
+  grind
+
+/--
+Demo #3: `flashLoanViaDeposit` preserves pool balance. This is a branchy
+case because the function body starts with a `require (amount <= oldPoolBalance)`.
+The precondition `hBorrow` (on raw slot 0, presumably `poolBalance`) is fed to
+`simp only` to discharge the branch; the rest is the slot-write logic of `deposit`.
+-/
+theorem demo_flashLoanViaDeposit_preserves_pool_balance
+    (amount : Verity.Core.Uint256)
+    (s : ContractState)
+    (hBorrow : amount <= s.storage 0) :
+    let s' :=
+      ((Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.flashLoanViaDeposit
+          amount).run s).snd
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.flashLoanViaDeposit_preserves_pool_balance_spec
+      amount s s' := by
+  simp only [grind_norm,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.flashLoanViaDeposit_preserves_pool_balance_spec,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.flashLoanViaDeposit,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.poolBalance,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.totalCredits,
+    Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.creditOf, hBorrow]
+  grind
+
+end Benchmark.Grindset.Tests
diff --git a/harness/PROMPT.md b/harness/PROMPT.md
index a38026de..7a08a75d 100644
--- a/harness/PROMPT.md
+++ b/harness/PROMPT.md
@@ -6,6 +6,46 @@ Each task gives the agent:
 - one editable proof file
 - one theorem target
 
-The agent must return the full proof file. It must not change specs, change implementations, or rely on hidden solved proofs.
+The agent must return the full proof file. It must not change specs, change
+implementations, or rely on hidden solved proofs.
 
-The harness rejects placeholders, runs Lean in a temp workspace, and checks the target theorem.
+The harness rejects placeholders, runs Lean in a temp workspace, and checks
+the target theorem.
+
+## Proof strategy
+
+Every generated task skeleton already imports `Benchmark.Grindset` and starts
+with a grind-first body of the form:
+
+```lean
+theorem foo ... := by
+  unfold foo_spec
+  grind [ContractName.fn, ContractName.fieldA, ContractName.fieldB]
+```
+
+That is the pattern to keep. Your first attempt should always be:
+
+1. Keep `unfold <spec_name>` on the first line of the proof.
+2. Call `grind [ContractName.fn, <storage fields>]`, listing at least every
+   field the function touches and, when in doubt, every storage field declared
+   inside `verity_contract ContractName` — extra hints are cheap, missing hints
+   are expensive. Do NOT hint the generic operational lemmas (`getStorage`,
+   `setStorage`, `Verity.bind`, `Contract.run`, `ContractResult.snd`, …); they
+   are already tagged `@[grind]` by `Benchmark.Grindset`.
+3. If the goal has a case split, introduce the branch hypotheses with
+   `by_cases` BEFORE the `grind` call and pass each hypothesis into the
+   `grind [...]` list alongside the contract hints.
+4. If `grind` leaves goals open, call `grind?` once on the stuck state. It
+   prints the concrete lemma set grind chose; copy any additions you see back
+   into your `grind [...]` hint list, then retry.
+5. Only if `grind` still fails after the above, fall back to the simp-heavy
+   recipe in `harness/PROOF_PATTERNS.md` (`simp` / `simp_all` with the
+   operational lemmas enumerated explicitly, optionally finished with
+   `native_decide`).
+
+Do not remove `import Benchmark.Grindset`, do not remove `unfold <spec>`, and
+do not revert to a pure `simp`-only pattern unless you have first tried
+`grind` with a complete hint list and observed it fail.
+
+See `harness/PROOF_PATTERNS.md` for worked examples of both the grind-first
+primary pattern and the simp/`by_cases` fallback.
diff --git a/harness/PROOF_PATTERNS.md b/harness/PROOF_PATTERNS.md
index 8d2c337d..a47ae971 100644
--- a/harness/PROOF_PATTERNS.md
+++ b/harness/PROOF_PATTERNS.md
@@ -2,61 +2,125 @@
 
 Use public operational proof patterns, not hidden case solutions.
 
-Verity execution proofs often reduce with `simp` once the execution path is fixed.
-Typical symbols to unfold or simplify are:
-
-- `getStorage`, `setStorage`, `setMapping`, `setMappingUint`
-- `Verity.require`, `Verity.bind`, `Bind.bind`
-- `Verity.pure`, `Pure.pure`
-- `Contract.run`, `ContractResult.snd`
-- the contract's storage labels, such as `ContractName.counter`
-
-The simp set MUST include ALL storage field definitions from the contract. Storage fields are declared as `fieldName : Uint256 := slot N` inside `verity_contract`. Include each one by name (e.g., `ContractName.depositCount`, `ContractName.chainStarted`) so that `.slot` reduces to the concrete slot number. Without these, simp leaves unresolved `if` expressions comparing `s.storage ContractName.field.slot` against constants.
-
-Common pattern for a successful-path slot-write theorem:
+Lean 4.22's `grind` tactic is the primary closer for Verity execution proofs.
+Every generated task skeleton imports `Benchmark.Grindset`, which bundles the
+`@[grind]`-tagged operational lemmas (`getStorage`, `setStorage`,
+`setMapping`, `setMappingUint`, `Verity.require`, `Verity.bind`, `Bind.bind`,
+`Verity.pure`, `Pure.pure`, `Contract.run`, `ContractResult.snd`, and friends)
+needed to reduce Verity execution terms. You should lean on `grind` first and
+only fall back to `simp`/`by_cases` if grind leaves goals open.
+
+## Primary: grind-first pattern
+
+Start with `unfold` on the spec name followed by `grind [...]` passing the
+contract function you are reasoning about and every storage field it touches.
+Storage fields are declared as `fieldName : Uint256 := slot N` inside
+`verity_contract`; hint each one by its fully-qualified name
+(e.g. `ContractName.depositCount`, `ContractName.chainStarted`) so `grind` can
+reduce `.slot` to the concrete slot number.
 
 ```lean
-private theorem slot_write_helper
+theorem slot_write_theorem
     (x : Uint256) (s : ContractState)
     (hGuard : ...) :
     let s' := ((ContractName.fn x).run s).snd
-    s'.storage slot = expected := by
-  simp [ContractName.fn, hGuard, ContractName.slotField,
-    getStorage, setStorage, Verity.require, Verity.bind, Bind.bind,
-    Verity.pure, Pure.pure, Contract.run, ContractResult.snd]
+    spec_name x s s' := by
+  unfold spec_name
+  grind [ContractName.fn,
+         ContractName.fieldA, ContractName.fieldB, ContractName.fieldC]
 ```
 
-Common pattern for a branch theorem:
+Rules of thumb for the grind hint list:
+
+- Always include `ContractName.fn` for the contract function under test.
+- Always include every storage field of `ContractName` that the function
+  reads or writes (when in doubt, include them all — extra hints are cheap).
+- If the spec references another helper function (e.g. `computedClaimAmount`),
+  add that helper name too so `grind` can unfold it.
+- You do NOT need to hint the operational lemmas (`getStorage`, `setStorage`,
+  `Verity.bind`, `Contract.run`, `ContractResult.snd`, ...). They are already
+  tagged `@[grind]` via `Benchmark.Grindset`.
+
+If `grind` fails to close the goal, use `grind?` once to print the actual
+lemma set it chose; copy any useful additions back into your `grind [...]`
+hint list, then retry.
+
+## Branching with grind
+
+When the contract has a case split (an `ite`, a `require` with a non-trivial
+condition, or nested `if`s in the spec), prove the branch facts first and
+pass them to `grind` along with the usual hints:
 
 ```lean
-by_cases hBranch : condition
-· simp [ContractName.fn, hBranch, ...]
-· have hNotBranch : ¬ condition := hBranch
-  simp [ContractName.fn, hNotBranch, ...]
+theorem branch_theorem ... := by
+  by_cases hBranch : condition
+  · unfold spec_name
+    grind [ContractName.fn, ContractName.field, hBranch]
+  · have hNotBranch : ¬ condition := hBranch
+    unfold spec_name
+    grind [ContractName.fn, ContractName.field, hNotBranch]
 ```
 
-Do not use `split` on the final post-state goal unless the goal itself is explicitly a conjunction or a sum-type elimination. Generated Verity execution terms often simplify better if you first prove the exact branch facts used by the contract and then call `simp`.
+For nested conditionals (e.g. a threshold check inside a deposit-size check),
+nest `by_cases` the same way and put every branch hypothesis into the
+`grind [...]` list:
 
-For arithmetic threshold branches, the negated fact often needs to be restated in the comparator form used by the generated code. Example:
+```lean
+by_cases hBig : depositAmount >= 32000000000
+· by_cases hThresh : add (s.storage 1) 1 = 65536
+  · grind [ContractName.fn, ContractName.field, hCount, hMin, hBig, hThresh]
+  · grind [ContractName.fn, ContractName.field, hCount, hMin, hBig, hThresh]
+· grind [ContractName.fn, ContractName.field, hCount, hMin, hBig]
+```
+
+For arithmetic threshold branches, restate the negated fact in the comparator
+form used by the generated code before handing it to `grind`:
 
 ```lean
 have hNotFull : ¬ 32000000000 ≤ depositAmount := Nat.not_le_of_lt hSmall
-simp [ContractName.fn, hCount, hMin, hNotFull, ...]
+grind [ContractName.fn, ContractName.field, hCount, hMin, hNotFull]
 ```
 
-If one theorem has to work for both sides of a branch, prove two private helpers first, one per branch, then use `by_cases` in the public theorem and `simpa using` the matching helper.
+If one theorem has to work for both sides of a branch, prove two private
+helpers first (one per branch, each closed by `grind`), then `by_cases` in
+the public theorem and finish each branch with `exact helper_branch ...`.
+
+## Fallback: simp + by_cases
 
-If `simp` leaves nested `match`/`if` expressions with free variables, use `by_cases` on each unresolved condition BEFORE calling `simp`, not `split` after. Pass all case hypotheses to `simp`. For contracts with nested conditionals (e.g., a threshold check inside a deposit-size check), nest `by_cases`:
+If `grind` still leaves goals after you have unfolded the spec and hinted the
+contract function plus every storage field, fall back to the pre-grindset
+simp-heavy recipe. This is strictly a fallback; prefer to extend the `grind`
+hint list first.
 
 ```lean
-by_cases hBig : depositAmount >= 32000000000
-· by_cases hThresh : add (s.storage 1) 1 = 65536
-  · simp [ContractName.fn, getStorage, setStorage, ..., hCount, hMin, hBig, hThresh]
-  · simp [ContractName.fn, getStorage, setStorage, ..., hCount, hMin, hBig, hThresh]
-· simp [ContractName.fn, getStorage, setStorage, ..., hCount, hMin, hBig]
+-- Fallback when grind alone does not close:
+by_cases hBranch : condition
+· simp [ContractName.fn, hBranch, ContractName.slotField,
+    getStorage, setStorage, Verity.require, Verity.bind, Bind.bind,
+    Verity.pure, Pure.pure, Contract.run, ContractResult.snd]
+· have hNotBranch : ¬ condition := hBranch
+  simp [ContractName.fn, hNotBranch, ContractName.slotField,
+    getStorage, setStorage, Verity.require, Verity.bind, Bind.bind,
+    Verity.pure, Pure.pure, Contract.run, ContractResult.snd]
 ```
 
-If `simp` leaves unsolved goals because a hypothesis uses a spec helper name (e.g., `computedClaimAmount`) while the goal has the definition already unfolded, use `simp_all` instead of `simp`. `simp_all` rewrites hypotheses into the goal context, resolving name mismatches automatically. Pattern:
+The simp set MUST include every storage field definition from the contract.
+Without them, `simp` leaves unresolved `if` expressions comparing
+`s.storage ContractName.field.slot` against constants.
+
+Do not use `split` on the final post-state goal unless the goal itself is
+explicitly a conjunction or a sum-type elimination. Generated Verity
+execution terms often simplify better if you first prove the exact branch
+facts used by the contract and then call `simp`.
+
+If `simp` leaves nested `match`/`if` expressions with free variables, use
+`by_cases` on each unresolved condition BEFORE calling `simp`, not `split`
+after. Pass all case hypotheses to `simp`.
+
+If `simp` leaves unsolved goals because a hypothesis uses a spec helper name
+(e.g., `computedClaimAmount`) while the goal has the definition already
+unfolded, use `simp_all` instead of `simp`. `simp_all` simplifies the
+hypotheses and the goal with the same simp set, resolving name mismatches.
 
 ```lean
 unfold specName
@@ -66,9 +130,9 @@ simp_all [ContractName.fn, getStorage, setStorage, getMapping, setMapping,
           specHelper]
 ```
 
-If `simp` reduces the goal to concrete slot equalities or a finite `if` over concrete slot numbers, `native_decide` or `decide` often closes the remaining goal.
-
-Typical shape:
+If `simp` reduces the goal to concrete slot equalities or a finite `if` over
+concrete slot numbers, `native_decide` or `decide` often closes the remaining
+goal:
 
 ```lean
 have hSlot : s'.storage slot = expected := by
@@ -76,7 +140,8 @@ have hSlot : s'.storage slot = expected := by
   native_decide
 ```
 
-If `simp` already solves the goal, do not leave a trailing `decide`, `exact`, or extra tactic line after it; Lean will report `no goals to be solved`.
+If `simp` already solves the goal, do not leave a trailing `decide`, `exact`,
+or extra tactic line after it; Lean will report `no goals to be solved`.
 
 If the public theorem is just a named spec, it is often cleaner to:
 
diff --git a/harness/README.md b/harness/README.md
index 71cb8a40..997bed04 100644
--- a/harness/README.md
+++ b/harness/README.md
@@ -23,10 +23,12 @@ Core files:
 - `harness/agents/*.json`: bundled profiles
 
 Bundled profiles:
-- `default`: repo reference profile
-- `interactive`: minimal-tool interactive profile
+- `default`: repo reference profile (strict, builtin/fast via proxy)
 - `openai-compatible`: generic external OpenAI-compatible profile
 - `openai-proxy-fast`: pinned proxy profile
+- `interactive-gpt`: interactive, OpenRouter `openai/gpt-5.4`
+- `interactive-opus`: interactive, OpenRouter `anthropic/claude-opus-4.7`
+- `interactive-smart`: interactive, `builtin/smart` via configured proxy
 
 Runtime modes:
 - `strict`: no agent tools
diff --git a/harness/agents/builtin-smart.json b/harness/agents/builtin-smart.json
deleted file mode 100644
index 1cf2d9e5..00000000
--- a/harness/agents/builtin-smart.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "schema_version": 1,
-  "agent_id": "verity-benchmark-builtin-smart",
-  "track": "reference",
-  "run_slug": "builtin-smart",
-  "adapter": "openai_compatible",
-  "base_url": "https://agent-backend.thomas.md/v1",
-  "base_url_env": null,
-  "model": "builtin/smart",
-  "model_env": null,
-  "api_key": null,
-  "api_key_env": "VERITY_BENCHMARK_AGENT_API_KEY",
-  "chat_completions_path": "/chat/completions",
-  "models_path": "/models",
-  "system_prompt_files": [
-    "harness/PROMPT.md",
-    "harness/POLICY.md",
-    "harness/TOOLS.md",
-    "harness/PROOF_PATTERNS.md"
-  ],
-  "mode": "strict",
-  "temperature": 0.0,
-  "max_completion_tokens": 2000,
-  "max_attempts": 8,
-  "max_tool_calls": 24,
-  "headers": {},
-  "header_envs": {},
-  "extra_body": {
-    "thinking": {
-      "type": "disabled"
-    }
-  },
-  "request_timeout_seconds": 120
-}
diff --git a/harness/agents/combined-lean-tools.json b/harness/agents/interactive-gpt.json
similarity index 67%
rename from harness/agents/combined-lean-tools.json
rename to harness/agents/interactive-gpt.json
index e8fcfa17..2c1b823e 100644
--- a/harness/agents/combined-lean-tools.json
+++ b/harness/agents/interactive-gpt.json
@@ -1,11 +1,12 @@
 {
   "schema_version": 1,
-  "agent_id": "combined-lean-tools",
+  "agent_id": "interactive-gpt",
+  "mode": "interactive",
   "track": "custom",
-  "run_slug": "combined-lean-tools",
+  "run_slug": "interactive-gpt-5-4",
   "adapter": "openai_compatible",
   "base_url": "https://openrouter.ai/api/v1",
-  "model": "google/gemini-3.1-flash-lite-preview",
+  "model": "openai/gpt-5.4",
   "api_key_env": "OPENROUTER_API_KEY",
   "chat_completions_path": "/chat/completions",
   "models_path": "/models",
@@ -15,13 +16,12 @@
     "harness/TOOLS.md",
     "harness/PROOF_PATTERNS.md"
   ],
-  "mode": "interactive",
   "temperature": 0.0,
-  "max_completion_tokens": 2000,
-  "max_attempts": 12,
-  "max_tool_calls": 24,
+  "max_completion_tokens": 4096,
+  "max_attempts": 32,
+  "max_tool_calls": 80,
   "headers": {},
   "header_envs": {},
   "extra_body": {},
-  "request_timeout_seconds": 120
+  "request_timeout_seconds": 180
 }
diff --git a/harness/agents/interactive-candidate.json b/harness/agents/interactive-opus.json
similarity index 54%
rename from harness/agents/interactive-candidate.json
rename to harness/agents/interactive-opus.json
index 217809e4..2e2d4a9f 100644
--- a/harness/agents/interactive-candidate.json
+++ b/harness/agents/interactive-opus.json
@@ -1,16 +1,13 @@
 {
   "schema_version": 1,
-  "agent_id": "openai-interactive",
+  "agent_id": "interactive-opus",
   "mode": "interactive",
   "track": "custom",
-  "run_slug": "interactive-candidate",
+  "run_slug": "interactive-opus-4-7",
   "adapter": "openai_compatible",
-  "base_url": null,
-  "base_url_env": "VERITY_BENCHMARK_AGENT_BASE_URL",
-  "model": null,
-  "model_env": "VERITY_BENCHMARK_AGENT_MODEL",
-  "api_key": null,
-  "api_key_env": "VERITY_BENCHMARK_AGENT_API_KEY",
+  "base_url": "https://openrouter.ai/api/v1",
+  "model": "anthropic/claude-opus-4.7",
+  "api_key_env": "OPENROUTER_API_KEY",
   "chat_completions_path": "/chat/completions",
   "models_path": "/models",
   "system_prompt_files": [
@@ -20,9 +17,9 @@
     "harness/PROOF_PATTERNS.md"
   ],
   "temperature": 0.0,
-  "max_completion_tokens": 3000,
-  "max_attempts": 16,
-  "max_tool_calls": 24,
+  "max_completion_tokens": 4096,
+  "max_attempts": 32,
+  "max_tool_calls": 80,
   "headers": {},
   "header_envs": {},
   "extra_body": {},
diff --git a/harness/agents/interactive-smart.json b/harness/agents/interactive-smart.json
index 82d45275..b0095371 100644
--- a/harness/agents/interactive-smart.json
+++ b/harness/agents/interactive-smart.json
@@ -21,8 +21,8 @@
   ],
   "temperature": 0.0,
   "max_completion_tokens": 2000,
-  "max_attempts": 16,
-  "max_tool_calls": 24,
+  "max_attempts": 32,
+  "max_tool_calls": 80,
   "headers": {},
   "header_envs": {},
   "extra_body": {
diff --git a/harness/agents/interactive.json b/harness/agents/interactive.json
deleted file mode 100644
index 8c9bf850..00000000
--- a/harness/agents/interactive.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "schema_version": 1,
-  "agent_id": "openai-interactive",
-  "mode": "interactive",
-  "track": "custom",
-  "run_slug": "interactive-proxy",
-  "adapter": "openai_compatible",
-  "base_url": null,
-  "base_url_env": "VERITY_BENCHMARK_AGENT_BASE_URL",
-  "model": null,
-  "model_env": "VERITY_BENCHMARK_AGENT_MODEL",
-  "api_key": null,
-  "api_key_env": "VERITY_BENCHMARK_AGENT_API_KEY",
-  "chat_completions_path": "/chat/completions",
-  "models_path": "/models",
-  "system_prompt_files": [
-    "harness/PROMPT.md",
-    "harness/POLICY.md",
-    "harness/TOOLS.md",
-    "harness/PROOF_PATTERNS.md"
-  ],
-  "temperature": 0.0,
-  "max_completion_tokens": 2000,
-  "max_attempts": 16,
-  "max_tool_calls": 24,
-  "headers": {},
-  "header_envs": {},
-  "extra_body": {
-    "thinking": {
-      "type": "disabled"
-    }
-  },
-  "request_timeout_seconds": 120
-}
diff --git a/harness/agents/leanstral.json b/harness/agents/leanstral.json
deleted file mode 100644
index a9a10779..00000000
--- a/harness/agents/leanstral.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "schema_version": 1,
-  "agent_id": "leanstral-completion",
-  "mode": "custom",
-  "track": "custom",
-  "run_slug": "leanstral",
-  "adapter": "command",
-  "base_url": "https://spark-de79.gazella-vector.ts.net",
-  "base_url_env": null,
-  "model": "mistralai_Leanstral-128x3.9B-2603-Q4_K_M.gguf",
-  "model_env": null,
-  "api_key": null,
-  "api_key_env": null,
-  "chat_completions_path": "/completion",
-  "models_path": "/models",
-  "system_prompt_files": [
-    "harness/PROMPT.md",
-    "harness/POLICY.md",
-    "harness/TOOLS.md",
-    "harness/PROOF_PATTERNS.md"
-  ],
-  "temperature": 0.0,
-  "max_completion_tokens": 2000,
-  "max_attempts": 8,
-  "max_tool_calls": 24,
-  "headers": {},
-  "header_envs": {},
-  "extra_body": {},
-  "command": [
-    "python3",
-    "harness/leanstral_completion_adapter.py"
-  ],
-  "request_timeout_seconds": 120
-}
diff --git a/harness/agents/openrouter-gemini-3.1-flash-lite-preview.json b/harness/agents/openrouter-gemini-3.1-flash-lite-preview.json
deleted file mode 100644
index 2468ee7c..00000000
--- a/harness/agents/openrouter-gemini-3.1-flash-lite-preview.json
+++ /dev/null
@@ -1,34 +0,0 @@
-{
-  "schema_version": 1,
-  "agent_id": "openrouter-gemini-3.1-flash-lite-preview",
-  "track": "custom",
-  "run_slug": "openrouter-gemini-3.1-flash-lite-preview",
-  "adapter": "openai_compatible",
-  "base_url": "https://openrouter.ai/api/v1",
-  "base_url_env": null,
-  "model": "google/gemini-3.1-flash-lite-preview",
-  "model_env": null,
-  "api_key": null,
-  "api_key_env": "OPENROUTER_API_KEY",
-  "chat_completions_path": "/chat/completions",
-  "models_path": "/models",
-  "system_prompt_files": [
-    "harness/PROMPT.md",
-    "harness/POLICY.md",
-    "harness/TOOLS.md",
-    "harness/PROOF_PATTERNS.md"
-  ],
-  "mode": "strict",
-  "temperature": 0.0,
-  "max_completion_tokens": 2000,
-  "max_attempts": 8,
-  "max_tool_calls": 24,
-  "headers": {},
-  "header_envs": {},
-  "extra_body": {
-    "thinking": {
-      "type": "disabled"
-    }
-  },
-  "request_timeout_seconds": 120
-}
diff --git a/harness/default_agent.py b/harness/default_agent.py
index fb237396..4d2b15ee 100644
--- a/harness/default_agent.py
+++ b/harness/default_agent.py
@@ -8,6 +8,7 @@
 import re
 import subprocess
 import sys
+import random
 import time
 from dataclasses import dataclass
 from datetime import datetime, timezone
@@ -16,7 +17,14 @@
 from urllib import error, request
 
 from benchmark_config import load_benchmark_agent_defaults
-from interactive_runtime import TaskProofRuntime, tool_result_json, extract_contract_simp_terms, classify_failure
+from interactive_runtime import (
+    TaskProofRuntime,
+    classify_failure,
+    extract_contract_simp_terms,
+    prebuild_task_modules,
+    tool_result_json,
+    _PREFLIGHT_FAILURE_MODES as _RUNTIME_PREFLIGHT_FAILURE_MODES,
+)
 from task_runner import ROOT, load_task_record, resolve_task_manifest
 
 AGENT_RESULTS_DIR = ROOT / "results" / "agent_runs"
@@ -452,9 +460,49 @@ def resolve_config(path: Path, *, require_secrets: bool, profile: str | None = N
     )
 
 
+def _synthesized_interactive_tools_prompt() -> str:
+    """Return a hand-maintained summary of the interactive function-tool surface.
+
+    The newline-joined text replaces the static harness/TOOLS.md, which advertises `lake build`,
+    `scripts/run_task.sh`, and `scripts/run_all.sh` — none of which are callable in interactive mode.
+    """
+    lines = [
+        "# Interactive Tool Surface",
+        "",
+        "You have exactly these function tools. Call them; do NOT call shell commands:",
+        "",
+    ]
+    # The list below is hard-coded rather than derived from TaskProofRuntime.tool_specs():
+    # tool_specs() uses self.paths.public_files for the read_public_file enum, so it
+    # needs a real task. Keep these entries in sync with tool_specs() by hand.
+    surface = [
+        ("read_public_file(path)", "Read one of the task's public Lean files (impl/spec/editable)."),
+        ("write_editable_proof(content)", "Replace the editable proof file AND automatically run the Lean check. Response reports status (passed/failed), failure_mode, details, failure_class, and repair_hints. A separate run_lean_check call is not needed after this."),
+        ("run_lean_check()", "Re-run `lake env lean` without changing the file (redundant immediately after write_editable_proof)."),
+        ("inspect_lean_goals()", "Inspect goal state at explicit `?_` holes. Unsupported if no hole present."),
+        ("try_tactic_at_hole(tactic)", "Replace all `?_` holes with a tactic and check. Pass a raw tactic (e.g. `omega`, `simp_all`, `decide`); substitution auto-wraps as `(by tac)` at term positions like `exact ?_`. Preserves original proof on failure."),
+        ("search_public_defs(query)", "Search the task's public impl/spec files for def/theorem/lemma names. Does NOT search Lean core / Batteries / Mathlib — use `exact?`/`apply?`/`rw?` via `try_tactic_at_hole` for standard-library lemmas."),
+    ]
+    for name, desc in surface:
+        lines.append(f"- `{name}` — {desc}")
+    lines.extend([
+        "",
+        "Typical loop: write_editable_proof (which runs Lean) → read repair_hints → iterate.",
+        "`?_` is a PROBE for `inspect_lean_goals` / `try_tactic_at_hole`, never a final proof — Lean rejects every submission containing `?_`.",
+        "Do NOT emit `lake build` or `scripts/...`; there is no shell tool.",
+    ])
+    return "\n".join(lines)
+
+
 def build_system_prompt(config: ResolvedAgentConfig) -> str:
     sections = []
     for rel_path in config.system_prompt_files:
+        # In interactive mode, any entry whose path ends in "TOOLS.md" (the static
+        # file advertises shell commands that don't exist) is swapped for the
+        # synthesized description of the real function-tool surface.
+        if config.mode == "interactive" and rel_path.endswith("TOOLS.md"):
+            sections.append(f"[{rel_path}]\n{_synthesized_interactive_tools_prompt()}")
+            continue
         path = ROOT / rel_path
         sections.append(f"[{rel_path}]\n{path.read_text(encoding='utf-8').strip()}")
     return "\n\n".join(sections).strip()
@@ -559,9 +607,9 @@ def build_user_prompt(task: dict[str, Any], *, interactive: bool) -> str:
         "You are in interactive mode with verification tools.\n"
         "All implementation, specification, and editable proof files are already provided below. "
         "Do NOT re-read them with read_public_file — start working immediately.\n"
-        "Workflow: call write_editable_proof with your complete proof file, then call run_lean_check to verify.\n"
+        "Workflow: call write_editable_proof with your complete proof file — it returns the Lean check result directly, you do NOT need a separate run_lean_check call afterward.\n"
         "If the check fails, read the failure_class and repair_hints in the result.\n"
-        "For unknown_identifier errors: use search_public_defs to find correct names.\n"
+        "For unknown_identifier errors: read the repair_hints before searching — the missing name may be a tactic in term position (wrap in `by`), a local binder (call inspect_lean_goals instead), or a Mathlib lemma (this workspace has NO Mathlib; use `omega`/`ring`/`simp arith`). Only call search_public_defs for a genuine project-defined name from the implementation or spec file.\n"
         "For unsolved_goals: use inspect_lean_goals with a ?_ hole to see the exact goal, then write targeted tactics.\n"
         "Fix the specific error, write the corrected proof, and re-check. Do not rewrite from scratch unless the approach is fundamentally wrong.\n"
         "Only use read_public_file or search_public_defs if you need a definition not shown below.\n"
@@ -860,7 +908,13 @@ def build_attempt_trace(
         "candidate_sha256": stable_digest(candidate_text),
         "status": status,
         "failure_mode": failure_mode,
-        "candidate_changed_from_previous": None if previous_attempt is None else candidate_text != previous_candidate,
+        # Treat the first non-empty candidate as a change (previously was None, which
+        # broke candidate_change_count analytics — every successful run showed 0).
+        "candidate_changed_from_previous": (
+            bool(candidate_text.strip())
+            if previous_attempt is None
+            else candidate_text != previous_candidate
+        ),
         "failure_mode_changed_from_previous": (
             None if previous_attempt is None else failure_mode != previous_trace.get("failure_mode")
         ),
@@ -942,21 +996,42 @@ def build_run_analysis(
     reasoning_attempts = 0
     candidate_change_count = 0
     failure_mode_change_count = 0
+    distinct_candidate_hashes: set[str] = set()
+    previous_candidate = ""
     for attempt in attempts:
-        trace = attempt.get("trace", {})
-        if not isinstance(trace, dict):
-            continue
-        if int(trace.get("provider_reasoning_chars") or 0) > 0:
-            reasoning_attempts += 1
-        if trace.get("candidate_changed_from_previous") is True:
-            candidate_change_count += 1
-        if trace.get("failure_mode_changed_from_previous") is True:
-            failure_mode_change_count += 1
+        trace = attempt.get("trace", {}) or {}
+        if isinstance(trace, dict):
+            if int(trace.get("provider_reasoning_chars") or 0) > 0:
+                reasoning_attempts += 1
+            if trace.get("candidate_changed_from_previous") is True:
+                candidate_change_count += 1
+            if trace.get("failure_mode_changed_from_previous") is True:
+                failure_mode_change_count += 1
+            candidate_hash = trace.get("candidate_sha256")
+            if isinstance(candidate_hash, str) and candidate_hash and int(trace.get("candidate_chars") or 0) > 0:
+                distinct_candidate_hashes.add(candidate_hash)
+        # Fallback for interactive-mode attempts that do not populate `trace`:
+        # derive candidate changes/hashes directly from candidate_file_contents.
+        # Count every transition (incl. reverts like A -> B -> A), and record
+        # each distinct hash separately. Skip this block entirely when `trace`
+        # is already populated, so non-interactive traces are not redundantly
+        # re-hashed (which would be harmless while digests match but fragile
+        # if the two derivation paths ever diverge).
+        trace_has_hash = isinstance(trace, dict) and bool(trace.get("candidate_sha256"))
+        if not trace_has_hash:
+            candidate_text = str(attempt.get("candidate_file_contents", ""))
+            if candidate_text.strip():
+                candidate_hash = stable_digest(candidate_text)
+                distinct_candidate_hashes.add(candidate_hash)
+                if candidate_text != previous_candidate:
+                    candidate_change_count += 1
+                previous_candidate = candidate_text
     return {
         "attempt_count": len(attempts),
         "tool_calls_used": tool_calls_used,
         "reasoning_attempt_count": reasoning_attempts,
         "candidate_change_count": candidate_change_count,
+        "distinct_candidate_count": len(distinct_candidate_hashes),
         "failure_mode_change_count": failure_mode_change_count,
         "final_failure_mode": evaluation.get("failure_mode"),
         "final_status": evaluation.get("status"),
@@ -984,47 +1059,208 @@ def build_finalization_messages(
     ]
 
 
+RETRY_STATUS_CODES = frozenset({408, 409, 425, 429, 500, 502, 503, 504})
+MAX_CHAT_COMPLETION_RETRIES = 6
+
+
+def _parse_retry_after(value: str | None) -> float | None:
+    """Parse an HTTP `Retry-After` header.
+
+    Accepts both forms permitted by RFC 7231:
+    - delta-seconds (e.g. "120")
+    - HTTP-date (e.g. "Wed, 21 Oct 2015 07:28:00 GMT")
+
+    Returns the number of seconds to wait, or None if the value cannot be
+    parsed. A date in the past is clamped to 0.
+    """
+    if not value:
+        return None
+    value = value.strip()
+    if not value:
+        return None
+    try:
+        return max(0.0, float(value))
+    except ValueError:
+        pass
+    try:
+        from email.utils import parsedate_to_datetime
+        import datetime as _dt
+
+        parsed = parsedate_to_datetime(value)
+        if parsed is None:
+            return None
+        if parsed.tzinfo is None:
+            parsed = parsed.replace(tzinfo=_dt.timezone.utc)
+        delta = (parsed - _dt.datetime.now(_dt.timezone.utc)).total_seconds()
+        return max(0.0, delta)
+    except (TypeError, ValueError):
+        return None
+
+
+def _backoff_delay(attempt: int, retry_after: float | None) -> float:
+    if retry_after is not None:
+        # Honour the provider-requested wait. Clamp only at a safety ceiling
+        # (10 minutes) so a pathological header cannot stall the run
+        # indefinitely; the previous 60s clamp was too aggressive and caused
+        # retries to fire while the rate limit was still in force. Add a
+        # small additive jitter (up to 1s) so concurrent workers hitting the
+        # same Retry-After do not thunder back in lockstep.
+        clamped = min(retry_after, 600.0)
+        return clamped + random.random()
+    # Exponential backoff with jitter, capped at 30s.
+    base = min(30.0, 2.0 ** attempt)
+    return base * (0.5 + random.random() * 0.5)
+
+
+def _post_chat_completion(
+    config: ResolvedAgentConfig,
+    payload: dict[str, Any],
+    model: str,
+) -> dict[str, Any]:
+    """POST one chat completion request with retries on transient failures.
+
+    Retries on HTTP 408/409/425/429/500/502/503/504, URL errors, read timeouts and non-JSON
+    bodies, using exponential backoff with jitter and respecting Retry-After when present.
+    """
+    url = f"{config.base_url}{config.chat_completions_path}"
+    body_payload = dict(payload)
+    body_payload["model"] = model
+    req_body = json.dumps(body_payload).encode("utf-8")
+    headers = {
+        "Authorization": f"Bearer {config.api_key}",
+        "Content-Type": "application/json",
+        "User-Agent": "verity-benchmark/0.1",
+        **config.headers,
+    }
+    last_error: str | None = None
+    for attempt in range(MAX_CHAT_COMPLETION_RETRIES):
+        req = request.Request(url, data=req_body, headers=headers, method="POST")
+        try:
+            with request.urlopen(req, timeout=config.request_timeout_seconds) as response:
+                body = response.read().decode("utf-8")
+            try:
+                return json.loads(body)
+            except json.JSONDecodeError as exc:
+                # Non-JSON 200 responses (HTML error pages from a CDN or load
+                # balancer mid-deploy are common) must be treated as transient
+                # failures so the retry loop and fallback-model chain can take
+                # over, not as SystemExit which aborts the whole task.
+                last_error = f"non-JSON response: {body[:200]!r}"
+                if attempt == MAX_CHAT_COMPLETION_RETRIES - 1:
+                    raise _ChatCompletionError(status=0, detail=last_error, model=model) from exc
+                time.sleep(_backoff_delay(attempt, None))
+                continue
+        except error.HTTPError as exc:
+            detail = exc.read().decode("utf-8", errors="replace")
+            last_error = f"HTTP {exc.code}: {detail[:400]}"
+            if exc.code not in RETRY_STATUS_CODES or attempt == MAX_CHAT_COMPLETION_RETRIES - 1:
+                raise _ChatCompletionError(status=exc.code, detail=detail, model=model) from exc
+            retry_after = _parse_retry_after(exc.headers.get("Retry-After") if exc.headers else None)
+            time.sleep(_backoff_delay(attempt, retry_after))
+            continue
+        except error.URLError as exc:
+            last_error = f"URL error: {exc}"
+            if attempt == MAX_CHAT_COMPLETION_RETRIES - 1:
+                raise _ChatCompletionError(status=0, detail=str(exc), model=model) from exc
+            time.sleep(_backoff_delay(attempt, None))
+            continue
+        except TimeoutError as exc:
+            # Python 3.10+: socket.timeout during SSL read surfaces as
+            # TimeoutError rather than urllib.error.URLError. Treat it as
+            # a transient network failure and retry with backoff.
+            last_error = f"Read timeout: {exc}"
+            if attempt == MAX_CHAT_COMPLETION_RETRIES - 1:
+                raise _ChatCompletionError(status=0, detail=str(exc), model=model) from exc
+            time.sleep(_backoff_delay(attempt, None))
+            continue
+    raise _ChatCompletionError(status=0, detail=last_error or "unknown", model=model)
+
+
+class _ChatCompletionError(Exception):
+    def __init__(self, *, status: int, detail: str, model: str) -> None:
+        super().__init__(f"chat completion failed with status {status}: {detail[:400]}")
+        self.status = status
+        self.detail = detail
+        self.model = model
+
+
 def send_chat_completion(
     config: ResolvedAgentConfig,
     messages: list[dict[str, Any]],
     *,
     tools: list[dict[str, Any]] | None = None,
     max_tokens_override: int | None = None,
+    temperature_override: float | None = None,
 ) -> dict[str, Any]:
-    url = f"{config.base_url}{config.chat_completions_path}"
-    payload = {
-        "model": config.model,
-        "messages": messages,
-        "temperature": config.temperature,
-        "max_tokens": max_tokens_override or config.max_completion_tokens,
-    }
+    payload: dict[str, Any] = {"messages": messages}
     if tools:
         payload["tools"] = tools
         payload["tool_choice"] = "auto"
+    # Apply extra_body first so computed overrides below win over any
+    # temperature/max_tokens keys the user may have stashed in extra_body.
     payload.update(config.extra_body)
-    req = request.Request(
-        url,
-        data=json.dumps(payload).encode("utf-8"),
-        headers={
-            "Authorization": f"Bearer {config.api_key}",
-            "Content-Type": "application/json",
-            "User-Agent": "verity-benchmark/0.1",
-            **config.headers,
-        },
-        method="POST",
+    payload["temperature"] = (
+        config.temperature if temperature_override is None else temperature_override
+    )
+    payload["max_tokens"] = max_tokens_override or config.max_completion_tokens
+    # Allow configuring a fallback chain via extra_body.fallback_models (list of model ids).
+    # This lets a rate-limited primary (e.g. "opus") degrade gracefully instead of failing the run.
+    # Normalize fallback_models: accept a list of strings (standard) or a
+    # single string (common operator shorthand). A bare string must not be
+    # iterated character-by-character, which would produce single-letter
+    # "models" like "g", "p", "t".
+    raw_fallback = config.extra_body.get("fallback_models") or []
+    if isinstance(raw_fallback, str):
+        raw_fallback = [raw_fallback]
+    elif not isinstance(raw_fallback, (list, tuple)):
+        # extra_body is schema-free operator input; a truthy non-iterable
+        # (bool, int, dict, ...) must not blow up the iteration below.
+        raw_fallback = []
+    # Trim each entry: the guard below already gates on `item.strip()`
+    # truthiness, but store the stripped form so leading/trailing whitespace
+    # in a config like `" gpt-4o-mini"` does not survive into the outbound
+    # request body (providers reject model ids they do not recognize, so an
+    # otherwise-valid fallback would fail with a 404 model-not-found).
+    fallback_models = [
+        item.strip()
+        for item in raw_fallback
+        if isinstance(item, str) and item.strip()
+    ]
+    payload.pop("fallback_models", None)
+    # Benchmark-only knob consumed in execute_interactive_agent_task; strip
+    # it so providers don't reject the request with an unknown-field error.
+    payload.pop("length_retry_token_cap", None)
+    models_to_try: list[str] = [config.model, *fallback_models]
+    last_exc: _ChatCompletionError | None = None
+    # Status codes that are fatal for the whole chain — every model would
+    # get the same error, so no point in continuing to try fallbacks.
+    # 401 (bad/expired API key) and 403 (forbidden) are auth-level and
+    # apply account-wide; retrying a different model would just produce
+    # the same error. Every other non-transient 4xx is model-specific
+    # (404 model-not-found, 400 model-rejected-payload, 422 bad params
+    # for a model, 429 model-specific quota is in RETRY_STATUS_CODES
+    # already) and should fall through to the next fallback model.
+    _FATAL_AUTH_STATUSES = {401, 403}
+    for model in models_to_try:
+        try:
+            return _post_chat_completion(config, payload, model)
+        except _ChatCompletionError as exc:
+            last_exc = exc
+            # Fall back on the same transient statuses `_post_chat_completion`
+            # retries internally (plus status 0 for network/read errors), so a
+            # primary that keeps returning 408/409/425/429/5xx gets routed to
+            # the configured fallback chain instead of hard-failing. For a
+            # non-transient, non-auth error (e.g. 404 model-not-found on a
+            # typo'd fallback entry) keep trying later models — one bad
+            # fallback should not prevent subsequent configured backups.
+            if exc.status in _FATAL_AUTH_STATUSES:
+                break
+            continue
+    if last_exc is None:
+        raise SystemExit("chat completion request failed with no attempts")
+    raise SystemExit(
+        f"chat completion request failed with HTTP {last_exc.status} (model={last_exc.model}): {last_exc.detail[:400]}"
     )
-    try:
-        with request.urlopen(req, timeout=config.request_timeout_seconds) as response:
-            body = response.read().decode("utf-8")
-    except error.HTTPError as exc:
-        detail = exc.read().decode("utf-8", errors="replace")
-        raise SystemExit(f"chat completion request failed with HTTP {exc.code}: {detail}") from exc
-    except error.URLError as exc:
-        raise SystemExit(f"chat completion request failed: {exc}") from exc
-    try:
-        return json.loads(body)
-    except json.JSONDecodeError as exc:
-        raise SystemExit(f"chat completion request returned non-JSON response: {body[:400]!r}") from exc
 
 
 def list_models(config: ResolvedAgentConfig) -> dict[str, Any]:
@@ -1576,6 +1812,81 @@ def execute_strict_agent_task(
     return response, response_text, evaluation, attempts
 
 
+# Set of failure_modes produced by write_editable_proof's preflight checks
+# (before Lean ever runs). These are deterministic formatting/import/semantic
+# rejects whose human-readable `details` classify as `other`, collapsing
+# distinct failure modes into the same temperature-history bucket. Surface
+# each preflight mode as its own history class so the repeated-class bump
+# can fire correctly (and only) when the *same* preflight keeps recurring.
+# Authoritative preflight failure-mode set lives in
+# harness/interactive_runtime.py::_PREFLIGHT_FAILURE_MODES and is re-exported
+# here so `_failure_history_class` can't drift out of sync with the runtime
+# that actually produces these modes. An earlier duplicate definition lost
+# `empty_response` during a refactor; importing removes that whole class of
+# bug entirely.
+_PREFLIGHT_FAILURE_MODES = _RUNTIME_PREFLIGHT_FAILURE_MODES
+
+# Canonical evaluation-contract keys, matching the top-level `evaluation`
+# object in schemas/agent-run.schema.json (additionalProperties=false over
+# {status, failure_mode, details, command, candidate_workspace}). Whenever
+# the runtime returns a dict that will ultimately become a top-level or
+# per-attempt `evaluation` record, filter it through these keys first so
+# write-time metadata (path, bytes, lines, warnings, write_status,
+# repair_hints) and tool-specific extras (e.g. try_tactic_at_hole's
+# `tactic`) don't leak through and break JSON schema validation.
+_EVAL_KEYS = ("status", "failure_mode", "details", "command", "candidate_workspace")
+
+
+def _failure_history_class(result: dict) -> str:
+    """Return the failure-class label to append to temperature history.
+
+    Empty string means "do not append" (no failure, or infra noise we filter).
+    Preflight failure_modes are surfaced with a `pf:` prefix so e.g.
+    `pf:placeholder_detected` does not collide with Lean-check classes like
+    `type_error`, while still allowing the repeated-class same-value
+    comparison to trigger when the same preflight recurs.
+    """
+    if not isinstance(result, dict) or result.get("status") != "failed":
+        return ""
+    failure_mode = result.get("failure_mode") or ""
+    if failure_mode in _PREFLIGHT_FAILURE_MODES:
+        return f"pf:{failure_mode}"
+    # Lean-check failure (or any unclassified failure): derive from details.
+    fc = result.get("failure_class") or classify_failure(str(result.get("details", "")))
+    fc = str(fc)
+    # Environment errors are infra noise that would break the sliding-window
+    # same-class check (["type_error","environment_error","type_error"] looks
+    # like a class change). Filter out.
+    if fc == "environment_error":
+        return ""
+    return fc
+
+
+def _append_failure_class(
+    history: list,
+    fc_entry: str,
+    candidate_text: str,
+    last_key: list,
+) -> None:
+    """Append `fc_entry` to `history` unless it's empty or a same-candidate duplicate.
+
+    Dedupe guards against double-counting when a single turn fires both
+    `write_editable_proof` (which now runs the Lean check internally) and a
+    follow-up `run_lean_check` against the same failed candidate — that
+    would push two identical entries for one actual failure and prematurely
+    trigger the same-class temperature bump.
+    """
+    if not fc_entry:
+        return
+    candidate_hash = hashlib.sha1(candidate_text.encode("utf-8", "replace")).hexdigest()[:16]
+    key = (candidate_hash, fc_entry)
+    if last_key and last_key[0] == key:
+        return
+    history.append(fc_entry)
+    last_key[0] = key
+
+
+
 def execute_interactive_agent_task(
     config: ResolvedAgentConfig,
     task: dict[str, Any],
@@ -1593,13 +1904,71 @@ def execute_interactive_agent_task(
     consecutive_length_stops = 0
     max_total_turns = config.max_attempts * 2  # hard cap to prevent infinite loops
     token_budget = config.max_completion_tokens
+    # Ceiling for the length-retry silent bump. Read from config.extra_body so
+    # operators can opt into larger bumps for providers that accept them, but
+    # default to `max_completion_tokens` so models with a hard cap at that value
+    # don't get HTTP 400 when the bump kicks in. Stripped from the request
+    # payload in `send_chat_completion` so it never leaks to the provider.
+    _cap_raw = config.extra_body.get("length_retry_token_cap", config.max_completion_tokens)
+    try:
+        length_retry_token_cap = int(_cap_raw)
+    except (TypeError, ValueError):
+        # Invalid operator-edited value (e.g. null, "12k", nested object).
+        # Fall back silently rather than aborting the run.
+        length_retry_token_cap = config.max_completion_tokens
+    if length_retry_token_cap < config.max_completion_tokens:
+        length_retry_token_cap = config.max_completion_tokens
+    # Temperature schedule: escalate after repeated same-class failures to break out
+    # of deterministic loops where temperature=0 reproduces byte-identical responses.
+    current_temperature = config.temperature
+    failure_class_history: list[str] = []
+    # Dedupe key for `failure_class_history` appends: (candidate_hash, class).
+    # When a model does write_editable_proof then run_lean_check in the same
+    # turn against the same (failed) candidate, both tool calls produce the
+    # same class entry for the same candidate. Without dedupe the history
+    # gets two entries for one actual failure, and the repeated-class
+    # temperature bump fires a turn too early.
+    # Scope: reset at the top of each model turn (see loop below) so
+    # cross-turn repeats on an unchanged candidate still register as genuine
+    # failures for the repeated-class temperature escalation.
+    _last_history_key: list = [None]  # mutable cell so helper can update
+    # Track how many failures we have already applied the temperature-bump
+    # schedule to, so we don't keep escalating temperature on every iteration
+    # once the trigger condition is first met (it would otherwise run to the
+    # cap within a few turns regardless of intervening search/write activity).
+    temperature_schedule_applied_at = 0
 
     turn = 0
     while proof_attempts < config.max_attempts and turn < max_total_turns:
         turn += 1
+        # Scope the failure-class dedupe to a single turn. The dedupe exists to
+        # coalesce same-candidate same-class duplicates emitted within one
+        # model turn (e.g. `write_editable_proof` + follow-up `run_lean_check`
+        # on the same candidate); it must not silence genuine cross-turn
+        # repeats where the candidate stays unchanged but the model tries
+        # again. Resetting here bounds the dedupe window to the current turn.
+        _last_history_key[0] = None
+        # Adjust temperature once per new failure entry when the last two
+        # proof attempts failed with the same class.
+        if (
+            len(failure_class_history) > temperature_schedule_applied_at
+            and len(failure_class_history) >= 2
+            and failure_class_history[-1] == failure_class_history[-2]
+            and failure_class_history[-1] not in ("", "environment_error")
+        ):
+            # Escalate toward 0.7 to break deterministic loops, but never
+            # DECREASE below the configured base temperature. A run with
+            # `config.temperature = 1.0` should stay at 1.0 (or higher)
+            # rather than dropping to 0.7 on the first stagnation trigger —
+            # the cap exists only to stop unbounded growth, not to override
+            # an operator who explicitly asked for a hotter sampler.
+            escalated = max(current_temperature + 0.2, 0.2)
+            current_temperature = max(min(0.7, escalated), config.temperature)
+        temperature_schedule_applied_at = len(failure_class_history)
         response = send_chat_completion(
             config, transcript, tools=runtime.tool_specs(),
             max_tokens_override=token_budget if token_budget != config.max_completion_tokens else None,
+            temperature_override=current_temperature if current_temperature != config.temperature else None,
         )
         response_text = extract_text(response)
         tool_calls = extract_tool_calls(response)
@@ -1613,9 +1982,12 @@ def execute_interactive_agent_task(
             finish_reason = choices[0].get("finish_reason", "")
         if finish_reason == "length" and not tool_calls and not response_text.strip():
             consecutive_length_stops += 1
-            if consecutive_length_stops == 1:
-                # First length stop: bump token budget once and retry silently
-                token_budget = min(int(token_budget * 1.5), 4500)
+            # Up to 3 silent budget bumps before nudging the model to simplify.
+            # Each bump is capped at `length_retry_token_cap`, which defaults to
+            # `config.max_completion_tokens` (so the retry is a same-budget resend
+            # unless the operator raises the cap) to avoid provider HTTP 400s.
+            if consecutive_length_stops <= 3:
+                token_budget = min(int(token_budget * 1.5), length_retry_token_cap)
                 continue
             # Subsequent length stops: inject a nudge to simplify and use tools
             transcript.append({"role": "assistant", "content": ""})
@@ -1623,16 +1995,19 @@ def execute_interactive_agent_task(
                 "role": "user",
                 "content": (
                     "Your response was cut off. Do not over-think. "
-                    "Immediately call write_editable_proof with a simple proof attempt, "
-                    "then call run_lean_check. Keep the proof short."
+                    "Immediately call write_editable_proof with a simple proof attempt "
+                    "(it runs the Lean check automatically). Keep the proof short."
                 ),
             })
-            if consecutive_length_stops >= 3:
-                # Reset budget back to configured value after persistent overruns
-                token_budget = config.max_completion_tokens
+            # Reset budget back to configured value after persistent overruns
+            token_budget = config.max_completion_tokens
             continue
         else:
+            # Recovered from any length streak -- reset both the counter and
+            # the (possibly-elevated) token budget so we don't leak state into
+            # subsequent turns.
             consecutive_length_stops = 0
+            token_budget = config.max_completion_tokens
 
         attempts.append(
             {
@@ -1651,11 +2026,63 @@ def execute_interactive_agent_task(
             # Only overwrite the stored proof if the response looks like Lean code,
             # not natural-language explanation.
             if final_candidate.strip() and _looks_like_lean(final_candidate):
-                runtime.write_editable_proof(final_candidate)
+                # `write_editable_proof` already runs the Lean check
+                # internally (check=True default) and returns the merged
+                # write-metadata + run_lean_check result. Reuse that dict
+                # instead of calling `evaluate_current()` again — the
+                # previous double-invocation cost a second `lake env lean`
+                # per no-tool-calls attempt and pushed a spurious entry
+                # onto `_check_history`, which could trigger premature
+                # stagnation/temperature escalation.
+                # NOTE: local name is `write_payload` (not `write_result`)
+                # because `write_result` is a module-level function at
+                # line ~1530 (`write_result(task_ref, config, payload)`),
+                # and shadowing it with a local would silently break any
+                # future code in this function that tried to call the
+                # file-writer. The on-trace attempts record still exposes
+                # this payload under the `"write_result"` key for
+                # backward-compatible tooling.
+                write_payload = runtime.write_editable_proof(final_candidate)
                 proof_attempts += 1
-                evaluation = runtime.evaluate_current()
+                # `write_editable_proof` returns the full write payload
+                # merged with `run_lean_check` output (path, bytes, lines,
+                # warnings, write_status, repair_hints). These are not part
+                # of the top-level `evaluation` schema (which is strict:
+                # additionalProperties=false over {status, failure_mode,
+                # details, command, candidate_workspace}). Returning the
+                # raw dict upward — as was done before — made `build_result`
+                # forward it to `validate_result_payload` and fail schema
+                # validation with a SystemExit, aborting the entire run
+                # every time the model produced Lean text without tool
+                # calls (including successful proofs). Normalize here so
+                # both the nested `attempts[-1]["evaluation"]` record and
+                # the outward return have the contract shape, while
+                # preserving the rich write-time payload under a separate
+                # per-attempt key for debugging/analytics.
+                evaluation = {
+                    k: write_payload[k]
+                    for k in _EVAL_KEYS
+                    if k in write_payload
+                }
+                evaluation.setdefault("failure_mode", None)
+                evaluation.setdefault("details", "")
                 attempts[-1]["candidate_file_contents"] = runtime.current_proof_text
                 attempts[-1]["evaluation"] = evaluation
+                attempts[-1]["write_result"] = write_payload
+                # Track model-driven failure classes for the temperature
+                # schedule's sliding window. `_failure_history_class` maps
+                # preflight modes (placeholder_detected, hidden_*_import,
+                # theorem_statement_mismatch) to distinct `pf:<mode>` labels
+                # so they don't all collapse into `other`, and filters out
+                # infra-noise environment errors that would break
+                # same-class detection.
+                fc_entry = _failure_history_class(write_payload)
+                _append_failure_class(
+                    failure_class_history,
+                    fc_entry,
+                    runtime.current_proof_text,
+                    _last_history_key,
+                )
                 if evaluation["status"] == "passed":
                     return response, response_text, runtime.current_proof_text, evaluation, attempts, tool_calls_used
                 # Failed candidate without tool calls: feed error back
@@ -1669,16 +2096,42 @@ def execute_interactive_agent_task(
                     )
                     if guidance:
                         repair_msg += f"\nRepair guidance:\n{guidance}\n"
-                    repair_msg += "\nUse write_editable_proof to write a corrected proof, then run_lean_check to verify."
+                    repair_msg += "\nUse write_editable_proof to write a corrected proof (it runs the Lean check automatically; no separate run_lean_check needed)."
                     transcript.append({"role": "assistant", "content": response_text or ""})
                     transcript.append({"role": "user", "content": repair_msg})
-                elif failure_mode in ("placeholder_detected", "theorem_statement_mismatch"):
+                elif failure_mode in (
+                    "placeholder_detected",
+                    "theorem_statement_mismatch",
+                    "hidden_proof_import_detected",
+                    "hidden_case_import_detected",
+                ):
+                    # Preflight rejections (placeholder_detected,
+                    # theorem_statement_mismatch, hidden_*_import_detected) are
+                    # all recoverable by the model: the candidate file made it
+                    # through the write path but was rejected before Lean saw
+                    # it. Surface the rejection and give the model another
+                    # turn to produce a clean candidate, instead of bailing
+                    # out on the first hidden-import mistake.
+                    extra_hint = ""
+                    if failure_mode == "hidden_proof_import_detected":
+                        extra_hint = (
+                            "\nRemove any `import`, `open`, or `export` of a "
+                            "`Benchmark.Cases.*.Proofs` module — those hold "
+                            "held-out ground truth and are not available to "
+                            "the model."
+                        )
+                    elif failure_mode == "hidden_case_import_detected":
+                        extra_hint = (
+                            "\nOnly the public specification / implementation "
+                            "modules for this task may be imported. Drop any "
+                            "other `Benchmark.Cases.*` imports."
+                        )
                     retry_msg = (
                         f"Your response did not produce a valid proof candidate (proof attempt {proof_attempts} of {config.max_attempts}, "
                         f"failure: {failure_mode}).\n"
-                        "Use the write_editable_proof tool to submit the complete editable Lean proof file, "
-                        "then use run_lean_check to verify it.\n"
-                        "Do not explain or analyze. Use the tools directly.\n"
+                        "Use the write_editable_proof tool to submit the complete editable Lean proof file "
+                        "(it runs the Lean check automatically; no separate run_lean_check needed).\n"
+                        "Do not explain or analyze. Use the tools directly." + extra_hint + "\n"
                     )
                     transcript.append({"role": "assistant", "content": response_text})
                     transcript.append({"role": "user", "content": retry_msg})
@@ -1687,8 +2140,8 @@ def execute_interactive_agent_task(
             else:
                 # Empty response or no valid candidate: nudge model to use tools
                 nudge_msg = (
-                    "You must use the write_editable_proof tool to submit your proof, "
-                    "then call run_lean_check to verify it. Do not respond with text only.\n"
+                    "You must use the write_editable_proof tool to submit your proof "
+                    "(it runs the Lean check automatically). Do not respond with text only.\n"
                 )
                 transcript.append({"role": "assistant", "content": response_text or ""})
                 transcript.append({"role": "user", "content": nudge_msg})
@@ -1701,7 +2154,6 @@ def execute_interactive_agent_task(
                 "tool_calls": tool_calls,
             }
         )
-        saw_lean_failure = False
         turn_had_proof_action = False
         for tool_call in tool_calls:
             if tool_calls_used >= config.max_tool_calls:
@@ -1738,12 +2190,50 @@ def execute_interactive_agent_task(
                     "result": result,
                 }
             )
-            if tool_name == "run_lean_check" and result.get("failure_mode") == "lean_check_failed":
-                saw_lean_failure = True
-            elif tool_name in ("run_lean_check", "try_tactic_at_hole") and result.get("status") == "passed":
-                # Normalize to evaluation schema (try_tactic_at_hole returns tactic/details without failure_mode)
-                evaluation = dict(result)
+            if tool_name in ("run_lean_check", "write_editable_proof") and result.get("status") == "failed":
+                # Track any write/check failure (Lean-check *and* preflight
+                # failures like placeholder_detected /
+                # hidden_case_import_detected). Previously only
+                # `failure_mode == "lean_check_failed"` was recorded, so a run
+                # stuck on repeated preflight failures never tripped the
+                # same-class temperature bump and stayed at deterministic
+                # temperature until attempt exhaustion.
+                fc_entry = _failure_history_class(result)
+                _append_failure_class(
+                    failure_class_history,
+                    fc_entry,
+                    runtime.current_proof_text,
+                    _last_history_key,
+                )
+                # Persist candidate state even for failed proof-tool turns so
+                # `build_run_analysis` can hash intermediate drafts for the
+                # candidate_change_count / distinct_candidate_count analytics.
+                # Without this, only the last (passed or budget-exhausted)
+                # turn's candidate gets recorded and repeated unsuccessful
+                # edits look like zero churn.
+                attempts[-1]["candidate_file_contents"] = runtime.current_proof_text
+                # Normalize to the evaluation schema (same _EVAL_KEYS filter as
+                # the passed path below) so the nested per-attempt evaluation
+                # records have a consistent shape across passed / failed /
+                # budget-exhausted branches. The raw tool result carries
+                # write-time metadata (path, bytes, lines, warnings,
+                # repair_hints) that isn't part of the evaluation contract.
+                _failed_eval = {
+                    k: result[k]
+                    for k in _EVAL_KEYS
+                    if k in result
+                }
+                _failed_eval.setdefault("failure_mode", None)
+                _failed_eval.setdefault("details", "")
+                attempts[-1]["evaluation"] = _failed_eval
+            elif tool_name in ("run_lean_check", "try_tactic_at_hole", "write_editable_proof") and result.get("status") == "passed":
+                # Normalize to evaluation schema. `try_tactic_at_hole` returns
+                # extra keys like `tactic` that must be stripped, otherwise the
+                # final result fails schema validation (additionalProperties:
+                # false) and the whole task aborts with no result file.
+                evaluation = {k: result[k] for k in _EVAL_KEYS if k in result}
                 evaluation.setdefault("failure_mode", None)
+                evaluation.setdefault("details", "")
                 attempts[-1]["candidate_file_contents"] = runtime.current_proof_text
                 attempts[-1]["evaluation"] = evaluation
                 return response, response_text, runtime.current_proof_text, evaluation, attempts, tool_calls_used
@@ -1760,7 +2250,7 @@ def execute_interactive_agent_task(
                         "content": (
                             "Stop searching and write a proof now. The search_public_defs tool only searches "
                             "this task's implementation and specification files, not the Lean standard library. "
-                            "Use write_editable_proof to submit your best proof attempt, then run_lean_check to verify."
+                            "Use write_editable_proof to submit your best proof attempt (it runs the Lean check automatically)."
                         ),
                     }
                 )
@@ -1808,7 +2298,12 @@ def execute_agent_task(
         return 0, result_path
 
     start = time.perf_counter()
+    # Pre-build implementation/specification modules so `lake env lean` inside
+    # TaskProofRuntime.evaluate_candidate does not race against on-the-fly
+    # compilation with fast agent retries.
+    prebuild_reports: list[dict[str, Any]] = []
     if config.mode == "interactive":
+        prebuild_reports = prebuild_task_modules(task)
         response, response_text, candidate_text, evaluation, attempts, tool_calls_used = execute_interactive_agent_task(
             config,
             task,
@@ -1850,6 +2345,8 @@ def execute_agent_task(
     result["attempts"] = attempts
     result["tool_calls_used"] = tool_calls_used
     result["analysis"] = build_run_analysis(attempts=attempts, evaluation=evaluation, tool_calls_used=tool_calls_used)
+    if prebuild_reports:
+        result["prebuild_reports"] = prebuild_reports
     validate_result_payload(result, task_ref)
     result_path = write_result(task_ref, config, result)
     return (0 if evaluation["status"] == "passed" else 1), result_path
diff --git a/harness/interactive_runtime.py b/harness/interactive_runtime.py
index 23420b59..fd3e99d5 100644
--- a/harness/interactive_runtime.py
+++ b/harness/interactive_runtime.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import copy
 import json
 import os
 import re
@@ -12,11 +13,68 @@
 
 
 PLACEHOLDER_PATTERN = re.compile(r"\b(sorry|admit|axiom)\b")
-HOLE_PATTERN = re.compile(r"\?(?:_|\w+)")
+# Match standalone `?_` holes only (not `?x` metavariables used in valid tactics).
+HOLE_PATTERN = re.compile(r"(?<!\w)\?_(?!\w)")
+# Detection-only pattern covering both unnamed (`?_`) and named (`?ident`)
+# holes. Used by `inspect_goals` so the model can introspect goals at a
+# named hole too. NOT used by `try_tactic_at_hole` or `_substitute_holes`
+# — blanket substitution of a named hole `?h` can collide with real
+# identifiers, so substitution stays strictly `?_`-scoped.
+ANY_HOLE_PATTERN = re.compile(r"(?<!\w)\?(?:_|[A-Za-z][A-Za-z0-9_']*)(?!\w)")
 DEF_PATTERN = re.compile(r"^\s*(?:def|theorem|lemma|abbrev|opaque)\s+([A-Za-z0-9_'.]+)")
-HIDDEN_PROOF_IMPORT_PATTERN = re.compile(r"^\s*import\s+Benchmark\.Cases\..*\.Proofs\b", re.MULTILINE)
+HIDDEN_PROOF_IMPORT_PATTERN = re.compile(
+    r"^\s*(?:import|open|export)\s+Benchmark\.Cases\..*\.Proofs\b", re.MULTILINE
+)
 IMPORT_PATTERN = re.compile(r"^\s*import\s+([A-Za-z0-9_.']+)\s*$", re.MULTILINE)
 
+# Well-known Lean 4 tactics that Lean reports as "unknown identifier" when
+# written in *term* position (e.g. `exact simp [...]`, `refine omega`, `:= by_cases h`).
+# Corpus analysis of 83 runs: 20 of 29 failed tasks (69%) hit this at least once,
+# with `simp`, `simpa`, `omega`, `exact`, `native_decide`, `intro`, `simp_all`, and
+# `by_cases` accounting for 61 occurrences. The existing `unknown_identifier` hint
+# sends the model to `search_public_defs`, which cannot help here — these are
+# tactic names, not definitions in the task's public files.
+_LEAN_TACTIC_NAMES = frozenset({
+    "simp", "simpa", "simp_all", "dsimp",
+    "omega", "decide", "native_decide",
+    "exact", "refine", "apply", "intro", "intros",
+    "constructor", "cases", "induction", "by_cases", "obtain",
+    "unfold", "rfl", "rw", "rewrite", "ring", "linarith", "nlinarith",
+    "split", "left", "right", "use", "show", "have", "suffices", "let",
+    "trivial", "tauto", "contradiction", "assumption", "skip",
+    "ext", "funext", "congr", "norm_num", "field_simp", "abel",
+})
+_UNKNOWN_IDENT_RE = re.compile(r"unknown (?:identifier|constant) '([^']+)'")
+
+# Names that look like Mathlib lemmas (e.g. `add_sub_add_right_eq_sub`,
+# `lt_of_add_lt_add_right`, `Nat.div_mul_le`). Corpus analysis of 83 runs
+# found 5 of 29 failed tasks (17%) stagnating on such guesses —
+# `add_sub_add_right_eq_sub`, `sub_eq_sub_right`, `add_assoc`, `add_comm`,
+# `sub_eq_add_neg`, `lt_of_add_lt_add_right`, `Nat.div_mul_le`,
+# `Nat.le_div_mul`, `Nat.div_def`, `Nat.cast_mk`, `Nat.not_ge.mp`, …
+# This workspace has NO Mathlib dependency, so these searches can never
+# succeed; the agent should be pointed at `omega`/`simp`/`decide` instead.
+_MATHLIB_SHAPE_PREFIX_RE = re.compile(
+    r"^(add_|sub_|mul_|div_|mod_|le_|lt_|ge_|gt_|eq_|ne_|not_|neg_|pos_|zero_|one_)"
+)
+_MATHLIB_SHAPE_EXACT = frozenset({
+    "add_assoc", "add_comm", "add_left_comm",
+    "mul_comm", "mul_assoc", "mul_left_comm",
+    "sub_zero", "zero_add", "add_zero", "mul_one", "one_mul",
+    "not_eq",
+})
+
+
+def _is_mathlib_shaped(name: str) -> bool:
+    """Return True when `name` has the shape of a Mathlib lemma guess."""
+    # Exact-name hit, Mathlib-style prefix, or a `Nat.` qualifier (such
+    # guesses are overwhelmingly Mathlib-only in this corpus).
+    return (
+        name in _MATHLIB_SHAPE_EXACT
+        or _MATHLIB_SHAPE_PREFIX_RE.match(name) is not None
+        or name.startswith("Nat.")
+    )
+
 
 @dataclass(frozen=True)
 class RuntimePaths:
@@ -37,6 +95,44 @@ def __init__(self, task: dict[str, Any]) -> None:
         self._task = task  # store for hint escalation
         self._best_error_count: int | None = None
         self._best_first_error_line: int | None = None
+        # Fingerprints of hint texts already surfaced this session. Used to
+        # avoid echoing the same repair advice verbatim across consecutive
+        # failures — repeated identical hints are pure noise and train the
+        # model to ignore the list instead of acting on it.
+        self._emitted_hint_keys: set[str] = set()
+        # Normalised fingerprint of the previous failing Lean details text,
+        # plus a count of how many times the same fingerprint has repeated
+        # in a row. Used to detect "no-progress loops" where the model
+        # resubmits a proof that yields byte-identical errors — corpus
+        # analysis found 12/29 failing tasks hit this pattern.
+        self._last_details_fp: str | None = None
+        self._same_details_streak: int = 0
+        # Cache of the most recent run_lean_check evaluation keyed by the
+        # exact proof text that produced it. A redundant run_lean_check call
+        # against unchanged content (corpus analysis found 201/201 — 100% —
+        # of run_lean_check calls were immediately after a write_editable_proof
+        # that had already run Lean) returns this cached result instantly
+        # plus a `cached: true` marker telling the model the call was
+        # redundant, saving a full Lean invocation and a round.
+        self._last_eval_cache: tuple[str, dict[str, Any]] | None = None
+        # Count of consecutive failed try_tactic_at_hole calls. Corpus analysis
+        # of 83 runs: try_tactic_at_hole has a 0/76 (0%) success rate across
+        # the entire interactive-proxy corpus, but failed runs average 3-7
+        # calls per task (14/29 failed runs have a ≥3-streak of failures)
+        # vs passed runs which max at a 2-streak (and never succeed when
+        # they do call it — they just move on after 1-2 attempts). Firing
+        # a pivot warning at the 3rd consecutive failure catches the stuck-
+        # loop pattern with zero false positives on the passed side.
+        self._try_tactic_failure_streak: int = 0
+        # Cache of prior search_public_defs calls keyed by (query, limit).
+        # Corpus analysis of 83 runs found failed runs averaged 41.9
+        # search_public_defs calls vs 1.5 on passing runs; 94% of those
+        # calls in failed runs were byte-identical re-queries (e.g. the same
+        # `"removeOwner_ownerListInvariant"` query 26 times in one run). The
+        # index is read-only within a session, so a cached hit with a
+        # `cached: true` + note tells the model the query yielded nothing
+        # new and it should pivot instead of re-asking.
+        self._search_cache: dict[tuple[str, int], dict[str, Any]] = {}
         self.paths = RuntimePaths(
             editable_rel_path=editable_rel_path,
             theorem_name=str(task["theorem_name"]),
@@ -75,19 +171,103 @@ def read_public_file(self, rel_path: str) -> dict[str, Any]:
         except FileNotFoundError:
             return {"status": "missing", "path": rel_path}
 
-    def write_editable_proof(self, content: str) -> dict[str, Any]:
+    def write_editable_proof(self, content: str, *, check: bool = True) -> dict[str, Any]:
+        """Replace the current proof text, gather preflight warnings, and (when `check`) run the Lean check."""
         self.current_proof_text = content if content.endswith("\n") else f"{content}\n"
-        return {
-            "status": "ok",
+        # Invalidate the run_lean_check fast-path cache. It is keyed on
+        # `current_proof_text`, so a repeat write of identical content (common
+        # during stagnation loops) would otherwise return a stale evaluation
+        # tagged `cached: true`, as if this were a redundant `run_lean_check`
+        # follow-up rather than a fresh write. Drop the cache unconditionally;
+        # the `execute_tool("run_lean_check", ...)` call below re-populates it
+        # for genuine no-op follow-ups.
+        self._last_eval_cache = None
+        warnings: list[dict[str, str]] = []
+        if not self.current_proof_text.strip():
+            warnings.append({"kind": "empty_content", "detail": "candidate is empty"})
+        if PLACEHOLDER_PATTERN.search(self.current_proof_text):
+            warnings.append({
+                "kind": "placeholder_detected",
+                "detail": "contains `sorry`/`admit`/`axiom`; Lean rejects these — replace with a real tactic or a `?_` hole.",
+            })
+        if HIDDEN_PROOF_IMPORT_PATTERN.search(self.current_proof_text):
+            warnings.append({
+                "kind": "hidden_proof_import_detected",
+                "detail": "remove Benchmark.Cases.*.Proofs import/open/export.",
+            })
+        blocked = self._find_blocked_case_imports(self.current_proof_text)
+        if blocked:
+            warnings.append({
+                "kind": "hidden_case_import_detected",
+                "detail": "non-public imports: " + ", ".join(blocked),
+            })
+        if HOLE_PATTERN.search(self.current_proof_text):
+            warnings.append({
+                "kind": "unfilled_hole",
+                "detail": "proof still contains `?_` holes; fill before submitting.",
+            })
+        candidate_signature = self._extract_theorem_signature(self.current_proof_text)
+        if candidate_signature != self.expected_theorem_signature:
+            warnings.append({
+                "kind": "theorem_statement_mismatch",
+                "detail": "editable theorem signature changed; revert to the original statement.",
+            })
+        result: dict[str, Any] = {
+            "status": "ok_with_warnings" if warnings else "ok",
             "path": self.paths.editable_rel_path,
             "bytes": len(self.current_proof_text.encode("utf-8")),
             "lines": len(self.current_proof_text.splitlines()),
         }
+        if warnings:
+            result["warnings"] = warnings
+        # Fold the Lean check into the write. Each write+check used to cost
+        # two tool slots and two model round-trips; inlining saves one full
+        # round-trip (hundreds of ms to seconds of LLM latency per proof
+        # iteration) and doubles the effective budget for proof exploration.
+        # The caller can disable by passing check=False (kept for callers
+        # that only want to stage a draft without paying for Lean).
+        if check:
+            # Reuse the full run_lean_check pipeline (auto-heal + annotation +
+            # repair hints) so downstream success/failure detection is
+            # identical to a bare run_lean_check call. Write-time metadata
+            # (path, bytes, lines, warnings) stays visible in the result so
+            # the model still sees preflight warnings such as
+            # hidden_case_import_detected alongside the Lean verdict.
+            pre_check_status = result["status"]
+            result.update(self.execute_tool("run_lean_check", {}))
+            # `run_lean_check` overwrites the `status` field, which drops the
+            # pre-check `ok_with_warnings` verdict. Callers that look for
+            # write-phase warnings (unfilled_hole, hidden_case_import_detected,
+            # theorem_statement_mismatch) need a stable signal, so expose the
+            # pre-check verdict on `write_status`. The main `status` still
+            # reflects the Lean check so existing `status == "passed"` and
+            # `status == "failed"` branches keep working unchanged.
+            if pre_check_status != "ok":
+                result["write_status"] = pre_check_status
+        return result
 
     def search_public_defs(self, query: str, *, limit: int = 20) -> dict[str, Any]:
         query_text = query.strip()
         if not query_text:
             return {"status": "rejected", "reason": "query_must_not_be_empty"}
+        # The set of public impl/spec files does not change within a session,
+        # so the same (query, limit) will always return the same matches.
+        # Short-circuit repeat queries with a cached response + explicit note
+        # so the agent stops looping on an identical search.
+        cache_key = (query_text.lower(), limit)
+        cached = self._search_cache.get(cache_key)
+        if cached is not None:
+            reused = copy.deepcopy(cached)
+            reused["cached"] = True
+            reused["note"] = (
+                "You already ran search_public_defs with this exact query "
+                "earlier in the session; the public impl/spec files are "
+                "static, so the result is identical. Try a different query "
+                "(e.g. a substring, a related concept, or a parameter name) "
+                "or switch to inspect_lean_goals / try_tactic_at_hole — "
+                "do not resubmit the same query."
+            )
+            return reused
         lowered = query_text.lower()
         matches: list[dict[str, Any]] = []
         for rel_path in self.paths.implementation_files + self.paths.specification_files:
@@ -110,16 +290,52 @@ def search_public_defs(self, query: str, *, limit: int = 20) -> dict[str, Any]:
                     }
                 )
                 if len(matches) >= limit:
-                    return {"status": "ok", "query": query_text, "matches": matches, "truncated": True}
-        return {"status": "ok", "query": query_text, "matches": matches, "truncated": False}
+                    result = {"status": "ok", "query": query_text, "matches": matches, "truncated": True}
+                    self._search_cache[cache_key] = copy.deepcopy(result)
+                    return result
+        if not matches:
+            # Corpus analysis (83 runs) found 55/75 (73%) of search_public_defs
+            # calls returned empty — overwhelmingly because agents searched for
+            # Mathlib / core Lean library names like `Nat.div_mul_le`,
+            # `add_zero`, `div_pos`, etc. This tool only searches the task's
+            # public impl/spec files, not the standard library. Surface that
+            # scope limit explicitly so the agent stops burning rounds on
+            # library searches.
+            result = {
+                "status": "ok",
+                "query": query_text,
+                "matches": matches,
+                "truncated": False,
+                "hint": (
+                    "No match in the task's public impl/spec files. "
+                    "`search_public_defs` only indexes definitions inside "
+                    "implementation_files and specification_files for this "
+                    "task — it does NOT search Lean core, Batteries, or "
+                    "Mathlib (Mathlib is not a dependency of this project). "
+                    "For standard-library lemmas use `exact?` / `apply?` / "
+                    "`rw?` via `try_tactic_at_hole`, or rely on `simp` / "
+                    "`omega` / `decide` which already know common arithmetic "
+                    "and boolean facts. Retry this tool only with names you "
+                    "expect to be defined in the current task's spec/impl."
+                ),
+            }
+            self._search_cache[cache_key] = copy.deepcopy(result)
+            return result
+        result = {"status": "ok", "query": query_text, "matches": matches, "truncated": False}
+        self._search_cache[cache_key] = copy.deepcopy(result)
+        return result
 
     def inspect_goals(self) -> dict[str, Any]:
-        holes = sorted(set(HOLE_PATTERN.findall(self.current_proof_text)))
+        # Detect `?_` AND named holes (`?h`, `?foo`). Named-hole detection was
+        # lost when HOLE_PATTERN was tightened for substitution safety; this
+        # tool is read-only so the broader pattern is safe and restores the
+        # recovery path for proofs that use named holes.
+        holes = sorted(set(ANY_HOLE_PATTERN.findall(self.current_proof_text)))
         if not holes:
             return {
                 "status": "unsupported",
                 "reason": "goal_inspection_requires_explicit_hole",
-                "details": "Write the proof with a `?_` or named hole first, then retry goal inspection.",
+                "details": "Write the proof with a `?_` or named hole (e.g. `?h`) first, then retry goal inspection.",
             }
         evaluation = self.evaluate_current(check_goals=True)
         return {
@@ -138,8 +354,17 @@ def try_tactic_at_hole(self, tactic: str) -> dict[str, Any]:
         if not tactic.strip():
             return {"status": "rejected", "reason": "tactic_must_not_be_empty"}
         original = self.current_proof_text
-        # Replace standalone ?_ holes (not named holes like ?_foo)
-        modified = re.sub(r"\?_(?!\w)", tactic.strip(), original)
+        # Substitute each `?_` with a context-adapted form of `tactic`. Corpus
+        # analysis of 72 failed try_tactic_at_hole calls found 47 (65%) passed
+        # a raw tactic (e.g. `omega`, `rfl`, `simp_all [...]`) into a proof
+        # where the hole sat at a TERM position like `exact ?_` — making the
+        # substituted proof read `exact omega`, which Lean rejects because
+        # `omega` is a tactic, not a term. Automatically wrap the substituted
+        # tactic with `(by ...)` at term-position holes, and strip an existing
+        # `by ` wrapper at tactic-position holes, so the model's intent
+        # survives context mismatches. Holes at other positions get the raw
+        # tactic.
+        modified = _substitute_holes(original, tactic.strip())
         if modified == original:
             return {
                 "status": "unsupported",
@@ -148,18 +373,70 @@ def try_tactic_at_hole(self, tactic: str) -> dict[str, Any]:
             }
         evaluation = self.evaluate_candidate(modified)
         if evaluation.get("status") == "passed":
+            self._try_tactic_failure_streak = 0
             self.current_proof_text = modified
             return {
                 "status": "passed",
                 "tactic": tactic.strip(),
                 "details": "Tactic succeeded. Proof updated.",
             }
-        return {
+        self._try_tactic_failure_streak += 1
+        # Produce the same class-based repair_hints as run_lean_check /
+        # write_editable_proof do on failure. Corpus analysis of 83 interactive
+        # runs found 76/76 (100%) of failed try_tactic_at_hole results returned
+        # no hints, even though the failure_class distribution (45 unknown_
+        # identifier, 18 unsolved_goals, 7 type_mismatch, …) maps onto hints
+        # already produced by `_build_check_hints` when the same error comes
+        # from the other two tools. Reusing that helper keeps the advice
+        # consistent across the tool surface and gives the model a concrete
+        # next tactic to try instead of a bare error payload.
+        # `details` is already stripped of `linter.unusedSimpArgs` noise and
+        # capped at `_LEAN_OUTPUT_CAP_CHARS` (16 KB) by `evaluate_candidate`.
+        # Earlier code re-truncated to 2000 chars — a legacy band-aid from
+        # before the upstream cleanup pipeline existed. Corpus analysis of
+        # the 78 try_tactic_at_hole failures in the current corpus found
+        # 41/78 (53%) hit that 2000-char cap, chopping off already-cleaned
+        # diagnostic content (goal state, context, line numbers) that
+        # run_lean_check would have returned in full on the same failure.
+        # Drop the extra truncation so all three tools surface the same
+        # error fidelity; the 16 KB pipeline cap remains the backstop.
+        details = str(evaluation.get("details", ""))
+        failure_class = classify_failure(details)
+        result = {
             "status": "failed",
             "tactic": tactic.strip(),
-            "details": evaluation.get("details", "")[:2000],
-            "failure_class": classify_failure(str(evaluation.get("details", ""))),
+            "details": details,
+            "failure_class": failure_class,
         }
+        hints = _build_check_hints(failure_class, details)
+        # After 3 consecutive failed try_tactic_at_hole calls, inject a
+        # "pivot" hint. Corpus analysis: passed runs never exceed a 2-streak;
+        # failed runs hit ≥3 in 14/29 (48%) tasks, with some stacking 5-7
+        # attempts of increasingly speculative tactics. The tool has a
+        # 0/76 (0%) corpus-wide success rate, so further attempts on the
+        # same hole are almost certainly wasted budget — the pivot hint
+        # tells the model to switch to write_editable_proof with explicit
+        # multi-step tactics and inspect_lean_goals between steps.
+        if self._try_tactic_failure_streak >= 3:
+            hints = list(hints) if hints else []
+            hints.insert(
+                0,
+                f"You have now run {self._try_tactic_failure_streak} consecutive "
+                "`try_tactic_at_hole` calls with no success. This tool only "
+                "closes a goal when a SINGLE tactic discharges it entirely; "
+                "for goals that need BEq↔Prop bridging, case analysis on "
+                "residual `if`/`match` arms, monadic-trace unfolding, or "
+                "multi-step arithmetic rewriting, no single tactic will "
+                "close them no matter how many more you try. PIVOT: write a "
+                "full multi-line proof body with `write_editable_proof` "
+                "(leaving `?_` ONLY at positions where you then "
+                "`inspect_lean_goals` to see the reduced state), and make "
+                "progress one step at a time. Do NOT continue cycling "
+                "single-tactic guesses here."
+            )
+        if hints:
+            result["repair_hints"] = hints
+        return result
 
     def evaluate_current(self, *, check_goals: bool = False) -> dict[str, Any]:
         return self.evaluate_candidate(self.current_proof_text, check_goals=check_goals)
@@ -231,6 +508,16 @@ def evaluate_candidate(self, candidate_text: str, *, check_goals: bool = False)
                 )
                 command = ["lake", "env", "lean", "--root=.", str(check_path.relative_to(workspace))]
             code, output = lean_run_command(command, cwd=workspace)
+            # Strip the "This simp argument is unused" lint blocks from Lean
+            # output before returning. Corpus analysis of 37 failed-check
+            # detail blobs found 844/846 warnings (~99%) were this single
+            # linter, accounting for ~20 KB of the average 34 KB details
+            # blob. The noise drowns the real errors and trains the model
+            # to ignore the details block. Filtering preserves every real
+            # error and every other warning kind — only the known-useless
+            # linter goes away.
+            output = _strip_noise_warnings(output)
+            output = _cap_lean_output(output)
             if code != 0:
                 return {
                     "status": "failed",
@@ -271,7 +558,7 @@ def tool_specs(self) -> list[dict[str, Any]]:
                 "type": "function",
                 "function": {
                     "name": "write_editable_proof",
-                    "description": "Replace the entire editable proof file with complete Lean code.",
+                    "description": "Replace the entire editable proof file with complete Lean code and automatically run the Lean check. The response reports status (passed/failed/ok/ok_with_warnings) and, on failure, failure_mode, details, and failure_class. A separate run_lean_check call is not needed after this.",
                     "parameters": {
                         "type": "object",
                         "additionalProperties": False,
@@ -288,7 +575,7 @@ def tool_specs(self) -> list[dict[str, Any]]:
                 "type": "function",
                 "function": {
                     "name": "run_lean_check",
-                    "description": "Run the official harness Lean check for the current editable proof.",
+                    "description": "Re-run the Lean check on the current editable proof without modifying it. Redundant immediately after `write_editable_proof`, which already runs the check — if the proof text is unchanged since the last evaluation, this call returns a cached result tagged `cached: true` rather than re-invoking Lean.",
                     "parameters": {
                         "type": "object",
                         "additionalProperties": False,
@@ -312,7 +599,7 @@ def tool_specs(self) -> list[dict[str, Any]]:
                 "type": "function",
                 "function": {
                     "name": "search_public_defs",
-                    "description": "Search public implementation/specification files for matching def/theorem/lemma names.",
+                    "description": "Search the task's public implementation/specification files for matching def/theorem/lemma names. Scope is ONLY those task files — it does NOT search Lean core, Batteries, or Mathlib (Mathlib is not a dependency of this project). For standard-library lemmas, prefer `exact?` / `apply?` / `rw?` via `try_tactic_at_hole`, or tactics like `simp` / `omega` / `decide` that already know common arithmetic and boolean facts.",
                     "parameters": {
                         "type": "object",
                         "additionalProperties": False,
@@ -328,7 +615,7 @@ def tool_specs(self) -> list[dict[str, Any]]:
                 "type": "function",
                 "function": {
                     "name": "try_tactic_at_hole",
-                    "description": "Try replacing all `?_` holes in the current proof with a specific tactic and check if it compiles. Preserves the original proof if it fails. Useful for testing tactics like `simp_all [...]`, `omega`, `decide`, or `duper [...]`.",
+                    "description": "Try replacing all `?_` holes in the current proof with a specific tactic and check if it compiles. Pass a raw tactic (e.g. `omega`, `simp_all [foo]`, `decide`, `exact h`); substitution auto-wraps as `(by tac)` when the hole is at a term position like `exact ?_`. Preserves the original proof if it fails.",
                     "parameters": {
                         "type": "object",
                         "additionalProperties": False,
@@ -350,19 +637,67 @@ def execute_tool(self, name: str, arguments: dict[str, Any]) -> dict[str, Any]:
         if name == "write_editable_proof":
             return self.write_editable_proof(str(arguments.get("content", "")))
         if name == "run_lean_check":
+            # Short-circuit if the proof text is unchanged since the last
+            # evaluation. Corpus analysis of 83 interactive runs found that
+            # 201/201 (100%) of run_lean_check calls were made immediately
+            # after a write_editable_proof that had already run Lean on the
+            # same content. Returning the cached evaluation saves a full
+            # Lean invocation (seconds) and teaches the model the call was
+            # redundant via the `cached: true` marker + note.
+            if self._last_eval_cache is not None:
+                cached_text, cached_result = self._last_eval_cache
+                # Never serve an `environment_error` from cache. The write-
+                # side guard below already refuses to cache env errors, but
+                # treat the read side defensively too: if an env error ever
+                # ends up in the cache (e.g. via a future refactor), we
+                # must still re-run `evaluate_current` so `_attempt_lake_build`
+                # can retry the heal path instead of pinning the task to
+                # a stale infra failure that may have recovered.
+                cached_is_env_error = (
+                    isinstance(cached_result, dict)
+                    and (
+                        cached_result.get("failure_class") == "environment_error"
+                        or cached_result.get("environment_error") is True
+                    )
+                )
+                if cached_text == self.current_proof_text and not cached_is_env_error:
+                    reused = copy.deepcopy(cached_result)
+                    reused["cached"] = True
+                    reused["note"] = (
+                        "Proof text is unchanged since the last evaluation; "
+                        "returning cached result without re-running Lean. "
+                        "`write_editable_proof` already runs the Lean check — "
+                        "a follow-up `run_lean_check` on unchanged content is "
+                        "redundant."
+                    )
+                    return reused
             result = self.evaluate_current()
+            # Auto-heal environment errors (missing .olean) once before annotating.
+            if result.get("status") == "failed" and result.get("failure_mode") == "lean_check_failed":
+                details = str(result.get("details", ""))
+                if classify_failure(details) == "environment_error":
+                    module_name = _missing_olean_module(details)
+                    healed = _attempt_lake_build(module_name)
+                    if healed:
+                        result = self.evaluate_current()
             if result.get("status") == "failed":
                 result = self._annotate_check_result(result)
-                # Also add structured repair hints from main's guidance
-                if result.get("failure_mode") == "lean_check_failed":
-                    guidance = _build_repair_guidance(str(result.get("details", "")))
-                    if guidance:
-                        existing = result.get("repair_hints", [])
-                        if isinstance(existing, list):
-                            existing.append(guidance)
-                            result["repair_hints"] = existing
-                        else:
-                            result["repair_hints"] = [existing, guidance] if existing else [guidance]
+            # Cache the fresh evaluation against the current proof text so a
+            # follow-up run_lean_check on unchanged content hits the fast path.
+            # Exception: do NOT cache `environment_error` results. Those are
+            # transient infrastructure failures (missing .olean, lake build
+            # contention) that the heal path above tries to recover from via
+            # `_attempt_lake_build`. Caching them would short-circuit every
+            # subsequent `run_lean_check` on unchanged proof text back to the
+            # stale env error, preventing the heal path from being re-entered
+            # if infra recovers. Re-evaluate every time for env errors so the
+            # heal path keeps getting a chance.
+            is_env_error = (
+                result.get("failure_class") == "environment_error"
+                or result.get("environment_error") is True
+            )
+            if not is_env_error:
+                self._last_eval_cache = (self.current_proof_text, copy.deepcopy(result))
             return result
         if name == "inspect_lean_goals":
             return self.inspect_goals()
@@ -380,11 +715,27 @@ def _annotate_check_result(self, result: dict[str, Any]) -> dict[str, Any]:
         # not preflight failures (empty_response, placeholder_detected, etc.)
         is_lean_failure = failure_mode == "lean_check_failed"
         details = str(result.get("details", ""))
-        failure_class = classify_failure(details)
+        # Preflight failures carry English-language details that classify_failure
+        # can't pattern-match, so they all collapse to "other" and the model gets
+        # no targeted hint. Map the failure_mode directly to a class name so the
+        # model sees e.g. "placeholder_detected" instead of "other" and
+        # _build_check_hints can dispatch a specific hint.
+        if not is_lean_failure and failure_mode in _PREFLIGHT_FAILURE_MODES:
+            failure_class = failure_mode
+        else:
+            failure_class = classify_failure(details)
         hints = _build_check_hints(failure_class, details)
         annotated = dict(result)
         annotated["failure_class"] = failure_class
 
+        # environment_error is infrastructure, not a proof problem. Don't track
+        # stagnation for it (retrying won't help) and tag the result clearly.
+        if failure_class == "environment_error":
+            annotated["environment_error"] = True
+            if hints:
+                annotated["repair_hints"] = hints
+            return annotated
+
         if not is_lean_failure:
             if hints:
                 annotated["repair_hints"] = hints
@@ -402,6 +753,17 @@ def _annotate_check_result(self, result: dict[str, Any]) -> dict[str, Any]:
             else:
                 break
 
+        # Detect true no-progress loops: the normalized error text matches the
+        # previous failure byte-for-byte. This is a much stronger signal than
+        # same-class stagnation — it proves the last edit had zero effect on
+        # what Lean actually saw.
+        details_fp = _normalize_details_fp(details)
+        if details_fp and details_fp == self._last_details_fp:
+            self._same_details_streak += 1
+        else:
+            self._same_details_streak = 1
+        self._last_details_fp = details_fp
+
         # Escalate on either: 2+ consecutive same-class failures, or 4+ total failures
         if same_class_count >= 2 or total_failures >= 4:
             if same_class_count >= 2:
@@ -414,10 +776,153 @@ def _annotate_check_result(self, result: dict[str, Any]) -> dict[str, Any]:
                     f"You have failed {total_failures} times across different error classes. "
                     "Step back and reconsider your proof strategy from scratch."
                 )
-            escalation = self._build_escalation_hint(failure_class)
+            escalation = self._build_escalation_hint(failure_class, details)
             if escalation:
                 hints.append(escalation)
 
+        # When the error text is byte-identical to the previous attempt, the
+        # model's latest edit had zero effect — hints must call this out
+        # explicitly, not just repeat class-level advice. Keep this BEFORE the
+        # dedup: the streak count is part of the hint's dedup fingerprint, so
+        # each streak length is surfaced once per session, not on every failure.
+        if self._same_details_streak >= 2:
+            hints.insert(0, (
+                f"NO-PROGRESS LOOP DETECTED: your last {self._same_details_streak} "
+                "submissions produced byte-identical Lean errors. The changes you are "
+                "making do not reach the failing goal. Stop editing around the symptom. "
+                "Instead: (1) `write_editable_proof` with the failing tactic replaced by "
+                "`?_`, (2) `inspect_lean_goals` to read the real goal at that hole, "
+                "(3) `try_tactic_at_hole` with tactics you have NOT tried yet "
+                "(e.g. `simp_all`, `aesop`, `decide`, `exact?`, `constructor; all_goals ...`)."
+            ))
+
+        # Dedupe hints we've already shown this session. Repeated-verbatim hints
+        # are noise: corpus analysis of failing tasks showed the same 4-5 hints
+        # echoed across 5+ stagnation events, training the model to skip the
+        # repair_hints list entirely. Only surface *new* advice each time.
+        hints = self._filter_seen_hints(hints)
+
+        # Highest-leverage directive: corpus analysis of 83 runs shows 12/29
+        # failed tasks (41%) ended with `?_` still in the submitted proof, and
+        # in every one of those runs the agent re-submitted a `?_`-containing
+        # proof 2–9 times after the first rejection. The hint BELOW already
+        # existed but was inserted BEFORE `_filter_seen_hints`, so dedup
+        # suppressed it on the 2nd–Nth resubmission and the agent got no
+        # feedback tying its specific, detectable mistake (still-unfilled hole)
+        # to the specific failure class. Insert AFTER the dedup filter so this
+        # safety-critical, state-conditional warning fires on EVERY submission
+        # that still contains `?_`. The hint is keyed to the literal proof
+        # text state, not to the abstract hint corpus, so it is not a "noise"
+        # dedup candidate — it tells the agent something about its concrete
+        # current submission.
+        if HOLE_PATTERN.search(self.current_proof_text):
+            hole_count = len(HOLE_PATTERN.findall(self.current_proof_text))
+            hints.insert(0, (
+                f"UNFILLED HOLE IN SUBMITTED PROOF: your proof still contains "
+                f"{hole_count} `?_` hole(s). `?_` is a PROBE for `inspect_lean_goals` "
+                "and `try_tactic_at_hole`, never a final proof — Lean will reject "
+                "every submission containing `?_`. Do not submit `?_` again. Next "
+                "move: call `try_tactic_at_hole` with one concrete tactic at a "
+                "time (`omega`, `simp_all`, `decide`, `rfl`, `assumption`, "
+                "`trivial`, `exact h`, `contradiction`, `aesop`, `exact?`). If any "
+                "succeeds, the proof updates in place and the task closes. If "
+                "none do, use `inspect_lean_goals` to read each hole's goal, then "
+                "`write_editable_proof` with concrete tactics substituted for "
+                "every `?_`."
+            ))
+
+        # Second safety-critical, state-conditional warning that must survive
+        # `_filter_seen_hints`: tactic-in-term-position.
+        # Corpus analysis of 29 failed runs: 19 tasks (66%) emit at least one
+        # `unknown identifier '<tactic>'` diagnostic — 173 occurrences for
+        # 'simp', 100 for 'simpa', 52 for 'omega', 43 for 'native_decide',
+        # 24 for 'simp_all'. One task alone (safe/swap_owner_is_owner_correctness)
+        # emits 52 repeats of `unknown identifier 'simp'` in a single run.
+        # The existing tactic-in-term hint inside `_build_check_hints`
+        # (line ~1466) is suppressed by the dedup filter after its first
+        # emission, so the agent never gets feedback tying the specific
+        # mistake to each subsequent rejection. This is identical to the
+        # hole-warning failure mode: a state-conditional critical warning
+        # that must repeat as long as the state persists. Re-detect the
+        # tactic-in-term case against the current `details` and insert a
+        # persistent warning post-dedup. The hint is keyed to the concrete
+        # error-text state (which tactic is being misused), not the generic
+        # hint corpus, so it is not a "noise" dedup candidate.
+        _unknown_names = _UNKNOWN_IDENT_RE.findall(details)
+        _tactic_in_term = [n for n in _unknown_names if n in _LEAN_TACTIC_NAMES]
+        if _tactic_in_term:
+            _tactic_name = _tactic_in_term[0]
+            hints.insert(0, (
+                f"TACTIC IN TERM POSITION: Lean reports `unknown identifier "
+                f"'{_tactic_name}'` because `{_tactic_name}` is a TACTIC, not "
+                f"a term. It appears in your proof after `exact` / `refine` / "
+                f"`apply` / `:=` or inside `⟨ ⟩` — all term positions. Fix: "
+                f"wrap the tactic in `by`, e.g. `exact by {_tactic_name} ...`, "
+                f"`refine ⟨by {_tactic_name}, ...⟩`, or drop the `exact` / "
+                f"`refine` prefix so `{_tactic_name}` runs as a tactic "
+                f"directly (`by {_tactic_name} ...` at the top of the proof "
+                f"body). Do NOT call search_public_defs for `{_tactic_name}` "
+                f"— it is not a definition, it is a tactic, and the only fix "
+                f"is the `by` wrapper."
+            ))
+
+        # Third safety-critical, state-conditional warning: local-variable
+        # out-of-scope names. Corpus analysis of 29 failed runs: 6 tasks
+        # (21%) emit `unknown identifier '<camelCase name>'` for names that
+        # are clearly binder-shaped (no dots, lowercase first char, no
+        # underscores) — up to 110 occurrences in a single run
+        # (safe/swap_owner_is_owner_correctness: 91×prevOwner, 19×oldOwner).
+        # The existing local-variable hint in `_build_check_hints`
+        # (~line 1475) is actionable ("call inspect_lean_goals / re-check
+        # the signature") but is suppressed by dedup after first emission.
+        # Same failure mode as tactic-in-term and unfilled-hole: state
+        # persists across re-submissions, warning must repeat. The hint
+        # is keyed to the specific out-of-scope name from the error text,
+        # not the generic corpus, so it is not a "noise" dedup candidate.
+        # Only fire when no tactic-hit is present so we never spam both
+        # warnings for the same line range — Lean reports tactic names
+        # the same way as local vars, and if a tactic mistake is present
+        # that's almost always the upstream cause.
+        if not _tactic_in_term:
+            _var_hits = [
+                n for n in _unknown_names
+                if n not in _LEAN_TACTIC_NAMES
+                and "." not in n
+                and n
+                and n[0].islower()
+                and "_" not in n
+            ]
+            if _var_hits:
+                _var_name = _var_hits[0]
+                hints.insert(0, (
+                    f"LOCAL VARIABLE OUT OF SCOPE: Lean reports `unknown "
+                    f"identifier '{_var_name}'` for a name that looks like "
+                    f"a local binder, not a definition. `{_var_name}` is "
+                    f"not in scope at the point it is used — common causes: "
+                    f"(a) it was introduced inside a different `by_cases` / "
+                    f"`rcases` / `·` branch and is not visible in the "
+                    f"current branch; (b) the theorem signature uses a "
+                    f"different parameter name (check the editable file "
+                    f"header via `read_public_file`); (c) it was shadowed "
+                    f"by a later `intro` / `rintro` / `obtain`. Fix: call "
+                    f"`inspect_lean_goals` on a `?_` hole at this exact "
+                    f"location to see the binders ACTUALLY in scope, then "
+                    f"reference those names. Do NOT call search_public_defs "
+                    f"for `{_var_name}` — it is a binder, not a definition, "
+                    f"and search_public_defs cannot find binders."
+                ))
+        if not hints and same_class_count >= 3:
+            # All the standing advice has already been seen and isn't working.
+            # Issue a one-shot pivot directive rather than sending an empty list,
+            # which the model interprets as "nothing new, carry on".
+            hints = [
+                f"All prior repair hints for '{failure_class}' have now been repeated "
+                f"{same_class_count} times without progress. Stop retrying variations of "
+                f"the same proof. Next move: write a minimal skeleton with a `?_` hole at "
+                f"the first failing step, call `inspect_lean_goals` to read the actual "
+                f"goal state, then use `try_tactic_at_hole` to probe tactics one at a time."
+            ]
+
         if hints:
             annotated["repair_hints"] = hints
 
@@ -455,7 +960,23 @@ def _annotate_check_result(self, result: dict[str, Any]) -> dict[str, Any]:
 
         return annotated
 
-    def _build_escalation_hint(self, failure_class: str) -> str | None:
+    def _filter_seen_hints(self, hints: list[str]) -> list[str]:
+        """Drop hints whose fingerprint has already been surfaced this session.
+
+        Fingerprint = lowercased first 80 non-whitespace chars. Short enough
+        that wording tweaks still dedupe, long enough to distinguish genuinely
+        different hints.
+        """
+        fresh: list[str] = []
+        for hint in hints:
+            key = "".join(hint.lower().split())[:80]
+            if key in self._emitted_hint_keys:
+                continue
+            self._emitted_hint_keys.add(key)
+            fresh.append(hint)
+        return fresh
+
+    def _build_escalation_hint(self, failure_class: str, details: str = "") -> str | None:
         """Build an escalation hint when the model is stagnating on a failure class."""
         terms = extract_contract_simp_terms(self._task)
         if terms:
@@ -465,6 +986,32 @@ def _build_escalation_hint(self, failure_class: str) -> str | None:
             full_set = ""
 
         if failure_class in ("simp_no_progress", "unsolved_goals", "rfl_failed", "unfold_failed"):
+            # If the stuck goal carries a `case <label>` marker, the agent has
+            # ALREADY case-split and is stalling on an open branch. Telling it
+            # to "Start with unfold … then by_cases" would undo the split and
+            # regress. Escalate with branch-closing advice instead.
+            case_labels = re.findall(r"\ncase ([a-zA-Z_][a-zA-Z0-9_.]*)\n", details or "")
+            if case_labels:
+                seen_lbls: list[str] = []
+                for lbl in case_labels:
+                    if lbl not in seen_lbls:
+                        seen_lbls.append(lbl)
+                lbl_list = ", ".join(f"`{l}`" for l in seen_lbls[:4])
+                simp_fragment = f"simp_all [{full_set}]" if full_set else "simp_all"
+                return (
+                    f"ESCALATION: You are stuck inside an open case branch ({lbl_list}). "
+                    f"Do NOT restart the proof or re-split — your previous case-split is "
+                    f"correct. Instead, close ONLY the open branch:\n"
+                    f"1. Call inspect_lean_goals with a `?_` at the branch's current "
+                    f"position to read the exact hypotheses (they include the branch "
+                    f"condition as `h✝` or a named hypothesis).\n"
+                    f"2. Try `{simp_fragment}` — it rewrites hypotheses into each other "
+                    f"and closes branches where the branch hypothesis contradicts another.\n"
+                    f"3. If two hypotheses literally contradict (e.g. `h1 : x = 0` and "
+                    f"`h2 : x ≠ 0`), close with `exact absurd h1 h2`.\n"
+                    f"4. If the goal is a linear (in)equality over `.val`, use `omega` "
+                    f"after `simp only [...]` has exposed the `.val` form."
+                )
             if full_set:
                 return (
                     f"ESCALATION: You are stuck. Do NOT use `unfold` on contract functions. "
@@ -476,6 +1023,44 @@ def _build_escalation_hint(self, failure_class: str) -> str | None:
                     f"5. Never use bare `simp [h]` or `unfold ContractName.functionName`"
                 )
         if failure_class == "unknown_identifier":
+            unknown_names = _UNKNOWN_IDENT_RE.findall(details or "")
+            tactic_hits = [n for n in unknown_names if n in _LEAN_TACTIC_NAMES]
+            if tactic_hits:
+                name = tactic_hits[0]
+                return (
+                    f"ESCALATION: `{name}` is a TACTIC, not an identifier to search for. "
+                    f"You are writing `{name}` in term position (after `exact`/`refine`/`apply`/`:=` or "
+                    f"inside `⟨ ⟩`). Either wrap with `by` (e.g. `exact by {name} ...`) or drop the "
+                    f"`exact`/`refine` prefix so `{name}` runs in tactic mode."
+                )
+            var_hits = [
+                n for n in unknown_names
+                if n not in _LEAN_TACTIC_NAMES and "." not in n
+                and n and n[0].islower() and "_" not in n
+            ]
+            if var_hits:
+                name = var_hits[0]
+                return (
+                    f"ESCALATION: `{name}` is a LOCAL VARIABLE shape, not a definition. "
+                    f"search_public_defs cannot find binders — it only searches public "
+                    f"definitions. Call `inspect_lean_goals` on a `?_` hole to see which "
+                    f"binders are in scope, then match the actual parameter names from the "
+                    f"theorem signature."
+                )
+            mathlib_hits = [
+                n for n in unknown_names
+                if n not in _LEAN_TACTIC_NAMES and _is_mathlib_shaped(n)
+            ]
+            if mathlib_hits:
+                name = mathlib_hits[0]
+                return (
+                    f"ESCALATION: `{name}` is a Mathlib lemma name, but this workspace has "
+                    f"NO Mathlib dependency. Stop searching for `add_*` / `sub_*` / `Nat.*` "
+                    f"lemmas — they do not exist here. Close arithmetic goals with `omega` "
+                    f"(linear Nat/Int), `decide` (closed decidable goals), or `simp_all`. For "
+                    f"project helpers call search_public_defs with a KEYWORD, not a guessed "
+                    f"lemma name."
+                )
             return (
                 "ESCALATION: Stop guessing identifier names. Use the search_public_defs tool "
                 "to find the exact names from the implementation and specification files."
@@ -515,8 +1100,17 @@ def _materialize_workspace(self, workspace: Path) -> None:
 
     def _extract_theorem_signature(self, text: str) -> str | None:
         short_name = self.paths.theorem_name.rsplit(".", 1)[-1]
+        # Match any proof style: tactic-mode (`:= by ...`) or term-mode
+        # (`:= rfl`, `:= fun n => ...`, `:= Eq.mpr ...`). Previously the
+        # regex required `:= by`, so a valid term-mode proof returned None
+        # while the expected signature (extracted from an initial `:= by`
+        # file) was a string — the inequality fired a false
+        # `theorem_statement_mismatch` even though the `theorem name : TYPE`
+        # prefix was unchanged. Anchoring on `:=` alone (with the `by`
+        # branch preferred when present, to stay bug-compatible for
+        # tactic-mode) lets both styles produce the same signature string.
         pattern = re.compile(
-            rf"theorem\s+{re.escape(short_name)}\b(?P<signature>.*?)(?::=)",
+            rf"theorem\s+{re.escape(short_name)}\b(?P<signature>.*?):=\s*(?:by\b)?",
             re.DOTALL,
         )
         match = pattern.search(text)
@@ -546,6 +1140,68 @@ def _module_name(rel_path: str) -> str:
         return module_path.replace("/", ".")
 
 
+_LAKE_BUILD_CACHE: dict[str, bool] = {}
+
+
+def _attempt_lake_build(module_name: str | None) -> bool:
+    """Best-effort `lake build` for a module. Returns True on success.
+
+    Always invokes `lake build` — this is the self-heal path, called when the
+    runtime observed a missing .olean at check time, so the previously cached
+    "success" entry is stale and cannot be trusted. The cache is refreshed
+    with the latest result so subsequent prebuild calls can short-circuit
+    correctly.
+    """
+    if not module_name:
+        return False
+    if not module_name.startswith("Benchmark."):
+        return False
+    code, _output = lean_run_command(["lake", "build", module_name], cwd=ROOT)
+    success = code == 0
+    _LAKE_BUILD_CACHE[module_name] = success
+    return success
+
+
+def prebuild_task_modules(task: dict[str, Any]) -> list[dict[str, Any]]:
+    """Pre-build implementation/specification .olean files for a task.
+
+    Returns a list of build reports. Meant to be called once before starting
+    the agent loop so on-the-fly compilation inside `lake env lean` does not
+    race with fast agent retries.
+    """
+    reports: list[dict[str, Any]] = []
+    targets: list[str] = []
+    for rel_path in list(task.get("implementation_files", [])) + list(task.get("specification_files", [])):
+        path = Path(rel_path)
+        if path.suffix != ".lean":
+            continue
+        module_name = ".".join(path.with_suffix("").parts)
+        # Only modules inside the `Benchmark` lean_lib are buildable via `lake build`.
+        # Source-of-truth files under `cases/` are mirrored into `Benchmark/Cases/` and
+        # that mirror is what lake actually compiles.
+        if not module_name.startswith("Benchmark."):
+            continue
+        if module_name in targets:
+            continue
+        targets.append(module_name)
+    for module_name in targets:
+        if _LAKE_BUILD_CACHE.get(module_name):
+            reports.append({"module": module_name, "status": "cached"})
+            continue
+        code, output = lean_run_command(["lake", "build", module_name], cwd=ROOT)
+        success = code == 0
+        if success:
+            _LAKE_BUILD_CACHE[module_name] = True
+        reports.append(
+            {
+                "module": module_name,
+                "status": "ok" if success else "failed",
+                "output": output[-600:] if not success else "",
+            }
+        )
+    return reports
+
+
 def extract_contract_simp_terms(task: dict[str, Any]) -> list[str]:
     """Extract concrete simp terms from implementation and specification files.
 
@@ -586,11 +1242,303 @@ def extract_contract_simp_terms(task: dict[str, Any]) -> list[str]:
     return terms
 
 
+# Term-expecting tokens/punctuation that immediately precede a `?_` hole
+# when the hole is in term (expression) position rather than tactic position.
+# Anchored with `$`: it is searched against the text that precedes the hole.
+_TERM_POSITION_RE = re.compile(
+    r"(?:"
+    r"\b(?:exact|refine|apply|show|have|let|suffices|exact\?|refine!|exact!|"
+    r"use|calc|from|fun)\s*"  # term-expecting keywords
+    r"|[⟨(,\[{]\s*"             # inside anonymous constructors / tuples / lists
+    r"|:=\s*"                    # RHS of let / have := ?_
+    r")$"
+)
+# Lean's diagnostic header format is `<source-file>:LINE:COL: <kind>: <msg>`.
+# Two code paths reach this regex family:
+#   1. `evaluate_candidate` (run_lean_check / write_editable_proof) writes a
+#      `CandidateCheck.lean` stub and reports errors against that name.
+#   2. `inspect_lean_goals` runs Lean against the actual editable file path
+#      (e.g. `Benchmark/Generated/Foo/Bar.lean`) because it needs `check_goals`
+#      to introspect the real `?_` hole — no stub wrapper.
+# Corpus analysis of 83 runs found 32/88 (36%) of inspect_lean_goals outputs
+# still contained `linter.unusedSimpArgs` blocks because the old, hardcoded
+# `CandidateCheck\.lean:` regex silently skipped them. Accepting any
+# `<nonws>.lean:LINE:COL:` header lets the same strip + fingerprint logic
+# apply to both code paths uniformly.
+_FP_LINE_COL_RE = re.compile(r"\S+\.lean:\d+:\d+:")
+_FP_WS_RE = re.compile(r"\s+")
+_LEAN_BLOCK_HEADER_RE = re.compile(
+    r"^\S+\.lean:\d+:\d+:\s*(error|warning|note|info):"
+)
+
+
+_LEAN_OUTPUT_CAP_CHARS = 16000
+
+
+def _cap_lean_output(output: str, max_chars: int = _LEAN_OUTPUT_CAP_CHARS) -> str:
+    """Clamp Lean-check output to a size the model can realistically read.
+
+    Measured over 201 interactive `run_lean_check` results, the stripped
+    output is heavy-tailed: median 1.4 KB, p95 32 KB, max 136 KB (300 KB
+    before stripping — one call eating >70 k tokens). The tail comes from
+    goals whose state holds deeply nested `match`/`if` chains over
+    contract state; 16 errors that each print a 10 KB goal reach 100 KB
+    quickly, which exhausts the context budget and buries the first
+    (usually most actionable) error.
+
+    Output longer than `max_chars` is cut and followed by an explicit
+    marker, so the earliest errors survive and the model can tell text
+    was elided. At 16 KB roughly 89 % of real corpus outputs pass through
+    unchanged and the worst case costs about 4 k tokens.
+    """
+    overflow = len(output) - max_chars
+    if overflow <= 0:
+        return output
+    # Prefer a line boundary; hard-cut when the last newline is in the first half.
+    kept = output[:max_chars]
+    newline_at = kept.rfind("\n")
+    kept = kept[:newline_at] if newline_at > max_chars // 2 else kept
+    elided = len(output) - len(kept)
+    return (
+        f"{kept}\n"
+        f"[... Lean output truncated: {elided} more characters elided to "
+        f"keep the tool result within the model's context budget. The first "
+        f"errors are preserved above — address them before expecting the "
+        f"later diagnostics to matter, since Lean errors cascade.]"
+    )
+
+
+def _strip_noise_warnings(output: str) -> str:
+    """Remove `linter.unusedSimpArgs` warning blocks from Lean stdout.
+
+    Lean 4.22 prints a multi-line warning for each simp argument it
+    considers unused: the header line, the name of the unused argument,
+    a "Hint: Omit it..." directive, a 3–8 line reconstructed simp call
+    with strikethrough glyphs, and a "Note: This linter can be disabled
+    with `set_option linter.unusedSimpArgs false`" footer. In the 37
+    failed-check blocks of the current corpus these account for
+    844/846 of all warnings and roughly 20 KB of the average 34 KB
+    details blob — pure noise for the model, because repair work is
+    always driven by errors, not by this lint.
+
+    A block starts at a `<source>.lean:L:C: warning: This simp argument
+    is unused:` header and runs until the next Lean diagnostic header
+    (error/warning/note/info) or end-of-output. The `<source>` prefix
+    is matched generically, so output from `inspect_lean_goals` (which
+    runs Lean on the editable file directly rather than on the
+    `CandidateCheck.lean` stub) is stripped the same way as output
+    from `run_lean_check`. All other diagnostics, unrelated warnings
+    included, are kept verbatim.
+    """
+    marker = "This simp argument is unused"
+    if not output or marker not in output:
+        return output
+    survivors: list[str] = []
+    dropping = False
+    for raw_line in output.splitlines(keepends=True):
+        found = _LEAN_BLOCK_HEADER_RE.match(raw_line)
+        if found is not None:
+            # Each diagnostic header decides afresh whether its block is
+            # an unused-simp-arg warning (dropped) or anything else (kept).
+            dropping = found.group(1) == "warning" and marker in raw_line
+        if dropping:
+            continue
+        survivors.append(raw_line)
+    return "".join(survivors)
+
+
+def _is_term_position_hole(proof: str, hole_start: int) -> bool:
+    """Report whether the `?_` at `hole_start` occupies a term slot.
+
+    Inspects at most the 40 characters before the hole, ignoring trailing
+    whitespace, and tests them against the known term-expecting prefixes.
+    `_substitute_holes` uses the answer to decide whether a raw tactic
+    has to be wrapped as `(by ...)` for the result to type-check.
+    """
+    lookback_from = max(0, hole_start - 40)
+    # Trailing whitespace/newlines are dropped (`exact\n  ?_` is still a
+    # term position) and one space is re-added so the pattern's trailing
+    # `\s*$` sees the same input whatever the original spacing was.
+    preceding = proof[lookback_from:hole_start].rstrip() + " "
+    return _TERM_POSITION_RE.search(preceding) is not None
+
+
+def _is_fully_paren_wrapped(raw: str) -> bool:
+    """Tell whether `raw` is exactly one parenthesised expression.
+
+    The `(` at index 0 must be the partner of the `)` at the end: nesting
+    depth stays >= 1 until the last character and drops to 0 only there.
+    So `(a)`, `((a + b))` and `(first | a | b)` are accepted, whereas
+    `(a) + (b)`, `(a)(b)` and `(foo) bar (baz)` are rejected. Parentheses
+    inside Lean string literals (`"..."`) are ignored.
+    """
+    length = len(raw)
+    if length < 2 or not (raw[0] == "(" and raw[-1] == ")"):
+        return False
+    final_index = length - 1
+    nesting = 0
+    inside_literal = False
+    pos = 0
+    while pos < length:
+        char = raw[pos]
+        pos += 1
+        if inside_literal:
+            if char == "\\" and pos < length:
+                # Backslash escape: the escaped character cannot end the literal.
+                pos += 1
+            elif char == '"':
+                inside_literal = False
+            continue
+        if char == '"':
+            inside_literal = True
+        elif char == "(":
+            nesting += 1
+        elif char == ")":
+            nesting -= 1
+            if nesting == 0 and pos - 1 != final_index:
+                # The outermost group ended early, e.g. `(a) + (b)`.
+                return False
+    return nesting == 0
+
+
+def _substitute_holes(proof: str, tactic: str) -> str:
+    """Replace every `?_` in `proof` with a context-adapted form of `tactic`.
+
+    At term-position holes (`exact ?_`, `⟨?_, ?_⟩`, `:= ?_`, ...) the
+    substitute must be a term, so wrap a raw tactic as `(by <tactic>)` unless
+    the caller already provided a term form. At tactic-position holes the
+    substitute must be a tactic, so strip a leading `by` to avoid nested
+    `by ... by ...` blocks.
+    """
+    raw = tactic.strip()
+    # Already a term form? (leading `by` + any whitespace, or fully wrapped in parens)
+    starts_by = re.match(r"by\s", raw) is not None
+    # `fully_paren_wrapped` means the outer `(` at position 0 is the partner
+    # of the outer `)` at the end — i.e. the whole string is one parenthesised
+    # expression. A plain depth count (startswith/endswith + balanced totals)
+    # mis-classifies strings like `(a) + (b)` or `(foo) bar (baz)`, which
+    # would get their "term form" left as-is and become invalid when
+    # substituted into a term-position hole. Track nesting depth and confirm
+    # it only returns to zero on the final character.
+    fully_paren_wrapped = _is_fully_paren_wrapped(raw)
+    # Precompute the tactic-position form: drop the leading `by` keyword and
+    # the whitespace after it (space, tab, newline, CRLF) so substitution at
+    # a tactic hole doesn't nest `by`. Leave paren-wrapped forms alone — those
+    # often indicate grouping the caller wants preserved (`(first | a | b)`).
+    if starts_by:
+        tactic_form = raw[2:].lstrip()
+    else:
+        tactic_form = raw
+    # Term-position form: must be a valid term. `(by <tac>)` wraps a raw
+    # tactic. A bare `by <tac>` is itself a valid term (`exact by simp`
+    # parses fine), but how far a `by` block extends depends on the text
+    # that follows it, so it is parenthesised as well to keep the
+    # substituted block self-delimiting wherever the hole sits. A fully
+    # paren-wrapped value is already delimited and is left alone (it may
+    # be grouping tactics the caller wants preserved, e.g.
+    # `(first | a | b)`).
+    if fully_paren_wrapped:
+        term_form = raw
+    elif starts_by:
+        term_form = f"({raw})"
+    else:
+        term_form = f"(by {raw})"
+
+    out: list[str] = []
+    cursor = 0
+    for match in HOLE_PATTERN.finditer(proof):
+        out.append(proof[cursor:match.start()])
+        if _is_term_position_hole(proof, match.start()):
+            out.append(term_form)
+        else:
+            out.append(tactic_form)
+        cursor = match.end()
+    out.append(proof[cursor:])
+    return "".join(out)
+
+
+def _normalize_details_fp(details: str) -> str:
+    """Fingerprint a Lean error independently of position and layout.
+
+    Every `<source>.lean:LINE:COL:` marker is removed and each run of
+    whitespace becomes a single space, so two Lean runs that differ only
+    in formatting noise share a fingerprint. The result is cut to 512
+    chars — enough to tell genuinely different errors apart, yet short
+    enough that small trailing-hint variation doesn't break the match.
+    """
+    if not details:
+        return ""
+    without_positions = _FP_LINE_COL_RE.sub("", details)
+    single_spaced = _FP_WS_RE.sub(" ", without_positions)
+    return single_spaced.strip()[:512]
+
+
+# Missing-olean errors can be infrastructure (a Benchmark dependency wasn't
+# pre-built) or the model's fault (imported a module that doesn't exist). We
+# only classify the former as environment_error so stagnation/temperature
+# logic still applies to model-caused import mistakes.
+# Lean prints both forms of this diagnostic, depending on context:
+#   object file '<path>.olean' does not exist
+#   object file '<path>.olean' of module <Name> does not exist
+# so accept arbitrary text (incl. "of module <Name>") between the path and
+# the "does not exist" tail.
+_MISSING_OLEAN_RE = re.compile(
+    r"object file ['\"]([^'\"]+\.olean)['\"]?[^\n]*?does not exist"
+)
+INFRA_ONLY_ERROR_PATTERNS = (
+    re.compile(r"lean executable .* not found", re.IGNORECASE),
+)
+
+
+def _missing_olean_module(details: str) -> str | None:
+    """Return the dotted module name of the missing .olean reported in `details`, if any."""
+    found = _MISSING_OLEAN_RE.search(details)
+    if found is None:
+        return None
+    module_path = found.group(1)
+    # Paths may be absolute: keep only the part from the last `Benchmark/`
+    # directory component onwards when there is one.
+    anchor = module_path.rfind("/Benchmark/")
+    if anchor >= 0:
+        module_path = module_path[anchor + 1 :]
+    # Drop the `.olean` extension, then turn path separators into dots.
+    suffix = ".olean"
+    if module_path.endswith(suffix):
+        module_path = module_path[: -len(suffix)]
+    return module_path.replace("/", ".")
+
+
+# Preflight failure_mode values that preflight_candidate returns. Used by
+# _annotate_check_result to surface these as failure_class directly rather than
+# collapsing them into "other" via English-language classify_failure lookup.
+_PREFLIGHT_FAILURE_MODES = frozenset({
+    "empty_response",
+    "placeholder_detected",
+    "hidden_proof_import_detected",
+    "hidden_case_import_detected",
+    "theorem_statement_mismatch",
+})
+
+
 def classify_failure(details: str) -> str:
     """Classify a Lean checker failure into a coarse category."""
     if not details:
         return "unknown"
     lower = details.lower()
+    # Infrastructure errors that the model cannot reasonably be blamed for.
+    for pattern in INFRA_ONLY_ERROR_PATTERNS:
+        if pattern.search(details):
+            return "environment_error"
+    # Missing .olean is infra only when it is a Benchmark.* dependency *whose
+    # source file actually exists* in the tree -- meaning lake should have
+    # built it but didn't. If the source file is missing too, the model
+    # imported / referenced something that doesn't exist, which is its own
+    # mistake and should go through the normal stagnation/temperature loop.
+    missing_module = _missing_olean_module(details)
+    if missing_module and missing_module.startswith("Benchmark."):
+        source_rel = Path(*missing_module.split(".")).with_suffix(".lean")
+        if (ROOT / source_rel).is_file():
+            return "environment_error"
     if "unknown identifier" in lower or "unknown constant" in lower:
         return "unknown_identifier"
     if "unsolved goals" in lower:
@@ -619,35 +1567,319 @@ def classify_failure(details: str) -> str:
         return "rfl_failed"
     if "invalid" in lower and "conv tactic" in lower:
         return "tactic_misuse"
+    if "omega could not prove the goal" in lower:
+        return "omega_failed"
+    if "tactic 'constructor' failed" in details and "not an inductive datatype" in lower:
+        return "constructor_failed"
+    # `cases` / `induction` on a non-inductive target (e.g. an implication
+    # `A → B`, a function, or a Prop that isn't a recognised eliminator) is a
+    # distinct failure mode from `constructor` — Lean phrases it as "major
+    # premise type is not an inductive type". Corpus analysis: 22 incidents in
+    # 1 failed task (setup_owners_acyclicity) all repeating the same `cases h`
+    # on an implication, because the generic "other" bucket gave no actionable
+    # hint. Split into its own class so the hint can cover `intro` /
+    # `by_cases` / `absurd` — the actual remedies for this shape.
+    if (
+        ("tactic 'cases' failed" in details or "tactic 'induction' failed" in details)
+        and "not an inductive type" in lower
+    ):
+        return "cases_failed"
+    if "unknown module prefix" in lower:
+        return "module_not_found"
+    if "don't know how to synthesize placeholder" in lower:
+        return "synthesis_failed"
+    # Parse errors (`unexpected token '…'`, `unexpected identifier`, or the
+    # "expected '{' or indented tactic sequence" shape) indicate malformed
+    # Lean syntax rather than a semantic proof failure. Corpus analysis of
+    # 83 runs: 21 failed-run events across 14 tasks contain a parse error
+    # as one of the error lines, and 2 tasks surface it with no other
+    # classifiable signal (collapsing to "other"). Giving those cases an
+    # explicit class unlocks a targeted syntax hint.
+    if (
+        "error: unexpected token" in lower
+        or "error: unexpected identifier" in lower
+        or "expected '{' or indented tactic sequence" in lower
+    ):
+        return "parse_error"
     return "other"
 
 
 def _build_check_hints(failure_class: str, details: str) -> list[str]:
     """Build targeted repair hints based on failure classification."""
     hints: list[str] = []
+    if failure_class == "environment_error":
+        hints.append(
+            "ENVIRONMENT ERROR (not your fault): a dependency .olean is missing. "
+            "The harness is attempting to rebuild it. If this persists, your proof is likely correct; "
+            "retry run_lean_check once more."
+        )
+        return hints
+    if failure_class == "placeholder_detected":
+        hints.append(
+            "PREFLIGHT REJECTED: proof contains `sorry` or `admit`. The harness "
+            "will never accept these. Replace every `sorry`/`admit` with a real "
+            "tactic, or use `?_` (unnamed hole) to probe a sub-goal with "
+            "inspect_lean_goals / try_tactic_at_hole."
+        )
+        return hints
+    if failure_class == "theorem_statement_mismatch":
+        hints.append(
+            "PREFLIGHT REJECTED: you changed the editable theorem signature. Only "
+            "the proof body after `:=` is editable. Restore the exact theorem "
+            "declaration from the original editable file (re-read it with "
+            "read_public_file if unsure) and edit only the body."
+        )
+        return hints
+    if failure_class == "hidden_proof_import_detected":
+        hints.append(
+            "PREFLIGHT REJECTED: proof imports a hidden `Benchmark.Cases.*.Proofs` "
+            "module. Reference-solution modules are not part of the public API. "
+            "Remove that import and write the proof yourself."
+        )
+        return hints
+    if failure_class == "hidden_case_import_detected":
+        hints.append(
+            "PREFLIGHT REJECTED: proof imports a non-public `Benchmark.Cases.*` "
+            "module. Only `Benchmark.Cases.*.Specs` (and your own editable file) "
+            "are visible. Remove the blocked import."
+        )
+        return hints
+    if failure_class == "empty_response":
+        hints.append(
+            "PREFLIGHT REJECTED: the proof content was empty. Submit the full "
+            "Lean file including `import`, `namespace`, and the theorem with "
+            "its proof body."
+        )
+        return hints
     if failure_class == "unknown_identifier":
-        if "decide_True" in details or "decide_False" in details:
+        unknown_names = _UNKNOWN_IDENT_RE.findall(details)
+        tactic_hits = [n for n in unknown_names if n in _LEAN_TACTIC_NAMES]
+        var_hits = [
+            n for n in unknown_names
+            if n not in _LEAN_TACTIC_NAMES and "." not in n
+            and n and n[0].islower() and "_" not in n
+        ]
+        mathlib_hits = [
+            n for n in unknown_names
+            if n not in _LEAN_TACTIC_NAMES and _is_mathlib_shaped(n)
+        ]
+        if tactic_hits:
+            name = tactic_hits[0]
+            hints.append(
+                f"`{name}` is a TACTIC, not an identifier. Lean reports `unknown identifier "
+                f"'{name}'` when a tactic is written in TERM position (after `exact`, `refine`, "
+                f"`apply`, `:=`, inside `⟨ ⟩`, etc.). Fix: wrap the tactic in `by` — e.g. "
+                f"`exact by {name} ...` or `:= by {name} ...`. If the goal is already in tactic "
+                f"mode, remove the `exact`/`refine` prefix and call `{name}` directly."
+            )
+        elif var_hits:
+            name = var_hits[0]
+            hints.append(
+                f"`{name}` looks like a LOCAL VARIABLE name, not a definition. "
+                f"`unknown identifier '{name}'` means `{name}` is not in scope at that point — "
+                f"it may have been introduced in a different branch, shadowed, or never bound. "
+                f"Use `inspect_lean_goals` to see the exact binders in scope at each `?_`, and "
+                f"re-check the theorem signature for the actual parameter names. Do NOT call "
+                f"search_public_defs for a local-variable-shaped name — it searches definitions, "
+                f"not binders."
+            )
+        elif "decide_True" in details or "decide_False" in details:
             hints.append("CRITICAL: `decide_True` and `decide_False` do not exist. Remove them. Instead, pass precondition hypotheses directly to `simp` - it handles `decide` reduction automatically.")
         else:
-            hints.append("Use search_public_defs to find correct names from spec/impl files.")
-        hints.append("Check imports. Standard names: Nat.lt_of_not_ge, Nat.not_le_of_lt.")
+            if mathlib_hits:
+                name = mathlib_hits[0]
+                hints.append(
+                    f"`{name}` is a Mathlib-style lemma name, but this workspace has NO "
+                    f"Mathlib dependency — only core Lean 4, Batteries, and the task's own "
+                    f"`Benchmark.*` modules are importable. Do not keep guessing names like "
+                    f"`add_sub_*`, `sub_eq_*`, `lt_of_*`, or `Nat.div_*` — they will not be "
+                    f"found. For arithmetic goals use `omega` (linear Nat/Int), `ring` "
+                    f"(commutative rings), or `simp arith` directly; for project helpers "
+                    f"use search_public_defs on a keyword, not a guessed lemma name."
+                )
+            else:
+                hints.append("Use search_public_defs to find correct names from spec/impl files.")
+        if not tactic_hits and not var_hits and not mathlib_hits:
+            hints.append("Check imports. Standard names: Nat.lt_of_not_ge, Nat.not_le_of_lt.")
     elif failure_class == "unsolved_goals":
         hints.append("Use inspect_lean_goals with a ?_ hole to see exact goal state.")
+        # Detect `case <label>` markers in the unsolved-goals output. When
+        # present, the agent has already case-split successfully and exactly
+        # one branch remains open — re-splitting is wrong, the fix is to
+        # close the specific branch using its branch-specific hypothesis.
+        # Corpus analysis: 59 of 127 unsolved_goals incidents across 22
+        # tasks (46%) carry a `case <label>` marker; the current hint set
+        # tells the agent to "restructure with by_cases" which can make it
+        # undo its own working split.
+        case_labels = re.findall(r"\ncase ([a-zA-Z_][a-zA-Z0-9_.]*)\n", details)
+        if case_labels:
+            seen_lbls: list[str] = []
+            for lbl in case_labels:
+                if lbl not in seen_lbls:
+                    seen_lbls.append(lbl)
+            lbl_list = ", ".join(f"`{l}`" for l in seen_lbls[:4])
+            hints.append(
+                f"The unsolved goals list shows open case(s): {lbl_list}. You have "
+                f"ALREADY split successfully — do NOT restructure or re-split. Focus on "
+                f"closing just the named branch(es) using the branch-specific "
+                f"hypotheses now in scope (e.g. `h✝ : ¬P` inside a negative case). "
+                f"Common fixes per branch: add the branch hypothesis to "
+                f"`simp_all [..., hbranch]`, use `omega` when the branch hypothesis "
+                f"is an arithmetic (in)equality, or finish with `exact absurd hx hy` "
+                f"when two branch hypotheses contradict each other."
+            )
         if "if " in details or "match" in details:
             hints.append("If simp leaves `if`/`match` with free variables, use `by_cases` on each unresolved condition BEFORE calling simp. Pass all case hypotheses to simp. Do NOT use `split` after simp or `native_decide`/`decide` on goals with free variables.")
+        # Corpus analysis of 29 failed interactive runs found 11 (38%) ending
+        # with an unsolved_goals error whose goal still carried the UNFOLDED
+        # MONADIC TRACE — markers like `ContractResult.success`/`.revert`,
+        # `Contract.run`, or a wrapper like `Core.Address.ofNat ((match ...))`
+        # around a nested `match` over `getMappingAddr`/storage. Cross-family:
+        # safe/owner_manager_reach (6), zama/erc7984 (2), paladin_votes (1),
+        # kleros/sortition_trees (1), with 0 of 54 passed runs showing the
+        # pattern (clean failure signal). In every case the agent kept adding
+        # more helpers (`ContractResult.success`, `.snd`, `Contract.run`, …)
+        # to its `simp` list without closing the goal, because the remaining
+        # `if <cond>` arms in the trace test PROPOSITIONAL equality while the
+        # available hypotheses are in BEq form (`(x != zeroAddress) = true`).
+        # The existing if/match hint above is too generic — it never tells
+        # the agent to bridge BEq→Prop or to `split_ifs` on the unreduced arms.
+        has_monadic_trace = (
+            "ContractResult.success" in details
+            or "ContractResult.revert" in details
+            or "Contract.run" in details
+        )
+        # Also catch the case where the literal markers above are absent
+        # but the goal carries a raw `(X).run s).snd` pattern — i.e. the
+        # agent tried to close the theorem without ever unfolding
+        # `Contract.run`. Corpus analysis: this adds `swap_owner_ownerListInvariant`
+        # (1 failed task whose final error has "unsolved goals" alongside a
+        # synthesis placeholder), with 0 of 54 passed runs' final details
+        # matching the pattern in a goal line.
+        if not has_monadic_trace:
+            for _ln in details.split("\n"):
+                _stripped = _ln.lstrip()
+                if _stripped.startswith("⊢") and re.search(
+                    r"\.run\s+\w+\)\.snd", _ln
+                ):
+                    has_monadic_trace = True
+                    break
+        if has_monadic_trace:
+            hints.append(
+                "Your `simp` unfolded the contract function but the goal "
+                "still carries the UNFOLDED MONADIC TRACE — look for "
+                "`ContractResult.success`/`.revert`, nested `match` arms, "
+                "or wrappers like `Core.Address.ofNat ((match ...))`. Do "
+                "NOT keep adding more definitions (`ContractResult.success`, "
+                "`.revert`, `.snd`, `Contract.run`, …) to your `simp` list; "
+                "those are not the closing rewrites. Two concrete moves: "
+                "(1) `split_ifs` (or `split`) to force case analysis on every "
+                "leftover `if <cond> then ... else ...` inside the trace — "
+                "each branch gives you a propositional hypothesis `h : x = 0` "
+                "or `h : ¬ x = 0` that discharges the arm. "
+                "(2) PRECONVERT any BEq hypothesis to propositional form "
+                "BEFORE re-running simp: e.g. "
+                "`have hNZ : owner ≠ zeroAddress := by simpa using hNotZero`. "
+                "The `if owner = 0 then revert …` branch in the trace tests "
+                "propositional equality, so a bare `(owner != zeroAddress) = "
+                "true` will not discharge it until you bridge the forms. "
+                "After preconverting, `simp_all` (not `simp`) can usually "
+                "close the whole trace in one step because it rewrites the "
+                "Prop-form hypotheses into the goal."
+            )
         if "unused" in details.lower() and ("hBound" in details or "hypothesis" in details.lower()):
             hints.append("If a hypothesis is reported as unused by simp, try `simp_all` instead of `simp`. `simp_all` rewrites hypotheses into the goal, resolving mismatches between spec helper names and unfolded definitions.")
-        hints.append("Try restructuring: `by_cases h : condition · simp [..., h] · simp [..., h]`.")
+        # Only suggest a fresh by_cases restructure when we're NOT already
+        # inside a successful case-split — otherwise the agent may undo its
+        # own progress.
+        if not case_labels:
+            hints.append("Try restructuring: `by_cases h : condition · simp [..., h] · simp [..., h]`.")
     elif failure_class == "type_mismatch":
         if "decide" in details:
             hints.append("The goal contains `decide` expressions. Pass all precondition hypotheses to `simp` and it will reduce `decide` automatically. Do NOT try to manually match `decide` types.")
+        # Corpus analysis of 29 failed interactive runs: 8 tasks (28%) hit a
+        # type_mismatch where "is expected to have type" is followed by
+        # un-reduced monadic-trace machinery — `ContractResult.revert`,
+        # `ContractResult.success`, or nested `match match if ...` blocks.
+        # This is a distinct shape from the cross-class `.val` coercion
+        # asymmetry detector: here the hypothesis has been simplified to a
+        # concrete shape (e.g. `¬Core.Address.ofNat (s.storageMap 0 owner).val = 0`)
+        # but the expected type still carries the raw Contract.run trace
+        # (e.g. `... ((match match if owner = 0 then ContractResult.revert ...`).
+        # The generic "Unfold definitions" hint below does not name the
+        # actual reducers to feed simp, so the agent loops on `exact h`
+        # or `rw [...]` without ever reducing the goal. Tasks affected:
+        # safe/{add_owner,remove_owner,swap_owner,setup_owners}_* covering
+        # is_owner_correctness, owner_list_invariant, in_list_reachable.
+        _expected_unreduced = bool(
+            re.search(
+                r"is expected to have type.{0,800}?"
+                r"(?:ContractResult\.(?:revert|success)|match\s+match)",
+                details,
+                re.DOTALL,
+            )
+        )
+        if _expected_unreduced:
+            hints.append(
+                "TYPE MISMATCH with un-reduced monadic trace on the "
+                "EXPECTED side: your hypothesis has been simplified "
+                "(e.g. `.storageMap 0 owner`) but the goal's expected "
+                "type still contains raw `ContractResult.revert` / "
+                "`ContractResult.success` / nested `match match if ...` "
+                "blocks from an unreduced `Contract.run`. `exact h` will "
+                "NEVER unify these — Lean does not automatically reduce "
+                "the expected type. Fix: reduce the goal FIRST with "
+                "`simp only [X, Contract.run, ContractResult.snd, "
+                "ContractResult.revert, ContractResult.success, "
+                "Verity.bind, Bind.bind, Verity.pure, Pure.pure]` where "
+                "`X` is the contract function literally visible in the "
+                "match (e.g. `OwnerManager.addOwner`, "
+                "`OwnerManager.removeOwner`, `OwnerManager.swapOwner`, "
+                "`OwnerManager.setupOwners`). You may also need "
+                "`split_ifs` on the `if owner = 0` / sentinel guards. "
+                "ONLY after the expected type is in simplified form will "
+                "`exact h` / `simpa using h` unify."
+            )
         hints.append("Unfold definitions to align types. Check spec matches impl.")
     elif failure_class == "split_failed":
         hints.append("Do not split the post-state. Use by_cases with branch-specific helpers.")
     elif failure_class == "no_goals":
         hints.append("Previous simp closed the goal. Remove trailing tactics.")
     elif failure_class == "free_variables":
-        hints.append("Reduce to concrete equalities before decide/native_decide.")
+        # Corpus analysis of 29 failed interactive runs found 3 distinct tasks
+        # (damn_vulnerable_defi side_entrance, kleros sortition_trees, safe
+        # owner_manager_reach add_owner) hitting `expected type must not
+        # contain free variables` with 19 total occurrences across attempts.
+        # Lean's own error text tells the user "Use the '+revert' option to
+        # automatically cleanup and revert free variables" — yet the prior
+        # hint ("Reduce to concrete equalities before decide/native_decide")
+        # didn't mention `revert` at all and pointed agents away from the
+        # exact remedy. The trigger is always `decide` / `native_decide` /
+        # `cases <var>` / `induction <var>` run on a goal that still
+        # mentions local hypotheses (e.g. `hLow`, `hHigh`, `nodeIndex`) or
+        # pattern-bound names (`val✝`, `isLt✝`). Surface `revert` as the
+        # primary fix and list the alternative tactics (`omega`, `simp_all`,
+        # `rcases`) that work on open goals with free hypotheses in scope.
+        hints.append(
+            "Lean rejected the goal because its type still contains FREE "
+            "VARIABLES — local hypotheses or pattern-bound names "
+            "(`val✝`, `isLt✝`, …) the tactic cannot close over. `decide`, "
+            "`native_decide`, `cases <x>`, and `induction <x>` all require "
+            "a closed goal. Two generic remedies: "
+            "(a) `revert <h1> <h2> ... <x>` EVERY local hypothesis and "
+            "variable that appears in the displayed goal, then re-run the "
+            "tactic — this turns the goal into a closed implication. The "
+            "Lean 4 shortcut is `decide +revert` / `native_decide +revert`, "
+            "which Lean's own error hint recommends. "
+            "(b) Replace `decide` / `native_decide` with `omega` (for "
+            "Nat/Int inequalities), `simp_all` (for boolean/equational "
+            "goals), or an explicit `exact` term — these tactics consult "
+            "the local hypothesis context directly and do not require a "
+            "closed goal. For `cases <x>` / `induction <x>` on a "
+            "structure, prefer `rcases x with ⟨...⟩` or destructure inside "
+            "a `have`/`obtain` so you do not leak `val✝`/`isLt✝` into the "
+            "surrounding goal."
+        )
     elif failure_class == "unknown_tactic":
         hints.append("Use standard Lean 4 / Mathlib tactics only.")
     elif failure_class == "simp_no_progress":
@@ -665,69 +1897,329 @@ def _build_check_hints(failure_class: str, details: str) -> list[str]:
     elif failure_class == "tactic_misuse":
         hints.append("The tactic was used incorrectly for this goal shape.")
         hints.append("Check the goal state with inspect_lean_goals using a ?_ hole.")
-    return hints
-
-
-def _build_repair_guidance(details: str) -> str:
-    """Build structured repair guidance string from Lean error details (from main)."""
-    hints: list[str] = []
-    if "tactic 'split' failed" in details:
+    elif failure_class == "omega_failed":
         hints.append(
-            "- Do not `split` the final post-state blindly. Prove branch-specific helper theorems first, then use `by_cases` plus `simpa`."
+            "omega only handles LINEAR integer/natural arithmetic. It cannot close goals "
+            "containing variable * variable, division, or modulus. Look at the "
+            "counterexample section — any term on the RHS of `where` that mixes two "
+            "variables multiplicatively, or uses `/` or `%`, is outside omega's reach."
         )
-    if "no goals to be solved" in details:
-        hints.append(
-            "- A previous `simp` likely closed the goal already. Remove trailing tactics after the goal is solved."
-        )
-    if "expected type must not contain free variables" in details:
-        hints.append(
-            "- Do not use `native_decide` or `decide` on goals that still contain parameters. First reduce to concrete equalities."
-        )
-    if "unknown constant" in details or "Unknown identifier" in details or "unknown identifier" in details:
+        nonlinear_hints: list[str] = []
+        # Verity-specific: when the counterexample's `where:` section binds a
+        # variable to `↑(mul …)`, `↑(add …)`, or `↑(sub …)`, omega is seeing
+        # the Uint256 operation as an OPAQUE Nat — not as `a.val * b.val`,
+        # `a.val + b.val`, or `a.val - b.val`. That masks what is often
+        # actually a LINEAR goal once the `.val` coercion is rewritten under
+        # the no-overflow hypothesis already in scope. Corpus analysis of 29
+        # failed interactive runs found 38 omega_failed incidents carrying
+        # 96 opaque-op occurrences (mul: 45, add: 34, sub: 17), yet ZERO
+        # proofs (failed OR passed) used the canonical conversion lemmas —
+        # the agent searched for related terms like "val_mul", "Uint256
+        # mul add sub ge theorem lemma val", "div_mul_le" but never found
+        # the right names. Give it the specific lemma + hypothesis shape.
+        opaque_ops = set(re.findall(r"↑\((mul|add|sub)\s", details))
+        if opaque_ops:
+            op_lemmas = []
+            if "mul" in opaque_ops:
+                op_lemmas.append(
+                    "`Uint256.mul_eq_of_lt (h : a.val * b.val < modulus) : "
+                    "(a * b).val = a.val * b.val`"
+                )
+            if "add" in opaque_ops:
+                op_lemmas.append(
+                    "`Uint256.add_eq_of_lt (h : a.val + b.val < modulus) : "
+                    "(a + b).val = a.val + b.val`"
+                )
+            if "sub" in opaque_ops:
+                op_lemmas.append(
+                    "`Uint256.sub_eq_of_le (h : b.val ≤ a.val) : "
+                    "(a - b).val = a.val - b.val`"
+                )
+            ops_shown = "/".join(sorted(opaque_ops))
+            nonlinear_hints.append(
+                f"The counterexample shows `↑({ops_shown} …)` opaque terms — "
+                f"omega cannot see inside a Uint256 `mul` / `add` / `sub` "
+                f"application. Rewrite the `.val` coercion FIRST using: "
+                + "; ".join(op_lemmas)
+                + ". The required bound (typically the spec's `hNoOverflow` "
+                "premise) is already in scope — pass it as the argument. "
+                "After `rw [Uint256.mul_eq_of_lt hNoOverflow]` (or similar) "
+                "the goal becomes a plain `Nat` (in)equality and omega will "
+                "close it."
+            )
+        if "/" in details or "% " in details or " mod " in details:
+            nonlinear_hints.append(
+                "For division/modulus: first rewrite `a / b` and `a % b` via "
+                "`Nat.div_add_mod` / `Nat.mul_div_cancel'` so omega sees a linear form, "
+                "or case-split on whether the divisor is zero and handle each branch."
+            )
+        if "val *" in details or "* ↑" in details:
+            nonlinear_hints.append(
+                "For variable multiplications: introduce helper lemmas that bound the "
+                "product (e.g. `Nat.mul_le_mul`), or try `nlinarith` / `positivity` which "
+                "handle some nonlinear cases. Pure omega will never close a goal whose "
+                "counterexample mentions a product of two symbolic `.val` terms."
+            )
+        hints.extend(nonlinear_hints)
+    elif failure_class == "constructor_failed":
         hints.append(
-            "- You are referencing a lemma or constant that does not exist in this Lean 4 environment. "
-            "Do not guess lemma names. Instead, use `simp` with the relevant definitions, `omega` for arithmetic, "
-            "or `decide`/`native_decide` for decidable propositions. Remove all references to unknown names."
+            "`constructor` only works on inductive-type goals (And, Or, Exists, Sigma, "
+            "structures). The goal you're targeting is an equality, implication, or an "
+            "unreduced expression — not a constructor-shaped type. Either (a) `simp` / "
+            "`unfold` first to expose an inductive head symbol, (b) `intro` pending "
+            "hypotheses if the goal is `A → B`, or (c) use `refine ⟨_, _⟩` / "
+            "`exact ⟨_, _⟩` if you already know the witnesses for an And/Exists."
         )
-    if "unsolved goals" in details and "match" in details:
+    elif failure_class == "cases_failed":
         hints.append(
-            "- The remaining goal contains a `match` expression. Use `split` to case-split on the match, "
-            "then solve each branch separately. If the match is on a ContractResult, try "
-            "`simp only [...]` to reduce it first, or use `cases` on the matched expression."
+            "`cases` / `induction` requires an inductive-type term. The major "
+            "premise here is NOT inductive — most commonly it's an implication "
+            "`A → B` (the agent tried `cases h` where `h : A → B`), a function "
+            "type, or a raw equality between non-inductive values. Remedies: "
+            "(a) if the hypothesis is `A → B`, first produce `A` and apply it "
+            "(`have hb := h ha`) or `intro` if the implication is the goal; "
+            "(b) for a decidable Prop use `by_cases h : P` instead of `cases`; "
+            "(c) to derive `False` from a contradictory hypothesis use "
+            "`exact absurd … h` or `exact (h …).elim`; (d) for `Bool`-valued "
+            "equalities like `x == y = true`, rewrite with `Bool.ne_iff` / "
+            "`beq_iff_eq` before case-splitting. Do NOT keep retrying `cases` "
+            "on the same target."
         )
-    if "unsolved goals" in details and "if " in details:
+    elif failure_class == "module_not_found":
         hints.append(
-            "- The remaining goal contains an `if` expression. Use `by_cases h : <condition>` to split on the condition, "
-            "then `simp [h, ...]` in each branch. Alternatively, add the condition's hypothesis to the `simp` call."
+            "The import path you requested is not available in this workspace. In "
+            "particular, `Mathlib` is NOT a dependency of verity-benchmark — only the "
+            "core Lean 4 prelude, `Batteries`, and the task's own `Benchmark.*` public "
+            "modules are importable. Remove the offending `import` line and reach for "
+            "core Lean / Batteries lemmas, or search_public_defs for existing helpers."
         )
-    if "unsolved goals" in details and "match" not in details and "if " not in details:
+    elif failure_class == "synthesis_failed":
         hints.append(
-            "- Unsolved goals remain. Check that `simp` is given all necessary definitions and hypotheses."
+            "Lean could not infer a `_` / `?_` placeholder from context. Either (a) "
+            "replace `_` with an explicit term, (b) add a `show <goal type>` line above "
+            "the tactic so Lean knows the expected type, or (c) use `?_` (named hole) "
+            "with `inspect_lean_goals` to see what Lean expected there before filling it."
         )
-    if "type mismatch" in details:
-        hints.append(
-            "- A type mismatch often means the proof term or tactic result does not match the goal. Re-read the spec and ensure your proof targets the correct type."
+        # Corpus analysis: 3 of 7 failed runs ending in `synthesis_failed` left
+        # a raw `(X).run s).snd` monadic trace in the goal at the hole — the
+        # agent had written `exact ?_` without ever unfolding the contract
+        # function, so `inspect_lean_goals` would just show the un-reduced
+        # trace again. Of 54 passed runs, only 1 intermediate check hit this
+        # shape (and the run recovered afterward), so the pattern is a clean
+        # failure-side signal. Tasks: safe/swap_owner_ownerListInvariant,
+        # safe/setupOwners_ownerListInvariant, safe/removeOwner_isOwnerCorrectness,
+        # zama/transfer_sufficient. The existing generic hint above never tells
+        # the agent that the hole is unreachable until `Contract.run` unfolds.
+        _run_snd_in_goal = False
+        for _ln in details.split("\n"):
+            _stripped = _ln.lstrip()
+            if _stripped.startswith("⊢") and re.search(
+                r"\.run\s+\w+\)\.snd", _ln
+            ):
+                _run_snd_in_goal = True
+                break
+        if _run_snd_in_goal:
+            hints.append(
+                "The goal at the `?_` / `_` hole still contains a raw "
+                "`(X).run s).snd` monadic trace — `Contract.run` has NOT "
+                "been reduced, so no placeholder term can unify with it. "
+                "Filling the hole with more `?_` or `inspect_lean_goals` "
+                "alone will not make progress; you must first UNFOLD the "
+                "contract function before (or at) the hole. Concrete move: "
+                "replace `exact ?_` with "
+                "`simp [X, Contract.run, Verity.bind, Bind.bind, Verity.pure, "
+                "Pure.pure, ContractResult.snd]` where `X` is the contract "
+                "function literally visible in the goal (e.g. "
+                "`OwnerManager.swapOwner`, `ERC7984.transfer`). Once the "
+                "trace is reduced, re-run inspect_lean_goals to see the "
+                "propositional residue and close it with `split_ifs` / "
+                "`simp_all` / branch-hypotheses as usual. Do NOT submit a "
+                "final proof body that still contains `?_`; the harness "
+                "reports `don't know how to synthesize placeholder` and the "
+                "run fails even though the rest of the skeleton is fine."
+            )
+        else:
+            # Corpus analysis of 29 failed runs: 7 terminate with
+            # `don't know how to synthesize placeholder`. Of those 7, only
+            # ~3 have a `(X).run s).snd` monadic trace in the goal (handled
+            # above). The other ~4 land with goals that are arithmetic on
+            # `s.storage` (ethereum/full_deposit_preserves_partial_gap,
+            # lido/shares_conversion_monotone), list-predicate witnesses
+            # (safe/setup_owners_acyclicity,
+            # safe/setup_owners_owner_list_invariant), or conditional
+            # `if … then … else …` expressions — shapes where the existing
+            # generic `show <goal type>` hint is not actionable, so the
+            # agent just re-probes with `inspect_lean_goals` and loops
+            # until the tool budget runs out. Emit a shape-aware hint so
+            # the agent knows to replace the underscore with an explicit
+            # witness rather than continue probing.
+            hints.append(
+                "`don't know how to synthesize placeholder` means an "
+                "underscore `_` (or named hole `?_`) inside a `refine` / "
+                "`exact ⟨…⟩` / constructor call has no canonical filling. "
+                "Lean will NOT invent a Nat, Uint256, list, or proof term "
+                "— you must supply it. Concrete fixes by goal shape: "
+                "(a) arithmetic (e.g. `⊢ add x 1 - add y 1 = x - y`, "
+                "`⊢ n + k = m`) → replace `_` with `(by omega)` or "
+                "`(by simp; omega)`; "
+                "(b) conditional (`⊢ if P then … else …`) → case-split "
+                "with `split_ifs` BEFORE reaching the hole so each branch "
+                "has a concrete target; "
+                "(c) list-invariant witness → write the explicit list "
+                "literal (e.g. `[owner1, owner2, owner3]`) rather than "
+                "`_`; "
+                "(d) propositional `And` / `Exists` → replace `⟨_, _⟩` "
+                "with `⟨<explicit witness>, by <tactic>⟩`. Repeating "
+                "`inspect_lean_goals` at the same hole will show the same "
+                "unsolvable placeholder — do not retry the same shape, "
+                "rewrite the hole with one of the concrete forms above."
+            )
+    elif failure_class == "parse_error":
+        # Lean 4 core does NOT recognise `lemma` — it is a Mathlib-only alias
+        # for `theorem`. When the agent writes `(private) lemma foo ...` in a
+        # no-Mathlib workspace, Lean reports `unexpected identifier; expected
+        # 'abbrev', 'axiom', ..., or 'theorem'` at the `lemma` token. Corpus
+        # analysis of 83 interactive runs: 3 of 29 failed tasks
+        # (lido/locked_funds_solvency, openzeppelin/preview_deposit_rounds_down,
+        # safe/in_list_reachable — 10% of failures) wrote `lemma` helpers at
+        # some point; 1 of 54 passed runs also tried it but moved on after
+        # one rewrite. The generic parse-error hint below lists four shapes
+        # (tactic-in-term-position, missing `by`, stray tokens, branch
+        # indentation) but NONE of them mention keyword choice, so the agent
+        # keeps re-editing the proof body while the real fix is a one-token
+        # rename at the declaration header. Fire the lemma-specific hint FIRST
+        # when the error's "expected … or 'theorem'" list appears (a fingerprint
+        # unique to the top-level-command parse shape).
+        _expects_theorem = (
+            "expected 'abbrev'" in details
+            and "'theorem'" in details
         )
-    if "simp made no progress" in details:
+        if _expects_theorem:
+            hints.append(
+                "Lean 4 core does NOT recognise `lemma` — it is a Mathlib-only "
+                "alias for `theorem`, and this workspace has no Mathlib. The "
+                "\"expected 'abbrev', …, or 'theorem'\" list in the error is "
+                "Lean telling you which top-level commands ARE valid at that "
+                "position. Fix: rename every `lemma` (and `private lemma`) "
+                "helper in the candidate to `theorem` (and `private theorem`). "
+                "The declaration body does not need any other change."
+            )
         hints.append(
-            "- `simp` made no progress with the given arguments. Add more definitions to unfold, "
-            "or the simp arguments may already be fully reduced. Try removing the unproductive simp call."
+            "Lean rejected the proof before type-checking — the candidate contains "
+            "invalid Lean 4 syntax. Common causes: (a) a tactic written in term "
+            "position (e.g. `exact simp [...]` instead of `exact by simp [...]`), "
+            "(b) a `by` block without an indented tactic on the next line, (c) stray "
+            "`;`, `|`, or `using` tokens outside a `have`/`simpa` context, (d) a "
+            "`· simp [...]` branch indented less than the bullet. Re-read the "
+            "editable file via read_public_file to see the exact character positions "
+            "in the error, and rewrite the proof body as a clean `:= by <tactics>` "
+            "block — do not try to patch token-by-token."
         )
+
+    # Pattern-based hints that cut across failure classes. These used to live in
+    # a separate `_build_repair_guidance` pass that was appended after this
+    # function ran; corpus analysis showed 68% of its output was semantically
+    # redundant (sometimes contradictory) with the class-based hints above, so
+    # that pass was removed. The few patterns it uniquely covered — binder-type
+    # inference, Lean syntax errors, and the ContractState.storage function
+    # hint — are preserved here.
     if "failed to infer binder type" in details:
         hints.append(
-            "- Lean cannot infer a binder type. Add explicit type annotations to your helper lemma parameters."
+            "Lean cannot infer a binder type. Add explicit type annotations to "
+            "your helper lemma parameters."
         )
     if "unexpected token" in details or "expected 'by'" in details:
         hints.append(
-            "- Syntax error. Ensure the theorem body uses `:= by` followed by tactics. "
-            "Do not use `:=` with a term-mode proof unless you are certain of the syntax."
+            "Syntax error. Ensure the theorem body uses `:= by` followed by "
+            "tactics. Do not use `:=` with a term-mode proof unless you are "
+            "certain of the syntax."
         )
-    if "Function expected at" in details or "unknown identifier" in details:
+    if "Function expected at" in details:
         hints.append(
-            "- Use `s.storage 0` (function application) not `s.storage[0]` or `s.storage.0`. "
-            "ContractState.storage is a function `Nat → Uint256`."
+            "Use `s.storage 0` (function application) not `s.storage[0]` or "
+            "`s.storage.0`. `ContractState.storage` is a function `Nat → Uint256`."
         )
-    return "\n".join(hints)
+    # Detect the recurring Uint256/Address `.val` coercion asymmetry: one side
+    # of a `type mismatch … has type … but is expected to have type …` pair
+    # has a `.val` projection and the other does not. Corpus analysis of 83
+    # interactive runs: the pattern `"after simplification has type … .val"`
+    # appears in 14 of 29 failed tasks (48%), yet only 2 of those tasks have
+    # `failure_class == "type_mismatch"` at the point of failure — the rest
+    # cascade into `unsolved_goals` / `unknown_identifier` when secondary
+    # errors come from the same simp call, so the old in-branch hint was
+    # skipped for 12/14 of the actual `.val` mismatches. Lifting the check
+    # to run cross-class fires the hint whenever the mismatch text appears,
+    # regardless of which error Lean listed first.
+    _tm = re.search(
+        r"has type\s+(.{5,300}?)\s+but is expected to have type\s+(.{5,300})",
+        details, re.DOTALL,
+    )
+    if _tm and (".val" in _tm.group(1)) != (".val" in _tm.group(2)):
+        _val_hint = (
+            "Your hypothesis differs from the expected type by a `.val` projection "
+            "(Uint256/Address/Nat). Do NOT keep retrying `exact h` — Lean will not "
+            "insert the coercion for you. Use `by simpa using h` or `by simp_all` "
+            "to let simp bridge the `.val`; if the goal is a Prop inequality, "
+            "`by omega` after exposing `.val` on both sides also works. If the "
+            "mismatch is inside a negation like `¬x = 0` vs `¬x.val = 0`, rewrite "
+            "with the underlying injectivity lemma (e.g. `Core.Uint256.val_eq_zero`, "
+            "`Core.Address.ofNat_eq_zero`) found via search_public_defs."
+        )
+        if _val_hint not in hints:
+            hints.append(_val_hint)
+    # Detect Lean's `unused simp argument` linter warning and surface
+    # generic meta-advice. Corpus analysis of 29 failed interactive runs:
+    # 16 tasks (55%) emit at least one `This simp argument is unused:
+    # <name>` warning (450 total matches across those tasks), spanning 5
+    # failure classes — unsolved_goals (8), synthesis_failed (3),
+    # unknown_identifier (3), free_variables (1), omega_failed (1). The
+    # only pre-existing gate lives inside the `unsolved_goals` branch and
+    # fires on `"hBound" in details or "hypothesis" in details.lower()` —
+    # `hBound` is a hypothesis name from one single task, and the word
+    # `"hypothesis"` never appears in Lean's linter text (the linter says
+    # "simp argument"), so in practice the old gate only matched 1 of 16
+    # tasks. A cross-class check on the exact warning text fires on all
+    # 16 with no FP risk: 45 passing tasks also hit this warning during
+    # iteration and still closed their proofs, so the warning is
+    # non-terminal. The name-bearing hint text is naturally state-keyed
+    # (different flagged args → different first-80-char fingerprint), so
+    # it won't be dedup-suppressed when the agent resubmits with new
+    # unused args.
+    _unused_simp_args = re.findall(
+        r"This simp argument is unused:\s*\n\s*(\S+)", details
+    )
+    if _unused_simp_args:
+        # Dedupe while preserving order, cap to keep hint readable.
+        _seen: set[str] = set()
+        _ordered: list[str] = []
+        for _n in _unused_simp_args:
+            if _n not in _seen:
+                _seen.add(_n)
+                _ordered.append(_n)
+            if len(_ordered) >= 4:
+                break
+        _names_str = ", ".join(f"`{n}`" for n in _ordered)
+        _unused_hint = (
+            f"Lean's linter reports UNUSED simp arguments ({_names_str}): "
+            f"these hypotheses/definitions cannot be used as rewrites by "
+            f"`simp [...]` against the current goal. Piling on more arguments "
+            f"will not close it. Concrete moves: (1) REMOVE each flagged "
+            f"argument as the linter suggests — leaving dead args in obscures "
+            f"the real obstruction. (2) If the flagged item is a HYPOTHESIS "
+            f"in BEq form (e.g. `(x != y) = true`), convert to Prop form "
+            f"FIRST: `have h' : x ≠ y := by simpa using h`, then pass `h'` "
+            f"to simp, OR switch the whole call to `simp_all` — `simp_all` "
+            f"rewrites hypotheses INTO the goal and often bridges BEq/Prop "
+            f"mismatches that `simp [h]` cannot. (3) If the flagged item is "
+            f"a DEFINITION (module-qualified, e.g. `ContractX.foo`), simp "
+            f"either already unfolded it or it has no simp-lemma form — "
+            f"drop it, and if you need the unfolding use `unfold` / "
+            f"`simp only [ContractX.foo]` explicitly. Do NOT resubmit with "
+            f"the same unused arguments."
+        )
+        if _unused_hint not in hints:
+            hints.append(_unused_hint)
+    return hints
 
 
 def tool_result_json(result: dict[str, Any]) -> str:
diff --git a/schemas/agent-run.schema.json b/schemas/agent-run.schema.json
index ece50053..3b0b612a 100644
--- a/schemas/agent-run.schema.json
+++ b/schemas/agent-run.schema.json
@@ -283,6 +283,12 @@
     },
     "analysis": {
       "type": "object"
+    },
+    "prebuild_reports": {
+      "type": "array",
+      "items": {
+        "type": "object"
+      }
     }
   }
 }
diff --git a/scripts/generate_task_skeletons.py b/scripts/generate_task_skeletons.py
new file mode 100755
index 00000000..59e39236
--- /dev/null
+++ b/scripts/generate_task_skeletons.py
@@ -0,0 +1,485 @@
+#!/usr/bin/env python3
+"""Grind-first task skeleton generator for Benchmark/Generated/**/Tasks/*.lean.
+
+This script rewrites (or previews) the editable proof template for every task
+manifest under ``cases/``. The rewriter keeps everything an agent relies on to
+understand the goal — imports of the case's ``Specs``, namespace, ``open``
+declarations, the theorem docstring, and the theorem signature — but swaps the
+proof body for a grind-first skeleton that also imports ``Benchmark.Grindset``.
+
+Default skeleton body:
+
+    import Benchmark.Grindset
+    ...
+    theorem foo ... := by
+      -- Grindset-first: unfold the spec, then try grind with case-local hints.
+      -- If grind fails, see harness/PROOF_PATTERNS.md for simp / by_cases
+      -- fallbacks and for the `grind?` lemma-discovery loop.
+      unfold foo_spec
+      grind [ContractName.fn, ContractName.fieldA, ContractName.fieldB]
+
+When we cannot confidently determine the contract symbols to hint (no call of
+the form ``ContractName.fn`` appears in the theorem body, or no companion
+``Contract.lean`` is found), the body falls back to a bare ``grind`` followed
+by a ``sorry`` line that is commented out — the agent still sees a grind-first
+template without the script fabricating a hint list.
+
+Usage
+-----
+
+Dry-run a preview of every regenerated template into
+``Benchmark/GeneratedPreview/`` without touching live files::
+
+    python3 scripts/generate_task_skeletons.py --preview
+
+Rewrite live ``Benchmark/Generated/...`` files in place (only do this when
+you are sure no live benchmark run is reading them)::
+
+    python3 scripts/generate_task_skeletons.py --in-place
+
+Operate on a single task file::
+
+    python3 scripts/generate_task_skeletons.py --preview \\
+        Benchmark/Generated/Lido/VaulthubLocked/Tasks/CeildivSandwich.lean
+
+Emit a single unified patch instead of writing files::
+
+    python3 scripts/generate_task_skeletons.py --patch > grindset/s3-skeletons.patch
+
+Assumptions
+-----------
+
+* The live generator for Verity benchmark tasks is the human author following
+  ``CONTRIBUTING.md``; there is no pre-existing Python scaffolding tool. This
+  script stands in as the canonical rewriter so future task skeletons inherit
+  the grind-first shape automatically.
+* ``Benchmark.Grindset`` is either the real bundle of ``@[grind]`` lemmas from
+  branch ``grindset/s1-verity-grindset`` or the empty stub shipped alongside
+  this script on ``grindset/s3-skeleton-gen``. Either way, ``import
+  Benchmark.Grindset`` resolves and is safe.
+"""
+from __future__ import annotations
+
+import argparse
+import difflib
+import re
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable
+
+ROOT = Path(__file__).resolve().parent.parent  # parent of the directory that holds this script
+GENERATED_ROOT = ROOT / "Benchmark" / "Generated"  # live task templates (see module docstring)
+PREVIEW_ROOT = ROOT / "Benchmark" / "GeneratedPreview"  # `--preview` output tree (see module docstring)
+CASES_ROOT = ROOT / "Benchmark" / "Cases"  # `Benchmark/Cases` under ROOT
+GRINDSET_IMPORT = "import Benchmark.Grindset"  # import line the grind-first skeleton depends on
+PLACEHOLDER_LINE_RE = re.compile(  # whole-line match for the stock placeholder comment
+    r"^\s*--\s*Replace this placeholder with a complete Lean proof\.\s*$"
+)
+
+
+# ---------------------------------------------------------------------------
+# Parsing helpers
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class TemplateFile:  # structural pieces of one task skeleton, as filled in by parse_template
+    path: Path  # file the template was read from
+    imports: list[str]  # preamble lines beginning with `import `, kept verbatim
+    namespace: str | None  # text following `namespace `; None when the preamble has none
+    opens: list[str]  # preamble lines beginning with `open `, kept verbatim
+    docstring: list[str] | None  # lines from `/--` through the line containing `-/`, if present
+    theorem_prelude: list[str]  # from the `theorem` line through the line containing `:= by`
+    theorem_body_keep: list[str]  # body lines other than the placeholder, `exact ?_`, `sorry`
+    theorem_name: str  # identifier captured right after the `theorem` keyword
+    trailing: list[str]  # from the `end <namespace>` line to EOF; empty without that marker
+    raw: str  # full original file text
+
+
+_THEOREM_RE = re.compile(r"^\s*theorem\s+([A-Za-z_][A-Za-z0-9_']*)\b")  # group 1 = theorem name
+
+
+def parse_template(path: Path) -> TemplateFile | None:
+    """Split an existing ``Tasks/<Name>.lean`` skeleton into preamble, docstring,
+    theorem signature, body and trailer. Returns ``None`` when a ``/--`` docstring
+    is never closed, no ``theorem`` line follows, or no ``:= by`` line is found."""
+
+    text = path.read_text()  # NOTE(review): locale-default encoding; confirm UTF-8 is guaranteed
+    lines = text.splitlines()
+
+    imports: list[str] = []
+    namespace: str | None = None
+    opens: list[str] = []
+    docstring: list[str] | None = None
+    theorem_prelude: list[str] = []
+    theorem_name = ""
+    theorem_body_keep: list[str] = []
+    trailing: list[str] = []
+
+    i = 0  # cursor into `lines`; each phase below leaves it at the next unread line
+    n = len(lines)
+
+    # Phase 1: preamble (imports / namespace / opens), up to `/--`, `theorem`, or anything else.
+    while i < n:
+        line = lines[i]
+        stripped = line.strip()
+        if stripped.startswith("import "):
+            imports.append(line)
+            i += 1
+            continue
+        if stripped.startswith("namespace "):
+            namespace = stripped[len("namespace "):].strip()  # a later `namespace` line overwrites
+            i += 1
+            continue
+        if stripped.startswith("open "):
+            opens.append(line)
+            i += 1
+            continue
+        if stripped == "" or stripped.startswith("--"):
+            # Blank lines and `--` comments are skipped here and not recorded anywhere.
+            i += 1
+            continue
+        if stripped.startswith("/--") or _THEOREM_RE.match(line):
+            break
+        # Any other preamble line (e.g. a `private def`) also stops the scan; the
+        # docstring/theorem checks below then fail and the function returns None.
+        break
+
+    # Phase 2: optional `/-- ... -/` docstring; a one-line docstring is handled too.
+    if i < n and lines[i].strip().startswith("/--"):
+        doc_start = i
+        while i < n and "-/" not in lines[i]:
+            i += 1
+        if i >= n:
+            return None  # docstring never terminated
+        docstring = lines[doc_start:i + 1]
+        i += 1
+
+    # Phase 3: the `theorem` line must come next; collect through the `:= by` line.
+    if i >= n or not _THEOREM_RE.match(lines[i]):
+        return None
+    m = _THEOREM_RE.match(lines[i])
+    theorem_name = m.group(1)
+    sig_start = i
+    while i < n and ":= by" not in lines[i]:
+        i += 1
+    if i >= n:
+        return None  # signature never reached `:= by`
+    theorem_prelude = lines[sig_start:i + 1]
+    i += 1
+
+    # Phase 4: body runs to the first exact `end <namespace>` line, or EOF if none / no namespace.
+    body_start = i
+    end_marker_idx = n
+    for j in range(i, n):
+        if lines[j].strip().startswith("end ") and namespace is not None \
+                and lines[j].strip() == f"end {namespace}":
+            end_marker_idx = j
+            break
+    body_lines = lines[body_start:end_marker_idx]
+    trailing = lines[end_marker_idx:]  # includes the `end <namespace>` line itself
+
+    # Record body lines that are neither the placeholder comment nor a bare
+    # `exact ?_` / `sorry`; per the original note these feed dry-run diagnostics.
+    for line in body_lines:
+        if PLACEHOLDER_LINE_RE.match(line):
+            continue
+        if line.strip() in {"exact ?_", "sorry"}:
+            continue
+        theorem_body_keep.append(line)
+
+    return TemplateFile(
+        path=path,
+        imports=imports,
+        namespace=namespace,
+        opens=opens,
+        docstring=docstring,
+        theorem_prelude=theorem_prelude,
+        theorem_body_keep=theorem_body_keep,
+        theorem_name=theorem_name,
+        trailing=trailing,
+        raw=text,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Contract-symbol extraction
+# ---------------------------------------------------------------------------
+
+
+_CONTRACT_CALL_RE = re.compile(r"\b([A-Z][A-Za-z0-9_]*)\.([a-z][A-Za-z0-9_]*)\b")  # `Upper.lower` pairs
+_VERITY_CONTRACT_RE = re.compile(r"^\s*verity_contract\s+([A-Z][A-Za-z0-9_]*)")  # group 1 = contract name
+_STORAGE_FIELD_RE = re.compile(  # `<field> : <type> := slot <N>` filling the line; group 1 = field
+    r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*:\s*.+:=\s*slot\s+\d+\s*$"
+)
+
+
+def locate_contract_file(namespace: str | None) -> Path | None:
+    """Map a case namespace to its companion ``Contract.lean``.
+
+    ``Benchmark.Cases.Kleros.SortitionTrees`` resolves to
+    ``ROOT/Benchmark/Cases/Kleros/SortitionTrees/Contract.lean``. Returns
+    ``None`` for an empty namespace, one outside ``Benchmark.Cases.``, or
+    when that file does not exist.
+    """
+    if not (namespace and namespace.startswith("Benchmark.Cases.")):
+        return None
+    # Each dotted component of the namespace becomes one directory level.
+    candidate = ROOT.joinpath(*namespace.split("."), "Contract.lean")
+    return candidate if candidate.is_file() else None
+
+
+def parse_contract_storage(contract_path: Path) -> tuple[str | None, list[str]]:
+    """Return ``(contract_name, storage_field_names)`` by scanning a
+    ``verity_contract <Name> where ... storage <f> : T := slot N`` block.
+
+    Lines before the ``verity_contract`` header are ignored; the name is
+    ``None`` if no header is found. The file is decoded as UTF-8, since Lean
+    sources commonly contain non-ASCII symbols the locale codec may reject.
+    """
+    contract_name: str | None = None
+    fields: list[str] = []
+    storage_indent: int | None = None  # None while outside a storage block
+
+    for line in contract_path.read_text(encoding="utf-8").splitlines():
+        if contract_name is None:
+            header = _VERITY_CONTRACT_RE.match(line)
+            if header:
+                contract_name = header.group(1)
+            continue
+        # Past the header: track entry into / exit from the storage block.
+        content = line.strip()
+        indent = len(line) - len(line.lstrip())
+        if storage_indent is None:
+            if content == "storage":
+                storage_indent = indent
+            continue
+        if not content:
+            continue  # blank lines never end the block
+        if indent <= storage_indent:
+            # Dedented back to/below the `storage` keyword: block is over.
+            storage_indent = None
+            continue
+        field = _STORAGE_FIELD_RE.match(line)
+        if field:
+            fields.append(field.group(1))
+    return contract_name, fields
+
+
+def extract_contract_symbols(
+    template: TemplateFile,
+) -> tuple[str | None, list[str]]:
+    """Return ``(ContractName, hint_symbols)`` where ``hint_symbols`` is the
+    list passed inside the ``grind [...]`` brackets. ``None`` for the contract
+    name means we could not confidently pick hints.
+
+    Hints are the ``Contract.fn`` references found in the theorem signature
+    (deduplicated, in order of appearance) followed by the storage fields
+    declared in the companion ``Contract.lean``. Storage fields are included
+    only when that file's declared contract is the one picked: qualifying them
+    with a fallback identifier would name symbols that were never declared.
+    """
+    body_text = "\n".join(template.theorem_prelude)
+    calls = _CONTRACT_CALL_RE.findall(body_text)
+    if not calls:
+        return None, []
+
+    # Group the referenced function names by qualifier, preserving the order
+    # in which they occur in the signature.
+    fn_names: dict[str, list[str]] = {}
+    for ctor, fn in calls:
+        fn_names.setdefault(ctor, []).append(fn)
+
+    # Prefer the contract whose companion Contract.lean actually exists.
+    picked: str | None = None
+    fields: list[str] = []
+    contract_path = locate_contract_file(template.namespace)
+    if contract_path is not None:
+        declared_name, declared_fields = parse_contract_storage(contract_path)
+        if declared_name and declared_name in fn_names:
+            picked, fields = declared_name, declared_fields
+
+    if picked is None:
+        # Fall back to the most-used Contract-like identifier (ties go to the
+        # one seen first). Its storage layout is unknown, so `fields` stays
+        # empty instead of borrowing another contract's field names.
+        picked = max(fn_names, key=lambda name: len(fn_names[name]))
+
+    hints: list[str] = []
+    seen: set[str] = set()
+    # first the contract.fn references, then every declared storage field
+    for member in [*fn_names[picked], *fields]:
+        sym = f"{picked}.{member}"
+        if sym not in seen:
+            hints.append(sym)
+            seen.add(sym)
+    return picked, hints
+
+
+def infer_spec_name(theorem_prelude: list[str]) -> str | None:
+    """Find the first ``*_spec`` identifier mentioned in the theorem signature.
+
+    Lines are scanned in order; ``None`` means no line contains such a token."""
+    spec_token = re.compile(r"\b([A-Za-z_][A-Za-z0-9_']*_spec)\b")
+    hits = (spec_token.search(line) for line in theorem_prelude)
+    first = next((hit for hit in hits if hit), None)
+    return first.group(1) if first else None
+
+
+# ---------------------------------------------------------------------------
+# Rendering
+# ---------------------------------------------------------------------------
+
+
+def render_skeleton(template: TemplateFile) -> str:
+    """Render the grind-first proof skeleton for ``template`` as Lean source.
+
+    Layout: imports (with ``GRINDSET_IMPORT`` appended when absent), the
+    ``namespace`` line, ``open`` lines, docstring, theorem signature, then a
+    proof body of guidance comments, an optional ``unfold <spec>``, and a
+    ``grind`` call, closed by ``end <namespace>``. The result always ends with
+    exactly one newline.
+    """
+    contract_name, hints = extract_contract_symbols(template)
+    spec_name = infer_spec_name(template.theorem_prelude)
+
+    # Header: imports, namespace, opens.
+    out: list[str] = list(template.imports)
+    if GRINDSET_IMPORT not in out:
+        out.append(GRINDSET_IMPORT)
+    out.append("")
+    if template.namespace:
+        out += [f"namespace {template.namespace}", ""]
+    out += template.opens
+    if template.opens:
+        out.append("")
+
+    # Statement: optional docstring followed by the theorem signature.
+    out += template.docstring or []
+    out += template.theorem_prelude
+
+    # Proof body: grind-first
+    out += [
+        "  -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md.",
+        "  -- Try `grind` with contract symbol hints; fall back to `simp` /",
+        "  -- `by_cases` if grind leaves goals. Use `grind?` for hints.",
+    ]
+    if spec_name:
+        out.append(f"  unfold {spec_name}")
+    if hints:
+        grind_line = f"  grind [{', '.join(hints)}]"
+    elif contract_name:
+        grind_line = f"  grind [{contract_name}]"
+    else:
+        # No confidently pickable hint list: emit a bare grind.
+        grind_line = "  grind"
+    out.append(grind_line)
+
+    if template.namespace:
+        out += ["", f"end {template.namespace}"]
+    return "\n".join(out).rstrip() + "\n"
+
+
+# ---------------------------------------------------------------------------
+# CLI / driver
+# ---------------------------------------------------------------------------
+
+
+def iter_templates(paths: Iterable[Path]) -> Iterable[Path]:
+    """Yield given ``.lean`` files plus every ``.lean`` file under given dirs."""
+    for entry in map(Path.resolve, paths):
+        if entry.is_dir():
+            yield from sorted(entry.rglob("*.lean"))
+        elif entry.is_file() and entry.suffix == ".lean":
+            yield entry
+
+
+def _default_targets() -> list[Path]:
+    """Sorted ``.lean`` files under any ``Tasks`` directory of GENERATED_ROOT."""
+    if not GENERATED_ROOT.is_dir():
+        return []
+    # Match path components below GENERATED_ROOT (separator-independent).
+    found = GENERATED_ROOT.rglob("*.lean")
+    return sorted(p for p in found if "Tasks" in p.relative_to(GENERATED_ROOT).parts[:-1])
+
+
+def main(argv: list[str] | None = None) -> int:
+    """CLI driver: rewrite task skeletons as previews, in place, or as a diff;
+    returns 1 if no templates are found, else 0. Paths outside ROOT print as-is."""
+    ap = argparse.ArgumentParser(description=__doc__.splitlines()[0])
+    mode = ap.add_mutually_exclusive_group()
+    mode.add_argument(
+        "--preview", action="store_true",
+        help="Write rewritten templates under Benchmark/GeneratedPreview/ "
+        "instead of Benchmark/Generated/.",
+    )
+    mode.add_argument(
+        "--in-place", action="store_true",
+        help="Overwrite Benchmark/Generated/**/Tasks/*.lean in place.",
+    )
+    mode.add_argument(
+        "--patch", action="store_true",
+        help="Emit a unified diff on stdout; do not write any files.",
+    )
+    ap.add_argument(
+        "paths", nargs="*", type=Path,
+        help="Optional explicit files/dirs. Defaults to all Benchmark/Generated"
+        "/**/Tasks/*.lean files.",
+    )
+    args = ap.parse_args(argv)
+
+    if not any([args.preview, args.in_place, args.patch]):
+        args.preview = True  # safer default
+
+    def display(p: Path) -> Path:
+        # iter_templates resolves explicit paths, which may lie outside ROOT.
+        try:
+            return p.relative_to(ROOT)
+        except ValueError:
+            return p
+
+    targets = list(iter_templates(args.paths)) if args.paths else _default_targets()
+    if not targets:
+        print("no task skeleton templates found", file=sys.stderr)
+        return 1
+
+    changed = 0
+    for path in targets:
+        template = parse_template(path)
+        if template is None:
+            print(f"skip (unparsed): {display(path)}", file=sys.stderr)
+            continue
+        new_text = render_skeleton(template)
+        if new_text == template.raw:
+            continue
+        changed += 1
+        rel = display(path)
+        if args.patch:
+            diff = difflib.unified_diff(
+                template.raw.splitlines(keepends=True),
+                new_text.splitlines(keepends=True),
+                fromfile=f"a/{rel}",
+                tofile=f"b/{rel}",
+            )
+            sys.stdout.writelines(diff)
+            continue
+        if args.preview:
+            try:
+                rel_to_gen = path.relative_to(GENERATED_ROOT)
+            except ValueError:
+                rel_to_gen = Path(path.name)
+            out_path = PREVIEW_ROOT / rel_to_gen
+        else:  # in-place
+            out_path = path
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+        out_path.write_text(new_text)
+        print(f"wrote {display(out_path)}")
+    if args.patch:
+        return 0
+    print(f"done: {changed} file(s) regenerated")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/repeat_benchmark_compare.py b/scripts/repeat_benchmark_compare.py
index ce5341ed..b648228f 100644
--- a/scripts/repeat_benchmark_compare.py
+++ b/scripts/repeat_benchmark_compare.py
@@ -3,7 +3,7 @@
 
 Usage:
     python3 scripts/repeat_benchmark_compare.py run \
-        --profiles openrouter-gemini-3.1-flash-lite-preview combined-lean-tools \
+        --profiles interactive-gpt interactive-opus \
         --tasks ethereum/deposit_contract_minimal/deposit_count \
                kleros/sortition_trees/node_id_bijection \
         --repeats 3
diff --git a/scripts/run_benchmark_matrix.py b/scripts/run_benchmark_matrix.py
index c8f61dfc..0a53635e 100644
--- a/scripts/run_benchmark_matrix.py
+++ b/scripts/run_benchmark_matrix.py
@@ -39,9 +39,9 @@ class BenchmarkTarget:
 
 TARGET_CONFIGS: dict[str, Path] = {
     "builtin-fast": ROOT / "harness/agents/default.json",
-    "builtin-smart": ROOT / "harness/agents/builtin-smart.json",
-    "openrouter-gemini-3.1-flash-lite-preview": ROOT / "harness/agents/openrouter-gemini-3.1-flash-lite-preview.json",
-    "leanstral": ROOT / "harness/agents/leanstral.json",
+    "interactive-gpt": ROOT / "harness/agents/interactive-gpt.json",
+    "interactive-opus": ROOT / "harness/agents/interactive-opus.json",
+    "interactive-smart": ROOT / "harness/agents/interactive-smart.json",
 }
 
 
@@ -154,9 +154,9 @@ def target_specs(args: argparse.Namespace) -> list[BenchmarkTarget]:
     requested_keys = list(args.target_key) if getattr(args, "target_key", None) else list(TARGET_CONFIGS)
     repeat_map = {
         "builtin-fast": args.fast_repeats,
-        "builtin-smart": args.smart_repeats,
-        "openrouter-gemini-3.1-flash-lite-preview": args.openrouter_repeats,
-        "leanstral": args.leanstral_repeats,
+        "interactive-gpt": args.interactive_gpt_repeats,
+        "interactive-opus": args.interactive_opus_repeats,
+        "interactive-smart": args.interactive_smart_repeats,
     }
     return [benchmark_target(key, repeat_map[key]) for key in requested_keys]
 
@@ -783,9 +783,9 @@ def build_parser() -> argparse.ArgumentParser:
 
     start_parser = subparsers.add_parser("start", help="Start a new matrix run in the background")
     start_parser.add_argument("--fast-repeats", type=int, default=3)
-    start_parser.add_argument("--smart-repeats", type=int, default=3)
-    start_parser.add_argument("--openrouter-repeats", type=int, default=1)
-    start_parser.add_argument("--leanstral-repeats", type=int, default=1)
+    start_parser.add_argument("--interactive-gpt-repeats", type=int, default=1)
+    start_parser.add_argument("--interactive-opus-repeats", type=int, default=1)
+    start_parser.add_argument("--interactive-smart-repeats", type=int, default=1)
     start_parser.add_argument(
         "--target-key",
         action="append",
diff --git a/scripts/run_resumable_matrix.py b/scripts/run_resumable_matrix.py
new file mode 100755
index 00000000..a9ccb0f0
--- /dev/null
+++ b/scripts/run_resumable_matrix.py
@@ -0,0 +1,433 @@
+#!/usr/bin/env python3
+"""Resumable benchmark matrix runner.
+
+Runs every task in the active suite against each configured profile.
+Skips (task, profile) combos that already have a result file, so the
+script can be re-invoked after interruption (rate-limit, crash, etc.)
+to continue where it left off.
+
+Logs progress to `results/matrix_runs/<run_id>/progress.jsonl` and
+emits a summary at `results/matrix_runs/<run_id>/summary.json` after
+every completed task — so even a partial run leaves analyzable output.
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parent.parent
+
+
+def utc_now() -> str:  # current UTC time as "YYYY-MM-DDTHH:MM:SSZ"
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def load_profile(profile_name: str) -> dict:  # parse harness/agents/<name>.json
+    with (ROOT / "harness" / "agents" / f"{profile_name}.json").open(encoding="utf-8") as fh:
+        return json.load(fh)
+
+
+def result_file_for(profile: dict, task_ref: str) -> Path:
+    """Result artifact path: results/agent_runs/<track>/<slug>/<task with / -> __>.json."""
+    slug = profile["run_slug"] if "run_slug" in profile else profile.get("agent_id", "unknown")
+    file_name = "__".join(task_ref.split("/")) + ".json"
+    return ROOT.joinpath("results", "agent_runs", profile.get("track", "custom"), slug, file_name)
+
+
+def list_active_tasks() -> list[str]:
+    """Ask ``harness/agent_runner.py list --suite active`` for the task refs.
+
+    Returns the non-blank stdout lines, stripped. If the subprocess exits
+    non-zero, its stderr is echoed and this process exits with status 1.
+    """
+    child_env = dict(os.environ)
+    child_env["PYTHONPATH"] = str(ROOT / "harness") + os.pathsep + child_env.get("PYTHONPATH", "")
+    argv = ["python3", "harness/agent_runner.py", "list", "--suite", "active"]
+    proc = subprocess.run(argv, cwd=ROOT, capture_output=True, text=True, check=False, env=child_env)
+    if proc.returncode != 0:
+        print("failed to list tasks:", proc.stderr, file=sys.stderr)
+        sys.exit(1)
+    stripped = (raw.strip() for raw in proc.stdout.splitlines())
+    return [ref for ref in stripped if ref]
+
+
+def run_one(
+    profile_name: str,
+    task_ref: str,
+    *,
+    timeout_seconds: int,
+    extra_env: dict[str, str] | None = None,
+) -> tuple[int, str, str, float]:
+    """Run one task; return (exit_code, stdout, stderr, elapsed).
+
+    A timeout is reported as exit code 124 with a ``[runner] timeout`` note
+    appended to stderr. Partial output attached to ``TimeoutExpired`` may be
+    ``bytes`` even though ``text=True`` was requested, so it is decoded before
+    being returned or concatenated.
+    """
+    env = {**os.environ, **(extra_env or {})}
+    # Ensure lake is on PATH. Use the invoking user's HOME rather than a
+    # hard-coded "/root/.elan/bin" so non-root shells (local dev, CI runners)
+    # still pick up elan-installed toolchains.
+    elan_bin = os.path.join(env.get("HOME") or os.path.expanduser("~"), ".elan", "bin")
+    env["PATH"] = f"{elan_bin}{os.pathsep}{env.get('PATH', '')}"
+    cmd = ["bash", "scripts/exec_with_dotenvx.sh", "python3", "harness/agent_runner.py"]
+    cmd += ["run", task_ref, "--profile", profile_name]
+    def as_text(captured: str | bytes | None) -> str:
+        if isinstance(captured, bytes):
+            return captured.decode("utf-8", errors="replace")
+        return captured or ""
+
+    start = time.perf_counter()
+    try:
+        result = subprocess.run(
+            cmd,
+            cwd=ROOT,
+            capture_output=True,
+            text=True,
+            check=False,
+            env=env,
+            timeout=timeout_seconds,
+        )
+    except subprocess.TimeoutExpired as e:
+        note = f"\n[runner] timeout after {timeout_seconds}s"
+        return 124, as_text(e.stdout), as_text(e.stderr) + note, time.perf_counter() - start
+    return result.returncode, result.stdout, result.stderr, time.perf_counter() - start
+
+
+def classify_failure(stderr: str, exit_code: int) -> str:
+    """Bucket a run; 429/401 count only when not embedded in a longer number."""
+    import re  # local import: this module does not import re at top level
+    low = (stderr or "").lower()
+    if exit_code == 124:
+        return "timeout"
+    if re.search(r"(?<!\d)429(?!\d)|rate[ _]limit|too many requests", low):
+        return "rate_limited"
+    if re.search(r"(?<!\d)401(?!\d)|unauthorized|invalid_api_key", low):
+        return "auth_error"
+    if "connection" in low and ("refused" in low or "reset" in low or "timed out" in low):
+        return "connection_error"
+    return "harness_error" if exit_code != 0 else "ok"
+
+
+def read_result(path: Path) -> dict | None:  # best-effort: None on any read/parse failure
+    try:
+        return json.loads(path.read_bytes().decode("utf-8"))
+    except Exception:
+        return None
+
+
+def summarize(run_dir: Path, profiles: list[str], tasks: list[str]) -> dict:
+    """Build the per-profile pass/fail report from result files on disk.
+
+    For each profile, every task is classified as ``missing`` (no result
+    file), ``unreadable`` (``read_result`` returned nothing), ``passed``, or
+    failed (any other evaluation status). ``pass_rate`` is passed/len(tasks),
+    or ``None`` when ``tasks`` is empty; an unloadable profile gets an
+    ``{"error": ...}`` entry. ``run_dir`` is accepted but not consulted.
+    """
+    generated_at = utc_now()
+    per_profile: dict = {}
+    for name in profiles:
+        try:
+            profile = load_profile(name)
+        except Exception as e:
+            per_profile[name] = {"error": f"cannot load profile: {e}"}
+            continue
+
+        counts = dict.fromkeys(("passed", "failed", "missing", "error"), 0)
+        details: list[dict] = []
+        for task in tasks:
+            path = result_file_for(profile, task)
+            if not path.exists():
+                counts["missing"] += 1
+                details.append({"task": task, "state": "missing"})
+                continue
+            result = read_result(path)
+            if not result:
+                counts["error"] += 1
+                details.append({"task": task, "state": "unreadable"})
+                continue
+            evaluation = result.get("evaluation") or {}
+            status = evaluation.get("status", "unknown")
+            counts["passed" if status == "passed" else "failed"] += 1
+            entry = {"task": task, "state": status}
+            entry["failure_mode"] = evaluation.get("failure_mode")
+            entry["elapsed_seconds"] = result.get("elapsed_seconds")
+            entry["tool_calls_used"] = result.get("tool_calls_used")
+            details.append(entry)
+
+        per_profile[name] = {
+            "track": profile.get("track"),
+            "run_slug": profile.get("run_slug"),
+            "model": profile.get("model"),
+            "counts": counts,
+            "pass_rate": (counts["passed"] / len(tasks)) if tasks else None,
+            "tasks": details,
+        }
+    return {"generated_at": generated_at, "profiles": per_profile, "total_tasks": len(tasks)}
+
+
+def write_summary(run_dir: Path, profiles: list[str], tasks: list[str]) -> None:
+    payload = json.dumps(summarize(run_dir, profiles, tasks), indent=2)  # snapshot -> JSON
+    run_dir.joinpath("summary.json").write_text(payload, encoding="utf-8")
+
+
+def append_progress(run_dir: Path, record: dict) -> None:  # one JSON object per line
+    with open(run_dir / "progress.jsonl", "a", encoding="utf-8") as log:
+        log.write(f"{json.dumps(record)}\n")
+
+
+def main() -> int:
+    """Run every (profile, task) pair lacking a readable result file, logging to
+    results/matrix_runs/<run_id>/ (progress.jsonl, summary.json). Returns 0; a
+    profile is abandoned after too many consecutive rate-limit hits."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--profiles", nargs="+", required=True, help="Agent profile names")
+    parser.add_argument("--run-id", default=None, help="Run id (default: timestamp)")
+    parser.add_argument("--timeout", type=int, default=600, help="Per-task timeout (s)")
+    parser.add_argument(
+        "--rate-limit-backoff",
+        type=int,
+        default=60,
+        help="Seconds to pause after a rate-limit error before continuing to next task",
+    )
+    parser.add_argument(
+        "--max-rate-limits-per-profile",
+        type=int,
+        default=5,
+        help="Skip remaining tasks for a profile after N rate-limit hits in a row",
+    )
+    parser.add_argument(
+        "--tasks",
+        nargs="*",
+        default=None,
+        help="Specific tasks to run; defaults to the full active suite",
+    )
+    parser.add_argument("--dry-run", action="store_true")
+    args = parser.parse_args()
+
+    run_id = args.run_id or datetime.now().strftime("matrix-%Y%m%d-%H%M%S")
+    run_dir = ROOT / "results" / "matrix_runs" / run_id
+    run_dir.mkdir(parents=True, exist_ok=True)
+
+    tasks = args.tasks or list_active_tasks()
+    print(f"[runner] run_id={run_id} profiles={args.profiles} tasks={len(tasks)}")
+    append_progress(
+        run_dir,
+        {
+            "event": "run_start",
+            "ts": utc_now(),
+            "profiles": args.profiles,
+            "task_count": len(tasks),
+            "run_id": run_id,
+        },
+    )
+
+    # Initial summary snapshot so partial runs always leave analyzable output.
+    write_summary(run_dir, args.profiles, tasks)
+
+    for profile_name in args.profiles:
+        try:
+            profile = load_profile(profile_name)
+        except Exception as e:
+            print(f"[runner] cannot load profile {profile_name}: {e}", file=sys.stderr)
+            append_progress(
+                run_dir,
+                {"event": "profile_error", "ts": utc_now(), "profile": profile_name, "error": str(e)},
+            )
+            continue
+
+        print(f"[runner] === profile {profile_name} (model={profile.get('model')}) ===")
+        append_progress(
+            run_dir,
+            {"event": "profile_start", "ts": utc_now(), "profile": profile_name, "model": profile.get("model")},
+        )
+
+        consecutive_rate_limits = 0
+        profile_passed = 0
+        profile_failed = 0
+        profile_skipped_existing = 0
+        profile_errors = 0
+
+        for idx, task_ref in enumerate(tasks, 1):
+            result_path = result_file_for(profile, task_ref)
+            if result_path.exists():
+                r = read_result(result_path)
+                # Treat unreadable/corrupted artifacts as missing rather than
+                # silently skipping: a previous run may have been interrupted
+                # mid-write, leaving truncated JSON that `read_result` returns
+                # None for. The whole point of resume is to fill those gaps,
+                # so delete the corrupt artifact and fall through to RUN.
+                if r is None:
+                    # Keep dry-run read-only: never unlink artifacts when
+                    # --dry-run is set; report what a real run would do.
+                    if args.dry_run:
+                        print(
+                            f"[runner]   [{idx:>2}/{len(tasks)}] {task_ref} -> "
+                            f"DRY (existing artifact unreadable; would delete and rerun)"
+                        )
+                        continue
+                    try:
+                        result_path.unlink()
+                    except OSError:
+                        pass
+                    print(
+                        f"[runner]   [{idx:>2}/{len(tasks)}] {task_ref} -> "
+                        f"RERUN (existing artifact was unreadable; deleted)"
+                    )
+                    append_progress(
+                        run_dir,
+                        {
+                            "event": "task_unreadable_rerun",
+                            "ts": utc_now(),
+                            "profile": profile_name,
+                            "task": task_ref,
+                        },
+                    )
+                else:
+                    status = (r.get("evaluation") or {}).get("status", "unknown")  # evaluation may be null
+                    print(f"[runner]   [{idx:>2}/{len(tasks)}] {task_ref} -> SKIP (exists, status={status})")
+                    append_progress(
+                        run_dir,
+                        {
+                            "event": "task_skip_existing",
+                            "ts": utc_now(),
+                            "profile": profile_name,
+                            "task": task_ref,
+                            "status": status,
+                        },
+                    )
+                    profile_skipped_existing += 1
+                    if status == "passed":
+                        profile_passed += 1
+                    else:
+                        profile_failed += 1
+                    continue
+
+            if args.dry_run:
+                print(f"[runner]   [{idx:>2}/{len(tasks)}] {task_ref} -> DRY (would run)")
+                continue
+
+            print(f"[runner]   [{idx:>2}/{len(tasks)}] {task_ref} -> RUN")
+            append_progress(
+                run_dir,
+                {"event": "task_start", "ts": utc_now(), "profile": profile_name, "task": task_ref},
+            )
+
+            exit_code, stdout, stderr, elapsed = run_one(
+                profile_name, task_ref, timeout_seconds=args.timeout
+            )
+
+            # Determine outcome
+            classified = classify_failure(stderr, exit_code)
+            status = None
+            failure_mode = None
+            if result_path.exists():
+                r = read_result(result_path)
+                if r:
+                    ev = r.get("evaluation") or {}
+                    status = ev.get("status")
+                    failure_mode = ev.get("failure_mode")
+
+            outcome_record = {
+                "event": "task_end",
+                "ts": utc_now(),
+                "profile": profile_name,
+                "task": task_ref,
+                "exit_code": exit_code,
+                "elapsed_seconds": round(elapsed, 2),
+                "classified": classified,
+                "evaluation_status": status,
+                "failure_mode": failure_mode,
+                "stderr_tail": (stderr or "")[-500:],
+            }
+            append_progress(run_dir, outcome_record)
+
+            short = status or classified
+            print(f"[runner]      -> {short} (exit={exit_code}, {elapsed:.1f}s)")
+
+            if status == "passed":
+                profile_passed += 1
+                consecutive_rate_limits = 0
+            elif classified == "rate_limited":
+                consecutive_rate_limits += 1
+                profile_errors += 1
+                print(
+                    f"[runner]   rate-limit hit ({consecutive_rate_limits}/"
+                    f"{args.max_rate_limits_per_profile}), sleeping {args.rate_limit_backoff}s"
+                )
+                time.sleep(args.rate_limit_backoff)
+                if consecutive_rate_limits >= args.max_rate_limits_per_profile:
+                    print(
+                        f"[runner]   too many rate limits for {profile_name}; "
+                        f"skipping remaining {len(tasks) - idx} tasks for this profile"
+                    )
+                    append_progress(
+                        run_dir,
+                        {
+                            "event": "profile_rate_limit_skip",
+                            "ts": utc_now(),
+                            "profile": profile_name,
+                            "remaining": len(tasks) - idx,
+                        },
+                    )
+                    break
+            elif result_path.exists():
+                profile_failed += 1
+                consecutive_rate_limits = 0
+            else:
+                profile_errors += 1
+                consecutive_rate_limits = 0
+
+            # Refresh summary after every task so a killed run leaves useful output.
+            write_summary(run_dir, args.profiles, tasks)
+
+        append_progress(
+            run_dir,
+            {
+                "event": "profile_end",
+                "ts": utc_now(),
+                "profile": profile_name,
+                "passed": profile_passed,
+                "failed": profile_failed,
+                "skipped_existing": profile_skipped_existing,
+                "errors": profile_errors,
+            },
+        )
+
+    write_summary(run_dir, args.profiles, tasks)
+    append_progress(run_dir, {"event": "run_end", "ts": utc_now()})
+
+    # Print final summary
+    s = summarize(run_dir, args.profiles, tasks)
+    print("\n" + "=" * 60)
+    print(f"Final summary (run_id={run_id})")
+    print("=" * 60)
+    for name, info in s["profiles"].items():
+        if "error" in info:
+            print(f"  {name}: ERROR {info['error']}")
+            continue
+        c = info["counts"]
+        pr = info["pass_rate"]
+        print(
+            f"  {name:30s} passed={c['passed']:>3} "
+            f"failed={c['failed']:>3} missing={c['missing']:>3} "
+            f"error={c['error']:>3} rate={pr:.1%}"
+            if pr is not None
+            else f"  {name:30s} passed={c['passed']:>3} failed={c['failed']:>3}"
+        )
+    print(f"\nSummary JSON: {run_dir / 'summary.json'}")
+    print(f"Progress log: {run_dir / 'progress.jsonl'}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())