diff --git a/.github/actions/setup-lean/action.yml b/.github/actions/setup-lean/action.yml index 726d8204..0ae39eee 100644 --- a/.github/actions/setup-lean/action.yml +++ b/.github/actions/setup-lean/action.yml @@ -30,12 +30,13 @@ runs: LEAN_TOOLCHAIN_HASH: ${{ hashFiles('lean-toolchain') }} LAKEFILE_HASH: ${{ hashFiles('lakefile.lean') }} LAKE_MANIFEST_HASH: ${{ hashFiles('lake-manifest.json') }} + CACHE_SALT_HASH: ${{ hashFiles('.github/cache-salt') }} run: | - elan_key="elan-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}" - packages_key="lake-packages-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}" - packages_main_key="lake-packages-benchmark-${MAIN_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}" - build_key="lake-build-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}" - build_main_key="lake-build-benchmark-${MAIN_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}" + elan_key="elan-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${CACHE_SALT_HASH}" + packages_key="lake-packages-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}-${CACHE_SALT_HASH}" + packages_main_key="lake-packages-benchmark-${MAIN_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}-${CACHE_SALT_HASH}" + build_key="lake-build-benchmark-${CACHE_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}-${CACHE_SALT_HASH}" + build_main_key="lake-build-benchmark-${MAIN_BUCKET}-${RUNNER_OS_NAME}-${LEAN_TOOLCHAIN_HASH}-${LAKEFILE_HASH}-${LAKE_MANIFEST_HASH}-${CACHE_SALT_HASH}" { echo "use_sticky=${USE_STICKY}" echo "use_build_sticky=${USE_BUILD_STICKY}" diff --git a/.github/cache-salt b/.github/cache-salt new file mode 100644 index 00000000..d00491fd --- 
/dev/null +++ b/.github/cache-salt @@ -0,0 +1 @@ +1 diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsPoolBalance.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsPoolBalance.lean new file mode 100644 index 00000000..1ffd50b5 --- /dev/null +++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsPoolBalance.lean @@ -0,0 +1,22 @@ +import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `deposit` stores `oldPoolBalance + amount` in `poolBalance`. +-/ +theorem deposit_sets_pool_balance + (amount : Uint256) (s : ContractState) : + let s' := ((SideEntrance.deposit amount).run s).snd + deposit_sets_pool_balance_spec amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold deposit_sets_pool_balance_spec + grind [SideEntrance.deposit, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf] + +end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsSenderCredit.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsSenderCredit.lean new file mode 100644 index 00000000..6ed16810 --- /dev/null +++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/DepositSetsSenderCredit.lean @@ -0,0 +1,22 @@ +import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `deposit` increases the caller's credited balance by `amount`. 
+-/ +theorem deposit_sets_sender_credit + (amount : Uint256) (s : ContractState) : + let s' := ((SideEntrance.deposit amount).run s).snd + deposit_sets_sender_credit_spec amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold deposit_sets_sender_credit_spec + grind [SideEntrance.deposit, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf] + +end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/ExploitTraceDrainsPool.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/ExploitTraceDrainsPool.lean new file mode 100644 index 00000000..0e1c33ce --- /dev/null +++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/ExploitTraceDrainsPool.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance + +open Verity +open Verity.EVM.Uint256 + +/-- +If the caller starts with zero credited balance, then borrowing `amount`, +repaying through `deposit`, and withdrawing immediately reduces pool ETH by +exactly `amount`. +-/ +theorem exploit_trace_drains_pool + (amount : Uint256) (s : ContractState) + (hBorrow : amount <= s.storage 0) + (hFresh : s.storageMap 2 s.sender = 0) : + let s' := ((SideEntrance.flashLoanViaDeposit amount).run s).snd + let s'' := ((SideEntrance.withdraw).run s').snd + exploit_trace_drains_pool_spec amount s s'' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold exploit_trace_drains_pool_spec + grind [SideEntrance.flashLoanViaDeposit, SideEntrance.withdraw, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf] + +end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositPreservesPoolBalance.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositPreservesPoolBalance.lean new file mode 100644 index 00000000..7a8de9e5 --- /dev/null +++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositPreservesPoolBalance.lean @@ -0,0 +1,24 @@ +import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing the summarized flash-loan-plus-deposit path leaves tracked pool ETH +unchanged. +-/ +theorem flashLoanViaDeposit_preserves_pool_balance + (amount : Uint256) (s : ContractState) + (hBorrow : amount <= s.storage 0) : + let s' := ((SideEntrance.flashLoanViaDeposit amount).run s).snd + flashLoanViaDeposit_preserves_pool_balance_spec amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold flashLoanViaDeposit_preserves_pool_balance_spec + grind [SideEntrance.flashLoanViaDeposit, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf] + +end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance diff --git a/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositSetsSenderCredit.lean b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositSetsSenderCredit.lean new file mode 100644 index 00000000..3024ac89 --- /dev/null +++ b/Benchmark/GeneratedPreview/DamnVulnerableDeFi/SideEntrance/Tasks/FlashLoanViaDepositSetsSenderCredit.lean @@ -0,0 +1,24 @@ +import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.DamnVulnerableDeFi.SideEntrance + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing the summarized flash-loan-plus-deposit path mints caller credit +equal to the borrowed amount. +-/ +theorem flashLoanViaDeposit_sets_sender_credit + (amount : Uint256) (s : ContractState) + (hBorrow : amount <= s.storage 0) : + let s' := ((SideEntrance.flashLoanViaDeposit amount).run s).snd + flashLoanViaDeposit_sets_sender_credit_spec amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold flashLoanViaDeposit_sets_sender_credit_spec + grind [SideEntrance.flashLoanViaDeposit, SideEntrance.poolBalance, SideEntrance.totalCredits, SideEntrance.creditOf] + +end Benchmark.Cases.DamnVulnerableDeFi.SideEntrance diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/ChainStartThreshold.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/ChainStartThreshold.lean new file mode 100644 index 00000000..cfbe50b9 --- /dev/null +++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/ChainStartThreshold.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Ethereum.DepositContractMinimal + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing a threshold-crossing full deposit sets `chainStarted`. +-/ +theorem full_deposit_starts_chain_at_threshold + (depositAmount : Uint256) (s : ContractState) + (hCount : s.storage 0 < 4294967295) + (hMin : depositAmount >= 1000000000) + (hFull : depositAmount >= 32000000000) + (hThreshold : add (s.storage 1) 1 = 65536) : + let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd + deposit_starts_chain_at_threshold_spec depositAmount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold deposit_starts_chain_at_threshold_spec + grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted] + +end Benchmark.Cases.Ethereum.DepositContractMinimal diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/DepositCount.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/DepositCount.lean new file mode 100644 index 00000000..e4cf08ba --- /dev/null +++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/DepositCount.lean @@ -0,0 +1,25 @@ +import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Ethereum.DepositContractMinimal + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `deposit` on the successful path increments the total deposit counter +by exactly one. +-/ +theorem deposit_increments_deposit_count + (depositAmount : Uint256) (s : ContractState) + (hCount : s.storage 0 < 4294967295) + (hMin : depositAmount >= 1000000000) : + let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd + deposit_increments_deposit_count_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold deposit_increments_deposit_count_spec + grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted] + +end Benchmark.Cases.Ethereum.DepositContractMinimal diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositIncrementsFullCount.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositIncrementsFullCount.lean new file mode 100644 index 00000000..b3f8587c --- /dev/null +++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositIncrementsFullCount.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Ethereum.DepositContractMinimal + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `deposit` at or above the full threshold increments +`fullDepositCount` by one. +-/ +theorem full_deposit_increments_full_count + (depositAmount : Uint256) (s : ContractState) + (hCount : s.storage 0 < 4294967295) + (hMin : depositAmount >= 1000000000) + (hFull : depositAmount >= 32000000000) : + let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd + deposit_increments_full_count_for_full_deposit_spec depositAmount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold deposit_increments_full_count_for_full_deposit_spec + grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted] + +end Benchmark.Cases.Ethereum.DepositContractMinimal diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositPreservesPartialGap.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositPreservesPartialGap.lean new file mode 100644 index 00000000..368c8623 --- /dev/null +++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/FullDepositPreservesPartialGap.lean @@ -0,0 +1,25 @@ +import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Ethereum.DepositContractMinimal + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing a full deposit increments both counters in lockstep, so the gap +between all deposits and full deposits is preserved. +-/ +theorem full_deposit_preserves_partial_gap + (depositAmount : Uint256) (s : ContractState) + (hCount : s.storage 0 < 4294967295) + (hMin : depositAmount >= 1000000000) + (hFull : depositAmount >= 32000000000) : + let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd + s'.storage 0 - s'.storage 1 = s.storage 0 - s.storage 1 := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted] + +end Benchmark.Cases.Ethereum.DepositContractMinimal diff --git a/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/SmallDepositPreservesFullCount.lean b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/SmallDepositPreservesFullCount.lean new file mode 100644 index 00000000..be5da501 --- /dev/null +++ b/Benchmark/GeneratedPreview/Ethereum/DepositContractMinimal/Tasks/SmallDepositPreservesFullCount.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.Ethereum.DepositContractMinimal.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Ethereum.DepositContractMinimal + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `deposit` below the full threshold leaves `fullDepositCount` +unchanged. +-/ +theorem small_deposit_preserves_full_count + (depositAmount : Uint256) (s : ContractState) + (hCount : s.storage 0 < 4294967295) + (hMin : depositAmount >= 1000000000) + (hSmall : depositAmount < 32000000000) : + let s' := ((DepositContractMinimal.deposit depositAmount).run s).snd + deposit_preserves_full_count_for_small_deposit_spec depositAmount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold deposit_preserves_full_count_for_small_deposit_spec + grind [DepositContractMinimal.deposit, DepositContractMinimal.depositCount, DepositContractMinimal.fullDepositCount, DepositContractMinimal.chainStarted] + +end Benchmark.Cases.Ethereum.DepositContractMinimal diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawIntervalMatchesWeights.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawIntervalMatchesWeights.lean new file mode 100644 index 00000000..e092fc3d --- /dev/null +++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawIntervalMatchesWeights.lean @@ -0,0 +1,25 @@ +import Benchmark.Cases.Kleros.SortitionTrees.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Kleros.SortitionTrees + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `draw` follows the encoded ticket intervals used by the +implementation. +-/ +theorem draw_interval_matches_weights + (ticket : Uint256) (s : ContractState) + (hRoot : s.storage 0 != 0) + (hInRange : ticket < s.storage 0) : + let s' := ((SortitionTrees.draw ticket).run s).snd + draw_interval_matches_weights_spec ticket s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold draw_interval_matches_weights_spec + grind [SortitionTrees.draw, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode] + +end Benchmark.Cases.Kleros.SortitionTrees diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawSelectsValidLeaf.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawSelectsValidLeaf.lean new file mode 100644 index 00000000..1365bd55 --- /dev/null +++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/DrawSelectsValidLeaf.lean @@ -0,0 +1,24 @@ +import Benchmark.Cases.Kleros.SortitionTrees.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Kleros.SortitionTrees + +open Verity +open Verity.EVM.Uint256 + +/-- +Any successful `draw` resolves to one of the four leaf node indices. +-/ +theorem draw_selects_valid_leaf + (ticket : Uint256) (s : ContractState) + (hRoot : s.storage 0 != 0) + (hInRange : ticket < s.storage 0) : + let s' := ((SortitionTrees.draw ticket).run s).snd + draw_selects_valid_leaf_spec s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold draw_selects_valid_leaf_spec + grind [SortitionTrees.draw, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode] + +end Benchmark.Cases.Kleros.SortitionTrees diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/NodeIdBijection.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/NodeIdBijection.lean new file mode 100644 index 00000000..f0ea91ed --- /dev/null +++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/NodeIdBijection.lean @@ -0,0 +1,25 @@ +import Benchmark.Cases.Kleros.SortitionTrees.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Kleros.SortitionTrees + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `setLeaf` writes matching forward and reverse mapping entries for the +updated node and stake-path id. +-/ +theorem node_id_bijection + (nodeIndex stakePathID weight : Uint256) (s : ContractState) + (hLow : nodeIndex >= 3) + (hHigh : nodeIndex <= 6) : + let s' := ((SortitionTrees.setLeaf nodeIndex stakePathID weight).run s).snd + node_id_bijection_spec nodeIndex stakePathID s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold node_id_bijection_spec + grind [SortitionTrees.setLeaf, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode] + +end Benchmark.Cases.Kleros.SortitionTrees diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/ParentEqualsSumOfChildren.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/ParentEqualsSumOfChildren.lean new file mode 100644 index 00000000..def9850c --- /dev/null +++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/ParentEqualsSumOfChildren.lean @@ -0,0 +1,24 @@ +import Benchmark.Cases.Kleros.SortitionTrees.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Kleros.SortitionTrees + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `setLeaf` recomputes each parent node from its direct children. +-/ +theorem parent_equals_sum_of_children + (nodeIndex stakePathID weight : Uint256) (s : ContractState) + (hLow : nodeIndex >= 3) + (hHigh : nodeIndex <= 6) : + let s' := ((SortitionTrees.setLeaf nodeIndex stakePathID weight).run s).snd + parent_equals_sum_of_children_spec s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold parent_equals_sum_of_children_spec + grind [SortitionTrees.setLeaf, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode] + +end Benchmark.Cases.Kleros.SortitionTrees diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootEqualsSumOfLeaves.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootEqualsSumOfLeaves.lean new file mode 100644 index 00000000..1b6ce94d --- /dev/null +++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootEqualsSumOfLeaves.lean @@ -0,0 +1,24 @@ +import Benchmark.Cases.Kleros.SortitionTrees.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Kleros.SortitionTrees + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `setLeaf` recomputes the root as the sum of the four leaf weights. +-/ +theorem root_equals_sum_of_leaves + (nodeIndex stakePathID weight : Uint256) (s : ContractState) + (hLow : nodeIndex >= 3) + (hHigh : nodeIndex <= 6) : + let s' := ((SortitionTrees.setLeaf nodeIndex stakePathID weight).run s).snd + root_equals_sum_of_leaves_spec s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold root_equals_sum_of_leaves_spec + grind [SortitionTrees.setLeaf, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode] + +end Benchmark.Cases.Kleros.SortitionTrees diff --git a/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootMinusLeftEqualsRightSubtree.lean b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootMinusLeftEqualsRightSubtree.lean new file mode 100644 index 00000000..c6b679ab --- /dev/null +++ b/Benchmark/GeneratedPreview/Kleros/SortitionTrees/Tasks/RootMinusLeftEqualsRightSubtree.lean @@ -0,0 +1,25 @@ +import Benchmark.Cases.Kleros.SortitionTrees.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Kleros.SortitionTrees + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `setLeaf` keeps the root partitioned into left and right subtree +weights. +-/ +theorem root_minus_left_equals_right_subtree + (nodeIndex stakePathID weight : Uint256) (s : ContractState) + (hLow : nodeIndex >= 3) + (hHigh : nodeIndex <= 6) : + let s' := ((SortitionTrees.setLeaf nodeIndex stakePathID weight).run s).snd + root_minus_left_equals_right_subtree_spec s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold root_minus_left_equals_right_subtree_spec + grind [SortitionTrees.setLeaf, SortitionTrees.rootSum, SortitionTrees.leftSum, SortitionTrees.rightSum, SortitionTrees.leaf0, SortitionTrees.leaf1, SortitionTrees.leaf2, SortitionTrees.leaf3, SortitionTrees.nodeIndexesToIDs, SortitionTrees.IDsToNodeIndexes, SortitionTrees.selectedNode] + +end Benchmark.Cases.Kleros.SortitionTrees diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/CeildivSandwich.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/CeildivSandwich.lean new file mode 100644 index 00000000..c1036363 --- /dev/null +++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/CeildivSandwich.lean @@ -0,0 +1,25 @@ +import Benchmark.Cases.Lido.VaulthubLocked.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Lido.VaulthubLocked + +open Verity +open Verity.EVM.Uint256 + +/-- +Supporting arithmetic lemma: ceil(x/d) * d >= x for positive d. +This is a key bound used in the F-01 solvency proof to connect the +ceiling division in the reserve computation back to the original amount. +-/ +theorem ceildiv_sandwich + (x d : Uint256) + (hd : d > 0) + (hNoOverflow : (ceilDiv x d).val * d.val < modulus) : + ceildiv_sandwich_spec x d := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold ceildiv_sandwich_spec + grind + +end Benchmark.Cases.Lido.VaulthubLocked diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/LockedFundsSolvency.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/LockedFundsSolvency.lean new file mode 100644 index 00000000..b60c8c5b --- /dev/null +++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/LockedFundsSolvency.lean @@ -0,0 +1,55 @@ +import Benchmark.Cases.Lido.VaulthubLocked.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Lido.VaulthubLocked + +open Verity +open Verity.EVM.Uint256 + +/-- +Certora F-01: Locked funds solvency. +After executing `syncLocked`, the stored locked amount (slot 6) multiplied by +the reserve ratio complement is at least the liability (from liabilityShares +in slot 1) multiplied by total basis points: + + s'.storage 6 * (BP - RR) >= getPooledEthBySharesRoundUp(LS, TPE, TS) * BP + +The proof requires a case split on whether the computed reserve or the minimal +reserve dominates, then algebraic manipulation using the ceilDiv sandwich bound +and share conversion monotonicity. 
+-/ +theorem locked_funds_solvency + (s : ContractState) + -- Axioms + (hMaxLS : s.storage 0 ≥ s.storage 1) + (hRR_pos : s.storage 3 > 0) + (hRR_lt : s.storage 3 < TOTAL_BASIS_POINTS) + (hTS : s.storage 5 > 0) + (hTPE : s.storage 4 > 0) + -- No overflow: maxLiabilityShares * totalPooledEther fits in Uint256 + (hNoOverflow1 : (s.storage 0).val * (s.storage 4).val < modulus) + -- No overflow: liability * reserveRatioBP fits in Uint256 + (hNoOverflow2 : (getPooledEthBySharesRoundUp (s.storage 0) (s.storage 4) (s.storage 5)).val + * (s.storage 3).val < modulus) + -- No overflow: the add inside locked (liability + effectiveReserve) fits in Uint256 + (hNoOverflow3 : let liab := getPooledEthBySharesRoundUp (s.storage 0) (s.storage 4) (s.storage 5) + let reserve := ceilDiv (mul liab (s.storage 3)) (sub TOTAL_BASIS_POINTS (s.storage 3)) + let eff := if reserve ≥ s.storage 2 then reserve else s.storage 2 + liab.val + eff.val < modulus) + -- No overflow: locked * (BP - RR) fits in Uint256 + (hNoOverflow4 : let liab := getPooledEthBySharesRoundUp (s.storage 0) (s.storage 4) (s.storage 5) + let reserve := ceilDiv (mul liab (s.storage 3)) (sub TOTAL_BASIS_POINTS (s.storage 3)) + let eff := if reserve ≥ s.storage 2 then reserve else s.storage 2 + (add liab eff).val * (sub TOTAL_BASIS_POINTS (s.storage 3)).val < modulus) + -- No overflow: liability * BP fits in Uint256 + (hNoOverflow5 : (getPooledEthBySharesRoundUp (s.storage 1) (s.storage 4) (s.storage 5)).val + * TOTAL_BASIS_POINTS.val < modulus) : + let s' := ((VaultHubLocked.syncLocked).run s).snd + locked_funds_solvency_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold locked_funds_solvency_spec + grind [VaultHubLocked.syncLocked, VaultHubLocked.maxLiabilityShares, VaultHubLocked.liabilityShares, VaultHubLocked.minimalReserve, VaultHubLocked.reserveRatioBP, VaultHubLocked.totalPooledEther, VaultHubLocked.totalShares, VaultHubLocked.lockedAmount] + +end Benchmark.Cases.Lido.VaulthubLocked diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/MaxLiabilitySharesBound.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/MaxLiabilitySharesBound.lean new file mode 100644 index 00000000..e89d4ea4 --- /dev/null +++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/MaxLiabilitySharesBound.lean @@ -0,0 +1,23 @@ +import Benchmark.Cases.Lido.VaulthubLocked.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Lido.VaulthubLocked + +open Verity +open Verity.EVM.Uint256 + +/-- +Certora P-VH-04: maxLiabilityShares >= liabilityShares. +This invariant is maintained by the VaultHub's minting and reporting logic. +-/ +theorem max_liability_shares_bound + (maxLiabilityShares liabilityShares : Uint256) + (hBound : maxLiabilityShares ≥ liabilityShares) : + max_liability_shares_bound_spec maxLiabilityShares liabilityShares := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold max_liability_shares_bound_spec + grind + +end Benchmark.Cases.Lido.VaulthubLocked diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/ReserveRatioBounds.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/ReserveRatioBounds.lean new file mode 100644 index 00000000..8ce57a5b --- /dev/null +++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/ReserveRatioBounds.lean @@ -0,0 +1,24 @@ +import Benchmark.Cases.Lido.VaulthubLocked.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Lido.VaulthubLocked + +open Verity +open Verity.EVM.Uint256 + +/-- +Certora P-VH-03: Reserve ratio is strictly between 0 and TOTAL_BASIS_POINTS. +This is enforced by the vault connection validation logic. +-/ +theorem reserve_ratio_bounds + (reserveRatioBP : Uint256) + (hPos : reserveRatioBP > 0) + (hLt : reserveRatioBP < TOTAL_BASIS_POINTS) : + reserve_ratio_bounds_spec reserveRatioBP := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold reserve_ratio_bounds_spec + grind + +end Benchmark.Cases.Lido.VaulthubLocked diff --git a/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/SharesConversionMonotone.lean b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/SharesConversionMonotone.lean new file mode 100644 index 00000000..08162108 --- /dev/null +++ b/Benchmark/GeneratedPreview/Lido/VaulthubLocked/Tasks/SharesConversionMonotone.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.Lido.VaulthubLocked.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Lido.VaulthubLocked + +open Verity +open Verity.EVM.Uint256 + +/-- +Supporting arithmetic lemma: getPooledEthBySharesRoundUp is monotone in shares. +If a >= b then getPooledEthBySharesRoundUp(a) >= getPooledEthBySharesRoundUp(b). +Needed to lift the F-01 solvency bound from maxLiabilityShares to liabilityShares. 
+-/ +theorem shares_conversion_monotone + (a b : Uint256) + (totalPooledEther totalShares : Uint256) + (hTS : totalShares > 0) + (hNoOverflow : a.val * totalPooledEther.val < modulus) : + shares_conversion_monotone_spec a b totalPooledEther totalShares := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold shares_conversion_monotone_spec + grind + +end Benchmark.Cases.Lido.VaulthubLocked diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBookValue.lean b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBookValue.lean new file mode 100644 index 00000000..249f7159 --- /dev/null +++ b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBookValue.lean @@ -0,0 +1,23 @@ +import Benchmark.Cases.NexusMutual.RammPriceBand.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.NexusMutual.RammPriceBand + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `syncPriceBand` stores the synchronized book value. +-/ +theorem syncPriceBand_sets_book_value + (capital_ supply_ : Uint256) (s : ContractState) + (hSupply : supply_ != 0) : + let s' := ((RammPriceBand.syncPriceBand capital_ supply_).run s).snd + syncPriceBand_sets_book_value_spec capital_ supply_ s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold syncPriceBand_sets_book_value_spec + grind [RammPriceBand.syncPriceBand, RammPriceBand.capital, RammPriceBand.supply, RammPriceBand.bookValue, RammPriceBand.buySpotPrice, RammPriceBand.sellSpotPrice] + +end Benchmark.Cases.NexusMutual.RammPriceBand diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBuyPrice.lean b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBuyPrice.lean new file mode 100644 index 00000000..b2af2f7d --- /dev/null +++ b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsBuyPrice.lean @@ -0,0 +1,23 @@ +import Benchmark.Cases.NexusMutual.RammPriceBand.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.NexusMutual.RammPriceBand + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `syncPriceBand` stores the synchronized buy quote. +-/ +theorem syncPriceBand_sets_buy_price + (capital_ supply_ : Uint256) (s : ContractState) + (hSupply : supply_ != 0) : + let s' := ((RammPriceBand.syncPriceBand capital_ supply_).run s).snd + syncPriceBand_sets_buy_price_spec capital_ supply_ s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold syncPriceBand_sets_buy_price_spec + grind [RammPriceBand.syncPriceBand, RammPriceBand.capital, RammPriceBand.supply, RammPriceBand.bookValue, RammPriceBand.buySpotPrice, RammPriceBand.sellSpotPrice] + +end Benchmark.Cases.NexusMutual.RammPriceBand diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsCapital.lean b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsCapital.lean new file mode 100644 index 00000000..36954bbd --- /dev/null +++ b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsCapital.lean @@ -0,0 +1,23 @@ +import Benchmark.Cases.NexusMutual.RammPriceBand.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.NexusMutual.RammPriceBand + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `syncPriceBand` stores the provided capital value. +-/ +theorem syncPriceBand_sets_capital + (capital_ supply_ : Uint256) (s : ContractState) + (hSupply : supply_ != 0) : + let s' := ((RammPriceBand.syncPriceBand capital_ supply_).run s).snd + syncPriceBand_sets_capital_spec capital_ s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold syncPriceBand_sets_capital_spec + grind [RammPriceBand.syncPriceBand, RammPriceBand.capital, RammPriceBand.supply, RammPriceBand.bookValue, RammPriceBand.buySpotPrice, RammPriceBand.sellSpotPrice] + +end Benchmark.Cases.NexusMutual.RammPriceBand diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsSellPrice.lean b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsSellPrice.lean new file mode 100644 index 00000000..a8c83109 --- /dev/null +++ b/Benchmark/GeneratedPreview/NexusMutual/RammPriceBand/Tasks/SyncSetsSellPrice.lean @@ -0,0 +1,23 @@ +import Benchmark.Cases.NexusMutual.RammPriceBand.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.NexusMutual.RammPriceBand + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `syncPriceBand` stores the synchronized sell quote. +-/ +theorem syncPriceBand_sets_sell_price + (capital_ supply_ : Uint256) (s : ContractState) + (hSupply : supply_ != 0) : + let s' := ((RammPriceBand.syncPriceBand capital_ supply_).run s).snd + syncPriceBand_sets_sell_price_spec capital_ supply_ s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold syncPriceBand_sets_sell_price_spec + grind [RammPriceBand.syncPriceBand, RammPriceBand.capital, RammPriceBand.supply, RammPriceBand.bookValue, RammPriceBand.buySpotPrice, RammPriceBand.sellSpotPrice] + +end Benchmark.Cases.NexusMutual.RammPriceBand diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/BuyGeBookValue.lean b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/BuyGeBookValue.lean new file mode 100644 index 00000000..227f18df --- /dev/null +++ b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/BuyGeBookValue.lean @@ -0,0 +1,28 @@ +import Benchmark.Cases.NexusMutual.RammPriceBand.Proofs +import Benchmark.Grindset + +namespace Benchmark.Cases.NexusMutual.RammSpotPrice + +open Verity +open Verity.EVM.Uint256 + +/-- +The buy spot price is always at or above book value, regardless of whether +the ratchet has converged (BV branch) or is still converging (ratchet branch). +-/ +theorem spotPrice_buy_ge_book_value + (eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed : Uint256) + (hEth : eth != 0) + (hOldEth : oldEth != 0) + (hSupply : supply != 0) + (hCapital : capital != 0) + (hBuyReserve : calculateBuyReserve eth oldEth oldNxmBuyReserve capital supply elapsed speed != 0) + (hSafe : buyArithmeticSafe eth oldEth oldNxmBuyReserve capital supply elapsed speed) : + spotPrice_buy_ge_book_value_spec eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold spotPrice_buy_ge_book_value_spec + grind + +end Benchmark.Cases.NexusMutual.RammSpotPrice diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBookValue.lean b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBookValue.lean new file mode 100644 index 00000000..22df4afd --- /dev/null +++ b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBookValue.lean @@ -0,0 +1,29 @@ +import Benchmark.Cases.NexusMutual.RammPriceBand.Proofs +import Benchmark.Grindset + +namespace Benchmark.Cases.NexusMutual.RammSpotPrice + +open Verity +open Verity.EVM.Uint256 + +/-- +The sell spot price is always at or below book value, regardless of whether +the ratchet has converged (BV branch) or is still converging (ratchet branch). +-/ +theorem spotPrice_sell_le_book_value + (eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed : Uint256) + (hEth : eth != 0) + (hOldEth : oldEth != 0) + (hSupply : supply != 0) + (hCapital : capital != 0) + (hSellReserve : calculateSellReserve eth oldEth oldNxmSellReserve capital supply elapsed speed != 0) + (hSafe : sellArithmeticSafe eth oldEth oldNxmSellReserve capital supply elapsed speed) + (hScale : realisticSellScale eth capital supply) : + spotPrice_sell_le_book_value_spec eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold spotPrice_sell_le_book_value_spec + grind + +end Benchmark.Cases.NexusMutual.RammSpotPrice diff --git a/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBuy.lean b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBuy.lean new file mode 100644 index 00000000..3cf73197 --- /dev/null +++ b/Benchmark/GeneratedPreview/NexusMutual/RammSpotPrice/Tasks/SellLeBuy.lean @@ -0,0 +1,31 @@ +import Benchmark.Cases.NexusMutual.RammPriceBand.Proofs +import Benchmark.Grindset + +namespace Benchmark.Cases.NexusMutual.RammSpotPrice + +open Verity +open Verity.EVM.Uint256 + +/-- +The sell spot price never exceeds the buy spot price. +Together with buy_ge_book_value and sell_le_book_value, this gives: sell ≤ bv ≤ buy. +-/ +theorem spotPrice_sell_le_buy + (eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed : Uint256) + (hEth : eth != 0) + (hOldEth : oldEth != 0) + (hSupply : supply != 0) + (hCapital : capital != 0) + (hBuyReserve : calculateBuyReserve eth oldEth oldNxmBuyReserve capital supply elapsed speed != 0) + (hSellReserve : calculateSellReserve eth oldEth oldNxmSellReserve capital supply elapsed speed != 0) + (hBuySafe : buyArithmeticSafe eth oldEth oldNxmBuyReserve capital supply elapsed speed) + (hSellSafe : sellArithmeticSafe eth oldEth oldNxmSellReserve capital supply elapsed speed) + (hScale : realisticSellScale eth capital supply) : + spotPrice_sell_le_buy_spec eth oldEth oldNxmBuyReserve oldNxmSellReserve capital supply elapsed speed := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold spotPrice_sell_le_buy_spec + grind + +end Benchmark.Cases.NexusMutual.RammSpotPrice diff --git a/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalAssets.lean b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalAssets.lean new file mode 100644 index 00000000..0fa5c7c9 --- /dev/null +++ b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalAssets.lean @@ -0,0 +1,22 @@ +import Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `deposit` stores `oldTotalAssets + assets` in `totalAssets`. +-/ +theorem deposit_sets_totalAssets + (assets : Uint256) (s : ContractState) : + let s' := ((ERC4626VirtualOffsetDeposit.deposit assets).run s).snd + deposit_sets_totalAssets_spec assets s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold deposit_sets_totalAssets_spec + grind [ERC4626VirtualOffsetDeposit.deposit, ERC4626VirtualOffsetDeposit.totalAssets, ERC4626VirtualOffsetDeposit.totalShares] + +end Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit diff --git a/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalShares.lean b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalShares.lean new file mode 100644 index 00000000..077be747 --- /dev/null +++ b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/DepositSetsTotalShares.lean @@ -0,0 +1,22 @@ +import Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `deposit` stores `oldTotalShares + previewDeposit(assets)` in `totalShares`. +-/ +theorem deposit_sets_totalShares + (assets : Uint256) (s : ContractState) : + let s' := ((ERC4626VirtualOffsetDeposit.deposit assets).run s).snd + deposit_sets_totalShares_spec assets s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold deposit_sets_totalShares_spec + grind [ERC4626VirtualOffsetDeposit.deposit, ERC4626VirtualOffsetDeposit.totalAssets, ERC4626VirtualOffsetDeposit.totalShares] + +end Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit diff --git a/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PositiveDepositMintsPositiveSharesUnderRateBound.lean b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PositiveDepositMintsPositiveSharesUnderRateBound.lean new file mode 100644 index 00000000..962daefe --- /dev/null +++ b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PositiveDepositMintsPositiveSharesUnderRateBound.lean @@ -0,0 +1,29 @@ +import Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit.Specs +import Verity.Stdlib.Math +import Benchmark.Grindset + +namespace Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit + +open Verity +open Verity.EVM.Uint256 +open Verity.Stdlib.Math + +/-- +Under the rate-bound assumption that the exact numerator already reaches one full +denominator-width, a positive deposit mints a positive number of shares. +-/ +theorem positive_deposit_mints_positive_shares_under_rate_bound + (assets : Uint256) (s : ContractState) + (hAssets : assets ≠ 0) + (hDenom : add (s.storage 0) virtualAssets ≠ 0) + (hRate : ((add (s.storage 0) virtualAssets : Uint256) : Nat) + <= (assets : Nat) * ((add (s.storage 1) virtualShares : Uint256) : Nat)) + (hMul : (assets : Nat) * ((add (s.storage 1) virtualShares : Uint256) : Nat) <= MAX_UINT256) : + positive_deposit_mints_positive_shares_under_rate_bound_spec assets s := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold positive_deposit_mints_positive_shares_under_rate_bound_spec + grind + +end Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit diff --git a/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PreviewDepositRoundsDown.lean b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PreviewDepositRoundsDown.lean new file mode 100644 index 00000000..300b5060 --- /dev/null +++ b/Benchmark/GeneratedPreview/OpenZeppelin/ERC4626VirtualOffsetDeposit/Tasks/PreviewDepositRoundsDown.lean @@ -0,0 +1,25 @@ +import Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit.Specs +import Verity.Stdlib.Math +import Benchmark.Grindset + +namespace Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit + +open Verity +open Verity.EVM.Uint256 +open Verity.Stdlib.Math + +/-- +`previewDeposit` rounds down, so the minted share estimate times the denominator +never exceeds the exact numerator product when the multiplication is exact. +-/ +theorem previewDeposit_rounds_down + (assets : Uint256) (s : ContractState) + (hMul : (assets : Nat) * ((add (s.storage 1) virtualShares : Uint256) : Nat) <= MAX_UINT256) : + previewDeposit_rounds_down_spec assets s := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold previewDeposit_rounds_down_spec + grind + +end Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimMarksBothClaimed.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimMarksBothClaimed.lean new file mode 100644 index 00000000..0aa9a987 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimMarksBothClaimed.lean @@ -0,0 +1,29 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimBoth` on the successful path marks the caller as claimed for +both tokens. +-/ +theorem claimBoth_marks_both_claimed + (usdcShareWad wethShareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hWethFresh : s.storageMap 9 s.sender = 0) + (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) + (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd + claimBoth_marks_both_claimed_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold claimBoth_marks_both_claimed_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesRoundClaimed.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesRoundClaimed.lean new file mode 100644 index 00000000..c27fa521 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesRoundClaimed.lean @@ -0,0 +1,29 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimBoth` on the successful path increases both claimed counters +by exactly their computed claim amounts. +-/ +theorem claimBoth_updates_round_claimed + (usdcShareWad wethShareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hWethFresh : s.storageMap 9 s.sender = 0) + (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) + (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd + claimBoth_updates_round_claimed_spec usdcShareWad wethShareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. 
+ -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimBoth_updates_round_claimed_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesTotalAllocated.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesTotalAllocated.lean new file mode 100644 index 00000000..c160d241 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimUpdatesTotalAllocated.lean @@ -0,0 +1,29 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimBoth` on the successful path decreases both allocated counters +by exactly their computed claim amounts. 
+-/ +theorem claimBoth_updates_total_allocated + (usdcShareWad wethShareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hWethFresh : s.storageMap 9 s.sender = 0) + (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) + (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd + claimBoth_updates_total_allocated_spec usdcShareWad wethShareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimBoth_updates_total_allocated_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimedPlusAllocatedConserved.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimedPlusAllocatedConserved.lean new file mode 100644 index 00000000..b78a55de --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothClaimedPlusAllocatedConserved.lean @@ -0,0 +1,29 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open 
Verity.EVM.Uint256 + +/-- +Executing `claimBoth` preserves the claimed-plus-allocated accounting mass +for both tokens. +-/ +theorem claimBoth_claimed_plus_allocated_conserved + (usdcShareWad wethShareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hWethFresh : s.storageMap 9 s.sender = 0) + (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) + (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd + claimBoth_claimed_plus_allocated_conserved_spec usdcShareWad wethShareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimBoth_claimed_plus_allocated_conserved_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothMatchesIndependentClaims.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothMatchesIndependentClaims.lean new file mode 100644 index 00000000..bfd56df8 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothMatchesIndependentClaims.lean @@ -0,0 +1,29 @@ +import 
Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimBoth` yields the same USDC slice as `claimUsdc` alone and the +same WETH slice as `claimWeth` alone. +-/ +theorem claimBoth_matches_independent_claims + (usdcShareWad wethShareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hWethFresh : s.storageMap 9 s.sender = 0) + (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) + (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd + claimBoth_matches_independent_claims_spec usdcShareWad wethShareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. 
+ unfold claimBoth_matches_independent_claims_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothNoOverclaim.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothNoOverclaim.lean new file mode 100644 index 00000000..d40c9519 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothNoOverclaim.lean @@ -0,0 +1,28 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimBoth` on the successful path preserves both round bounds. +-/ +theorem claimBoth_preserves_round_bounds + (usdcShareWad wethShareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hWethFresh : s.storageMap 9 s.sender = 0) + (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) + (hWethBound : add (s.storage 7) (computedWethClaimAmount wethShareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd + claimBoth_preserves_round_bounds_spec s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. 
Use `grind?` for hints. + unfold claimBoth_preserves_round_bounds_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcBoundViolationRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcBoundViolationRejected.lean new file mode 100644 index 00000000..781d4181 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcBoundViolationRejected.lean @@ -0,0 +1,30 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimBoth` when the computed USDC payout would exceed the round +total reverts before any state writes, leaving the contract state unchanged. +-/ +theorem claimBoth_reverts_if_usdc_exceeds_total + (usdcShareWad : Uint256) + (wethProofAccepted : Bool) + (wethShareWad : Uint256) + (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hUsdcExceeds : add (s.storage 1) (computedClaimAmount usdcShareWad s) > s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad wethProofAccepted).run s).snd + claimBoth_reverts_if_usdc_exceeds_total_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. 
+ -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimBoth_reverts_if_usdc_exceeds_total_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcDoubleClaimRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcDoubleClaimRejected.lean new file mode 100644 index 00000000..1df572bd --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothUsdcDoubleClaimRejected.lean @@ -0,0 +1,29 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimBoth` with a previously claimed USDC entitlement reverts +before any state writes, leaving the contract state unchanged. +-/ +theorem claimBoth_reverts_if_usdc_already_claimed + (usdcShareWad : Uint256) + (usdcProofAccepted wethProofAccepted : Bool) + (wethShareWad : Uint256) + (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hClaimed : s.storageMap 5 s.sender != 0) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad usdcProofAccepted wethShareWad wethProofAccepted).run s).snd + claimBoth_reverts_if_usdc_already_claimed_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. 
+ -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimBoth_reverts_if_usdc_already_claimed_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethBoundViolationRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethBoundViolationRejected.lean new file mode 100644 index 00000000..08b77542 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethBoundViolationRejected.lean @@ -0,0 +1,30 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimBoth` when the computed WETH payout would exceed the round +total reverts and rolls back the earlier USDC sub-claim, leaving the contract +state unchanged. 
+-/ +theorem claimBoth_reverts_if_weth_exceeds_total + (usdcShareWad wethShareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hWethFresh : s.storageMap 9 s.sender = 0) + (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) + (hWethExceeds : add (s.storage 7) (computedWethClaimAmount wethShareWad s) > s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad true).run s).snd + claimBoth_reverts_if_weth_exceeds_total_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimBoth_reverts_if_weth_exceeds_total_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethDoubleClaimRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethDoubleClaimRejected.lean new file mode 100644 index 00000000..1d7a5ad0 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BothWethDoubleClaimRejected.lean @@ -0,0 +1,30 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing 
`claimBoth` with a previously claimed WETH entitlement reverts and +rolls back the earlier USDC sub-claim, leaving the contract state unchanged. +-/ +theorem claimBoth_reverts_if_weth_already_claimed + (usdcShareWad wethShareWad : Uint256) + (wethProofAccepted : Bool) + (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hUsdcFresh : s.storageMap 5 s.sender = 0) + (hWethClaimed : s.storageMap 9 s.sender != 0) + (hUsdcBound : add (s.storage 1) (computedClaimAmount usdcShareWad s) <= s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimBoth usdcShareWad true wethShareWad wethProofAccepted).run s).snd + claimBoth_reverts_if_weth_already_claimed_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimBoth_reverts_if_weth_already_claimed_spec + grind [StreamRecoveryClaimUsdc.claimBoth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BoundViolationRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BoundViolationRejected.lean new file mode 100644 index 00000000..04d9d696 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/BoundViolationRejected.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace 
Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimUsdc` when the computed payout would exceed the round total +reverts before any state writes, leaving the contract state unchanged. +-/ +theorem claimUsdc_reverts_if_exceeds_total + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 5 s.sender = 0) + (hExceeds : add (s.storage 1) (computedClaimAmount shareWad s) > s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd + claimUsdc_reverts_if_exceeds_total_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimUsdc_reverts_if_exceeds_total_spec + grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimMarksUser.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimMarksUser.lean new file mode 100644 index 00000000..b9bee7b2 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimMarksUser.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- 
+Executing `claimUsdc` on the successful path marks the caller as claimed. +-/ +theorem claimUsdc_marks_user_claimed + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 5 s.sender = 0) + (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd + claimUsdc_marks_claimed_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimUsdc_marks_claimed_spec + grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesRoundClaimed.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesRoundClaimed.lean new file mode 100644 index 00000000..bd26fbda --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesRoundClaimed.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimUsdc` on the successful path increases `roundUsdcClaimed` +by exactly the computed claim amount. 
+-/ +theorem claimUsdc_updates_round_claimed + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 5 s.sender = 0) + (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd + claimUsdc_updates_round_claimed_spec shareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimUsdc_updates_round_claimed_spec + grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesTotalAllocated.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesTotalAllocated.lean new file mode 100644 index 00000000..f8d7ae44 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimUpdatesTotalAllocated.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimUsdc` on the successful path decreases `totalUsdcAllocated` +by exactly the computed claim amount. 
+-/ +theorem claimUsdc_updates_total_allocated + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 5 s.sender = 0) + (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd + claimUsdc_updates_total_allocated_spec shareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimUsdc_updates_total_allocated_spec + grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimedPlusAllocatedConserved.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimedPlusAllocatedConserved.lean new file mode 100644 index 00000000..91a4f0fe --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/ClaimedPlusAllocatedConserved.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimUsdc` moves the computed amount from `totalUsdcAllocated` +into `roundUsdcClaimed`, preserving the combined accounting mass. 
+-/ +theorem claimUsdc_claimed_plus_allocated_conserved + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 5 s.sender = 0) + (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd + claimUsdc_claimed_plus_allocated_conserved_spec shareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimUsdc_claimed_plus_allocated_conserved_spec + grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/DoubleClaimRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/DoubleClaimRejected.lean new file mode 100644 index 00000000..2428a3bc --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/DoubleClaimRejected.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimUsdc` for an address that already claimed reverts before any +state writes, leaving the contract state unchanged. 
+-/ +theorem claimUsdc_reverts_if_already_claimed + (shareWad : Uint256) (proofAccepted : Bool) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hClaimed : s.storageMap 5 s.sender != 0) : + let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad proofAccepted).run s).snd + claimUsdc_reverts_if_already_claimed_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimUsdc_reverts_if_already_claimed_spec + grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/NoOverclaim.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/NoOverclaim.lean new file mode 100644 index 00000000..3d3b7616 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/NoOverclaim.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimUsdc` on the successful path preserves the round bound. 
+-/ +theorem claimUsdc_preserves_round_bound + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 5 s.sender = 0) + (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd + claimUsdc_preserves_round_bound_spec s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimUsdc_preserves_round_bound_spec + grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/UsdcPreservesWethState.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/UsdcPreservesWethState.lean new file mode 100644 index 00000000..feb369ee --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/UsdcPreservesWethState.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimUsdc` on the successful path preserves the WETH accounting +slice. 
+-/ +theorem claimUsdc_preserves_weth_state + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 5 s.sender = 0) + (hBound : add (s.storage 1) (computedClaimAmount shareWad s) <= s.storage 0) : + let s' := ((StreamRecoveryClaimUsdc.claimUsdc shareWad true).run s).snd + claimUsdc_preserves_weth_state_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimUsdc_preserves_weth_state_spec + grind [StreamRecoveryClaimUsdc.claimUsdc, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethBoundViolationRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethBoundViolationRejected.lean new file mode 100644 index 00000000..b427fab5 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethBoundViolationRejected.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimWeth` when the computed payout would exceed the round total +reverts before any state writes, leaving the contract state unchanged. 
+-/ +theorem claimWeth_reverts_if_exceeds_total + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 9 s.sender = 0) + (hExceeds : add (s.storage 7) (computedWethClaimAmount shareWad s) > s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd + claimWeth_reverts_if_exceeds_total_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimWeth_reverts_if_exceeds_total_spec + grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimMarksUser.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimMarksUser.lean new file mode 100644 index 00000000..bd2e9eff --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimMarksUser.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimWeth` on the successful path marks the caller as claimed. 
+-/ +theorem claimWeth_marks_user_claimed + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 9 s.sender = 0) + (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd + claimWeth_marks_claimed_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimWeth_marks_claimed_spec + grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesRoundClaimed.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesRoundClaimed.lean new file mode 100644 index 00000000..171d95a5 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesRoundClaimed.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimWeth` on the successful path increases `roundWethClaimed` +by exactly the computed claim amount. 
+-/ +theorem claimWeth_updates_round_claimed + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 9 s.sender = 0) + (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd + claimWeth_updates_round_claimed_spec shareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimWeth_updates_round_claimed_spec + grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesTotalAllocated.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesTotalAllocated.lean new file mode 100644 index 00000000..bc9bee1e --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimUpdatesTotalAllocated.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimWeth` on the successful path decreases `totalWethAllocated` +by exactly the computed claim amount. 
+-/ +theorem claimWeth_updates_total_allocated + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 9 s.sender = 0) + (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd + claimWeth_updates_total_allocated_spec shareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimWeth_updates_total_allocated_spec + grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimedPlusAllocatedConserved.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimedPlusAllocatedConserved.lean new file mode 100644 index 00000000..09bd7f40 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethClaimedPlusAllocatedConserved.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimWeth` moves the computed amount from `totalWethAllocated` +into `roundWethClaimed`, preserving the combined accounting mass. 
+-/ +theorem claimWeth_claimed_plus_allocated_conserved + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 9 s.sender = 0) + (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd + claimWeth_claimed_plus_allocated_conserved_spec shareWad s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimWeth_claimed_plus_allocated_conserved_spec + grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethDoubleClaimRejected.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethDoubleClaimRejected.lean new file mode 100644 index 00000000..04b5428d --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethDoubleClaimRejected.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimWeth` for an address that already claimed reverts before any +state writes, leaving the contract state unchanged. 
+-/ +theorem claimWeth_reverts_if_already_claimed + (shareWad : Uint256) (proofAccepted : Bool) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hClaimed : s.storageMap 9 s.sender != 0) : + let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad proofAccepted).run s).snd + claimWeth_reverts_if_already_claimed_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimWeth_reverts_if_already_claimed_spec + grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethNoOverclaim.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethNoOverclaim.lean new file mode 100644 index 00000000..c6160e09 --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethNoOverclaim.lean @@ -0,0 +1,26 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimWeth` on the successful path preserves the round bound. 
+-/ +theorem claimWeth_preserves_round_bound + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 9 s.sender = 0) + (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd + claimWeth_preserves_round_bound_spec s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimWeth_preserves_round_bound_spec + grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethPreservesUsdcState.lean b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethPreservesUsdcState.lean new file mode 100644 index 00000000..539bb8eb --- /dev/null +++ b/Benchmark/GeneratedPreview/PaladinVotes/StreamRecoveryClaimUsdc/Tasks/WethPreservesUsdcState.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `claimWeth` on the successful path preserves the USDC accounting +slice. 
+-/ +theorem claimWeth_preserves_usdc_state + (shareWad : Uint256) (s : ContractState) + (hWaiver : s.storageMap 4 s.sender != 0) + (hActive : s.storage 3 != 0) + (hFresh : s.storageMap 9 s.sender = 0) + (hBound : add (s.storage 7) (computedWethClaimAmount shareWad s) <= s.storage 6) : + let s' := ((StreamRecoveryClaimUsdc.claimWeth shareWad true).run s).snd + claimWeth_preserves_usdc_state_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold claimWeth_preserves_usdc_state_spec + grind [StreamRecoveryClaimUsdc.claimWeth, StreamRecoveryClaimUsdc.roundUsdcTotal, StreamRecoveryClaimUsdc.roundUsdcClaimed, StreamRecoveryClaimUsdc.totalUsdcAllocated, StreamRecoveryClaimUsdc.roundActive, StreamRecoveryClaimUsdc.hasSignedWaiver, StreamRecoveryClaimUsdc.hasClaimedUsdc, StreamRecoveryClaimUsdc.roundWethTotal, StreamRecoveryClaimUsdc.roundWethClaimed, StreamRecoveryClaimUsdc.totalWethAllocated, StreamRecoveryClaimUsdc.hasClaimedWeth] + +end Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerAcyclicity.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerAcyclicity.lean new file mode 100644 index 00000000..950e6a92 --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerAcyclicity.lean @@ -0,0 +1,32 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +addOwner preserves acyclicity of the owner linked list. + +After addOwner(owner), the list becomes: + SENTINEL → owner → old_head → ... → SENTINEL + +Acyclicity is a tautology — it holds for any state. 
The proof +(acyclic_generic) shows that any duplicate-free chain from SENTINEL's +successor ending at key ≠ SENTINEL cannot contain SENTINEL, purely +by the structure of the definitions. No pre-state hypotheses are needed +beyond the Solidity require guards. +-/ +theorem addOwner_acyclicity + (owner : Address) (s : ContractState) + (hNotZero : (owner != zeroAddress) = true) + (hNotSentinel : (owner != SENTINEL) = true) + (hFresh : (wordToAddress (s.storageMap 0 owner) == zeroAddress) = true) : + acyclic ((OwnerManager.addOwner owner).run s).snd := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.addOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerIsOwnerCorrectness.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerIsOwnerCorrectness.lean new file mode 100644 index 00000000..c2c82e84 --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerIsOwnerCorrectness.lean @@ -0,0 +1,33 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Functional correctness of `addOwner`: the new address becomes an owner +and all other addresses' ownership status is unchanged. + +`isOwner s addr` holds iff `next s addr ≠ zeroAddress ∧ addr ≠ SENTINEL`. + +Proof strategy: use `addOwner_next_eq` to characterise the post-state +`next` function, then split into the two conjuncts of `addOwner_correctness`. +For the new owner: `next s' owner = next s SENTINEL ≠ 0`. +For others: `next s' k = next s k` when `k ≠ SENTINEL` and `k ≠ owner`. 
+-/ +theorem addOwner_isOwnerCorrectness + (owner : Address) (s : ContractState) + (hNotZero : (owner != zeroAddress) = true) + (hNotSentinel : (owner != SENTINEL) = true) + (hFresh : (wordToAddress (s.storageMap 0 owner) == zeroAddress) = true) + (hPreInv : ownerListInvariant s) : + let s' := ((OwnerManager.addOwner owner).run s).snd + addOwner_correctness s s' owner := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.addOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerOwnerListInvariant.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerOwnerListInvariant.lean new file mode 100644 index 00000000..e3c1c0bd --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/AddOwnerOwnerListInvariant.lean @@ -0,0 +1,38 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Combined `ownerListInvariant` preservation under `addOwner`. + +The ownerListInvariant merges `inListReachable` and `reachableInList`: +membership (non-zero successor) is equivalent to reachability from +SENTINEL. This is strictly stronger than proving inListReachable alone. + +Proof strategy: prove both directions of the biconditional separately. +The forward direction (membership → reachability) follows from the +existing inListReachable proof. The reverse direction (reachability → +membership) requires showing that the new chain structure doesn't +introduce reachability to nodes with zero successors. + +Acyclicity and freshness are derived from ownerListInvariant internally, +not required as separate hypotheses. 
+-/ +theorem addOwner_ownerListInvariant + (owner : Address) (s : ContractState) + (hNotZero : (owner != zeroAddress) = true) + (hNotSentinel : (owner != SENTINEL) = true) + (hFresh : (wordToAddress (s.storageMap 0 owner) == zeroAddress) = true) + (hPreInv : ownerListInvariant s) : + let s' := ((OwnerManager.addOwner owner).run s).snd + ownerListInvariant s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.addOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/InListReachable.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/InListReachable.lean new file mode 100644 index 00000000..4340fd71 --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/InListReachable.lean @@ -0,0 +1,48 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Certora `inListReachable` invariant preservation under `addOwner`. + +Given that in the pre-state every node with a non-zero successor is reachable +from SENTINEL, show that the same holds in the post-state after inserting +`owner` at the head of the linked list. + +Proof strategy: SENTINEL is trivially reachable (reflexivity). The new owner +is reachable via [SENTINEL, owner]. For any other key with a non-zero successor, +its next pointer is unchanged, so we can lift its pre-state witness chain to +the post-state and prepend the new path SENTINEL → owner → old_head. 
+-/ +theorem in_list_reachable + (owner : Address) (s : ContractState) + (hNotZero : (owner != zeroAddress) = true) + (hNotSentinel : (owner != SENTINEL) = true) + (hFresh : (wordToAddress (s.storageMap 0 owner) == zeroAddress) = true) + (hPreReach : ∀ key : Address, next s key ≠ zeroAddress → reachable s SENTINEL key) + -- Raw acyclicity: SENTINEL ∉ any chain from next s SENTINEL. + -- Strictly stronger than `acyclic s` (no noDuplicates guard). + (hAcyclic : ∀ key : Address, ∀ chain : List Address, + chain.head? = some (next s SENTINEL) → + chain.getLast? = some key → + isChain s chain → + SENTINEL ∉ chain) + -- Raw freshness: owner ∉ any chain from next s SENTINEL. + -- Strictly stronger than `freshInList s owner` (no noDuplicates guard). + (hOwnerFresh : ∀ key : Address, ∀ chain : List Address, + chain.head? = some (next s SENTINEL) → + chain.getLast? = some key → + isChain s chain → + owner ∉ chain) : + in_list_reachable_spec s ((OwnerManager.addOwner owner).run s).snd := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold in_list_reachable_spec + grind [OwnerManager.addOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerAcyclicity.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerAcyclicity.lean new file mode 100644 index 00000000..de4213ab --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerAcyclicity.lean @@ -0,0 +1,30 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +removeOwner preserves acyclicity of the owner linked list. + +Acyclicity is a tautology — it holds for any state. 
The proof +(acyclic_generic) shows that any duplicate-free chain from SENTINEL's +successor ending at key ≠ SENTINEL cannot contain SENTINEL, purely +by the structure of the definitions. No pre-state acyclicity hypothesis +is needed. +-/ +theorem removeOwner_acyclicity + (prevOwner owner : Address) (s : ContractState) + (hNotZero : (owner != zeroAddress) = true) + (hNotSentinel : (owner != SENTINEL) = true) + (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == owner) = true) + (hOwnerInList : next s owner ≠ zeroAddress) : + acyclic ((OwnerManager.removeOwner prevOwner owner).run s).snd := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.removeOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerInListReachable.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerInListReachable.lean new file mode 100644 index 00000000..bb024614 --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerInListReachable.lean @@ -0,0 +1,44 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Certora `inListReachable` invariant preservation under `removeOwner`. + +After removing `owner` by unlinking it from `prevOwner`, show that every +node with a non-zero successor in the post-state is still reachable from +SENTINEL. + +Proof strategy: The removed owner's mapping becomes 0 so it no longer +triggers the invariant. prevOwner now points to owner's old successor, +so chains that went through owner can "skip" it: replace +[... → prevOwner → owner → X → ...] with [... → prevOwner → X → ...]. 
+All other next pointers are unchanged. +-/ +theorem removeOwner_inListReachable + (prevOwner owner : Address) (s : ContractState) + (hNotZero : (owner != zeroAddress) = true) + (hNotSentinel : (owner != SENTINEL) = true) + (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == owner) = true) + -- The removed owner must have a non-zero successor (i.e. be in the list). + (hOwnerInList : next s owner ≠ zeroAddress) + -- Pre-state invariant + (hPreInv : inListReachable s) + -- Unique predecessor: each non-zero node has at most one non-zero predecessor. + (hUniquePred : uniquePredecessor s) + -- prevOwner is non-zero (a valid list node) + (hPrevNZ : prevOwner ≠ zeroAddress) + -- Zero address maps to itself + (hZeroInert : next s zeroAddress = zeroAddress) : + let s' := ((OwnerManager.removeOwner prevOwner owner).run s).snd + inListReachable s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.removeOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerIsOwnerCorrectness.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerIsOwnerCorrectness.lean new file mode 100644 index 00000000..df54abba --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerIsOwnerCorrectness.lean @@ -0,0 +1,33 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Functional correctness of `removeOwner`: the removed address is no longer +an owner and all other addresses' ownership status is unchanged. + +`isOwner s addr` holds iff `next s addr ≠ zeroAddress ∧ addr ≠ SENTINEL`. 
+ +Proof strategy: use `removeOwner_storageMap` to characterise the post-state +`next` function, then show `next s' owner = zeroAddress` and for all +`k ≠ owner`, `next s' k ≠ 0 ↔ next s k ≠ 0` by case-splitting on +`k = prevOwner`. +-/ +theorem removeOwner_isOwnerCorrectness + (prevOwner owner : Address) (s : ContractState) + (hNotZero : (owner != zeroAddress) = true) + (hNotSentinel : (owner != SENTINEL) = true) + (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == owner) = true) + (hOwnerInList : next s owner ≠ zeroAddress) : + let s' := ((OwnerManager.removeOwner prevOwner owner).run s).snd + removeOwner_correctness s s' owner := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.removeOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerOwnerListInvariant.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerOwnerListInvariant.lean new file mode 100644 index 00000000..a417106c --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/RemoveOwnerOwnerListInvariant.lean @@ -0,0 +1,32 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Combined `ownerListInvariant` preservation under `removeOwner`. + +Properties like noSelfLoops and owner ≠ prevOwner are derived internally +from ownerListInvariant + uniquePredecessor, not required as hypotheses. 
+-/ +theorem removeOwner_ownerListInvariant + (prevOwner owner : Address) (s : ContractState) + (hNotZero : (owner != zeroAddress) = true) + (hNotSentinel : (owner != SENTINEL) = true) + (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == owner) = true) + (hOwnerInList : next s owner ≠ zeroAddress) + (hPreInv : ownerListInvariant s) + (hUniquePred : uniquePredecessor s) + (hPrevNZ : prevOwner ≠ zeroAddress) + (hZeroInert : next s zeroAddress = zeroAddress) : + let s' := ((OwnerManager.removeOwner prevOwner owner).run s).snd + ownerListInvariant s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.removeOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersAcyclicity.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersAcyclicity.lean new file mode 100644 index 00000000..30c1e904 --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersAcyclicity.lean @@ -0,0 +1,37 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +setupOwners establishes acyclicity of the owner linked list (base case). + +The constructed list SENTINEL → o1 → o2 → o3 → SENTINEL has no internal +cycles because all three owners are distinct, non-zero, and non-sentinel. +SENTINEL appears only as the list head and the terminal pointer +(o3 → SENTINEL), never in the interior of any chain starting from +SENTINEL's successor. 
+-/ +theorem setupOwners_acyclicity + (owner1 owner2 owner3 : Address) (s : ContractState) + (h1NZ : (owner1 != zeroAddress) = true) + (h1NS : (owner1 != SENTINEL) = true) + (h2NZ : (owner2 != zeroAddress) = true) + (h2NS : (owner2 != SENTINEL) = true) + (h3NZ : (owner3 != zeroAddress) = true) + (h3NS : (owner3 != SENTINEL) = true) + (h12 : (owner1 != owner2) = true) + (h13 : (owner1 != owner3) = true) + (h23 : (owner2 != owner3) = true) + (hClean : ∀ addr : Address, s.storageMap 0 addr = 0) : + let s' := ((OwnerManager.setupOwners owner1 owner2 owner3).run s).snd + acyclic s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.setupOwners, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersInListReachable.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersInListReachable.lean new file mode 100644 index 00000000..e72e3cdf --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersInListReachable.lean @@ -0,0 +1,40 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +setupOwners establishes the `inListReachable` invariant from a clean state. +This is the base case: no pre-state invariant is required. + +After setupOwners(owner1, owner2, owner3), the linked list is: + SENTINEL → owner1 → owner2 → owner3 → SENTINEL + +Every node with a non-zero successor (SENTINEL, owner1, owner2, owner3) +is reachable from SENTINEL by construction. This can be proven by +characterizing the post-state storageMap and building explicit witness +chains for each node. 
+-/ +theorem setupOwners_inListReachable + (owner1 owner2 owner3 : Address) (s : ContractState) + (h1NZ : (owner1 != zeroAddress) = true) + (h1NS : (owner1 != SENTINEL) = true) + (h2NZ : (owner2 != zeroAddress) = true) + (h2NS : (owner2 != SENTINEL) = true) + (h3NZ : (owner3 != zeroAddress) = true) + (h3NS : (owner3 != SENTINEL) = true) + (h12 : (owner1 != owner2) = true) + (h13 : (owner1 != owner3) = true) + (h23 : (owner2 != owner3) = true) + (hClean : ∀ addr : Address, s.storageMap 0 addr = 0) : + let s' := ((OwnerManager.setupOwners owner1 owner2 owner3).run s).snd + inListReachable s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.setupOwners, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersOwnerListInvariant.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersOwnerListInvariant.lean new file mode 100644 index 00000000..676511c5 --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SetupOwnersOwnerListInvariant.lean @@ -0,0 +1,40 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +setupOwners establishes the combined `ownerListInvariant` (base case). + +After setupOwners(owner1, owner2, owner3), the linked list is: + SENTINEL → owner1 → owner2 → owner3 → SENTINEL + +Both directions of the biconditional hold: every node with a non-zero +successor is reachable from SENTINEL (by explicit chains), and every +node reachable from SENTINEL has a non-zero successor (because only +SENTINEL, owner1, owner2, owner3 are reachable, and they all have +non-zero successors). 
+-/ +theorem setupOwners_ownerListInvariant + (owner1 owner2 owner3 : Address) (s : ContractState) + (h1NZ : (owner1 != zeroAddress) = true) + (h1NS : (owner1 != SENTINEL) = true) + (h2NZ : (owner2 != zeroAddress) = true) + (h2NS : (owner2 != SENTINEL) = true) + (h3NZ : (owner3 != zeroAddress) = true) + (h3NS : (owner3 != SENTINEL) = true) + (h12 : (owner1 != owner2) = true) + (h13 : (owner1 != owner3) = true) + (h23 : (owner2 != owner3) = true) + (hClean : ∀ addr : Address, s.storageMap 0 addr = 0) : + let s' := ((OwnerManager.setupOwners owner1 owner2 owner3).run s).snd + ownerListInvariant s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.setupOwners, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerAcyclicity.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerAcyclicity.lean new file mode 100644 index 00000000..413689a1 --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerAcyclicity.lean @@ -0,0 +1,32 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +swapOwner preserves acyclicity of the owner linked list. + +Acyclicity is a tautology — it holds for any state. The proof +(acyclic_generic) shows that any duplicate-free chain from SENTINEL's +successor ending at key ≠ SENTINEL cannot contain SENTINEL, purely +by the structure of the definitions. No pre-state hypotheses are needed +beyond the Solidity require guards. 
+-/ +theorem swapOwner_acyclicity + (prevOwner oldOwner newOwner : Address) (s : ContractState) + (hNewNotZero : (newOwner != zeroAddress) = true) + (hNewNotSentinel : (newOwner != SENTINEL) = true) + (hNewFresh : (wordToAddress (s.storageMap 0 newOwner) == zeroAddress) = true) + (hOldNotZero : (oldOwner != zeroAddress) = true) + (hOldNotSentinel : (oldOwner != SENTINEL) = true) + (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == oldOwner) = true) : + acyclic ((OwnerManager.swapOwner prevOwner oldOwner newOwner).run s).snd := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.swapOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerInListReachable.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerInListReachable.lean new file mode 100644 index 00000000..c4055e4b --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerInListReachable.lean @@ -0,0 +1,46 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Certora `inListReachable` invariant preservation under `swapOwner`. + +swapOwner atomically replaces oldOwner with newOwner in-place: + owners[newOwner] = owners[oldOwner] + owners[prevOwner] = newOwner + owners[oldOwner] = 0 + +Proof strategy: newOwner inherits oldOwner's successor. For any key with +a non-zero successor in the post-state, its pre-state chain through +oldOwner can be adapted by replacing oldOwner with newOwner: +[... → prevOwner → oldOwner → X → ...] becomes +[... → prevOwner → newOwner → X → ...]. 
+-/ +theorem swapOwner_inListReachable + (prevOwner oldOwner newOwner : Address) (s : ContractState) + (hNewNotZero : (newOwner != zeroAddress) = true) + (hNewNotSentinel : (newOwner != SENTINEL) = true) + (hNewFresh : (wordToAddress (s.storageMap 0 newOwner) == zeroAddress) = true) + (hOldNotZero : (oldOwner != zeroAddress) = true) + (hOldNotSentinel : (oldOwner != SENTINEL) = true) + (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == oldOwner) = true) + -- Pre-state invariant (full ownerListInvariant, not just inListReachable) + (hPreInvFull : ownerListInvariant s) + -- Unique predecessor: each non-zero node has at most one non-zero predecessor. + (hUniquePred : uniquePredecessor s) + -- prevOwner is non-zero (a valid list node) + (hPrevNZ : prevOwner ≠ zeroAddress) + -- Zero address maps to itself + (hZeroInert : next s zeroAddress = zeroAddress) : + let s' := ((OwnerManager.swapOwner prevOwner oldOwner newOwner).run s).snd + inListReachable s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.swapOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerIsOwnerCorrectness.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerIsOwnerCorrectness.lean new file mode 100644 index 00000000..2aff455a --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerIsOwnerCorrectness.lean @@ -0,0 +1,38 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Functional correctness of `swapOwner`: the old owner is removed, the new +owner is added, and all other addresses' ownership status is unchanged. 
+ +`isOwner s addr` holds iff `next s addr ≠ zeroAddress ∧ addr ≠ SENTINEL`. + +Proof strategy: use `swapOwner_storageMap` to characterise the post-state +`next` function, then show: + 1. `next s' oldOwner = zeroAddress` (old owner removed) + 2. `next s' newOwner = next s oldOwner ≠ 0` (new owner added) + 3. For all `k ≠ oldOwner, k ≠ newOwner`: `next s' k ≠ 0 ↔ next s k ≠ 0` + by case-splitting on `k = prevOwner`. +-/ +theorem swapOwner_isOwnerCorrectness + (prevOwner oldOwner newOwner : Address) (s : ContractState) + (hNewNotZero : (newOwner != zeroAddress) = true) + (hNewNotSentinel : (newOwner != SENTINEL) = true) + (hNewFresh : (wordToAddress (s.storageMap 0 newOwner) == zeroAddress) = true) + (hOldNotZero : (oldOwner != zeroAddress) = true) + (hOldNotSentinel : (oldOwner != SENTINEL) = true) + (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == oldOwner) = true) + (hOldInList : next s oldOwner ≠ zeroAddress) : + let s' := ((OwnerManager.swapOwner prevOwner oldOwner newOwner).run s).snd + swapOwner_correctness s s' oldOwner newOwner := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.swapOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerOwnerListInvariant.lean b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerOwnerListInvariant.lean new file mode 100644 index 00000000..d1ceab34 --- /dev/null +++ b/Benchmark/GeneratedPreview/Safe/OwnerManagerReach/Tasks/SwapOwnerOwnerListInvariant.lean @@ -0,0 +1,35 @@ +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Safe.OwnerManagerReach + +open Verity +open Verity.EVM.Uint256 + +/-- +Combined `ownerListInvariant` preservation under `swapOwner`. 
+ +Properties like noSelfLoops, freshInList, and oldOwner ≠ prevOwner are +derived internally from ownerListInvariant + uniquePredecessor, not +required as hypotheses. +-/ +theorem swapOwner_ownerListInvariant + (prevOwner oldOwner newOwner : Address) (s : ContractState) + (hNewNotZero : (newOwner != zeroAddress) = true) + (hNewNotSentinel : (newOwner != SENTINEL) = true) + (hNewFresh : (wordToAddress (s.storageMap 0 newOwner) == zeroAddress) = true) + (hOldNotZero : (oldOwner != zeroAddress) = true) + (hOldNotSentinel : (oldOwner != SENTINEL) = true) + (hPrevLink : (wordToAddress (s.storageMap 0 prevOwner) == oldOwner) = true) + (hPreInv : ownerListInvariant s) + (hUniquePred : uniquePredecessor s) + (hPrevNZ : prevOwner ≠ zeroAddress) + (hZeroInert : next s zeroAddress = zeroAddress) : + let s' := ((OwnerManager.swapOwner prevOwner oldOwner newOwner).run s).snd + ownerListInvariant s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + grind [OwnerManager.swapOwner, OwnerManager.owners, OwnerManager.ownerCount] + +end Benchmark.Cases.Safe.OwnerManagerReach diff --git a/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapEnforcesFeeAdjustedInvariant.lean b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapEnforcesFeeAdjustedInvariant.lean new file mode 100644 index 00000000..ab80e3a9 --- /dev/null +++ b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapEnforcesFeeAdjustedInvariant.lean @@ -0,0 +1,28 @@ +import Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `applySwap` is only possible when the fee-adjusted product guard holds. 
+-/ +theorem applySwap_enforces_fee_adjusted_invariant + (balance0 balance1 amount0In amount1In : Uint256) (s : ContractState) + (hInput : amount0In != 0 || amount1In != 0) + (hFee0 : mul balance0 1000 >= mul amount0In 3) + (hFee1 : mul balance1 1000 >= mul amount1In 3) + (hK : mul (sub (mul balance0 1000) (mul amount0In 3)) + (sub (mul balance1 1000) (mul amount1In 3)) + >= mul (mul (s.storage 0) (s.storage 1)) 1000000) : + let s' := ((PairFeeAdjustedSwap.applySwap balance0 balance1 amount0In amount1In).run s).snd + applySwap_enforces_fee_adjusted_invariant_spec balance0 balance1 amount0In amount1In s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold applySwap_enforces_fee_adjusted_invariant_spec + grind [PairFeeAdjustedSwap.applySwap, PairFeeAdjustedSwap.reserve0, PairFeeAdjustedSwap.reserve1] + +end Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap diff --git a/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve0.lean b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve0.lean new file mode 100644 index 00000000..083857db --- /dev/null +++ b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve0.lean @@ -0,0 +1,28 @@ +import Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `applySwap` stores the observed `balance0` as `reserve0`. 
+-/ +theorem applySwap_sets_reserve0 + (balance0 balance1 amount0In amount1In : Uint256) (s : ContractState) + (hInput : amount0In != 0 || amount1In != 0) + (hFee0 : mul balance0 1000 >= mul amount0In 3) + (hFee1 : mul balance1 1000 >= mul amount1In 3) + (hK : mul (sub (mul balance0 1000) (mul amount0In 3)) + (sub (mul balance1 1000) (mul amount1In 3)) + >= mul (mul (s.storage 0) (s.storage 1)) 1000000) : + let s' := ((PairFeeAdjustedSwap.applySwap balance0 balance1 amount0In amount1In).run s).snd + applySwap_sets_reserve0_spec balance0 s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold applySwap_sets_reserve0_spec + grind [PairFeeAdjustedSwap.applySwap, PairFeeAdjustedSwap.reserve0, PairFeeAdjustedSwap.reserve1] + +end Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap diff --git a/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve1.lean b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve1.lean new file mode 100644 index 00000000..9aecda24 --- /dev/null +++ b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserve1.lean @@ -0,0 +1,28 @@ +import Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `applySwap` stores the observed `balance1` as `reserve1`. 
+-/ +theorem applySwap_sets_reserve1 + (balance0 balance1 amount0In amount1In : Uint256) (s : ContractState) + (hInput : amount0In != 0 || amount1In != 0) + (hFee0 : mul balance0 1000 >= mul amount0In 3) + (hFee1 : mul balance1 1000 >= mul amount1In 3) + (hK : mul (sub (mul balance0 1000) (mul amount0In 3)) + (sub (mul balance1 1000) (mul amount1In 3)) + >= mul (mul (s.storage 0) (s.storage 1)) 1000000) : + let s' := ((PairFeeAdjustedSwap.applySwap balance0 balance1 amount0In amount1In).run s).snd + applySwap_sets_reserve1_spec balance1 s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold applySwap_sets_reserve1_spec + grind [PairFeeAdjustedSwap.applySwap, PairFeeAdjustedSwap.reserve0, PairFeeAdjustedSwap.reserve1] + +end Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap diff --git a/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserveProduct.lean b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserveProduct.lean new file mode 100644 index 00000000..fc2e3581 --- /dev/null +++ b/Benchmark/GeneratedPreview/UniswapV2/PairFeeAdjustedSwap/Tasks/SwapSetsReserveProduct.lean @@ -0,0 +1,28 @@ +import Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap + +open Verity +open Verity.EVM.Uint256 + +/-- +Executing `applySwap` makes the stored reserve product match the post-swap balances. 
+-/ +theorem applySwap_sets_reserve_product + (balance0 balance1 amount0In amount1In : Uint256) (s : ContractState) + (hInput : amount0In != 0 || amount1In != 0) + (hFee0 : mul balance0 1000 >= mul amount0In 3) + (hFee1 : mul balance1 1000 >= mul amount1In 3) + (hK : mul (sub (mul balance0 1000) (mul amount0In 3)) + (sub (mul balance1 1000) (mul amount1In 3)) + >= mul (mul (s.storage 0) (s.storage 1)) 1000000) : + let s' := ((PairFeeAdjustedSwap.applySwap balance0 balance1 amount0In amount1In).run s).snd + applySwap_sets_reserve_product_spec balance0 balance1 s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold applySwap_sets_reserve_product_spec + grind [PairFeeAdjustedSwap.applySwap, PairFeeAdjustedSwap.reserve0, PairFeeAdjustedSwap.reserve1] + +end Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnDecreasesSupply.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnDecreasesSupply.lean new file mode 100644 index 00000000..3e44fd6b --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnDecreasesSupply.lean @@ -0,0 +1,31 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +Successful burn decreases both sender balance and totalSupply. + +When the sender has sufficient balance (fromBalance >= amount), burning +decreases balances[from] by amount and totalSupply by amount. 
+-/ +theorem burn_decreases_supply + (holder : Address) (amount : Uint256) (s : ContractState) + (hFrom : (holder != zeroAddress) = true) + (hInit : s.storageMap 2 holder ≠ 0) + (hSufficient : s.storageMap 1 holder >= amount) + (hAmount64 : amount < UINT64_MOD) + (hFromBal64 : s.storageMap 1 holder < UINT64_MOD) + (hSupply64 : s.storage 0 < UINT64_MOD) : + let s' := ((ERC7984.burn holder amount).run s).snd + burn_decreases_supply_spec holder amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold burn_decreases_supply_spec + grind [ERC7984.burn, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnInsufficient.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnInsufficient.lean new file mode 100644 index 00000000..df425975 --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/BurnInsufficient.lean @@ -0,0 +1,33 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +When the holder has insufficient balance, burn silently burns nothing. + +If `balances[holder] < amount`, then both the holder's balance and +totalSupply are unchanged. This mirrors the FHE.select pattern used +in transfer: the balance comparison cannot cause a revert or leak +information; it only chooses between transferring `amount` and `0`. 
+-/ +theorem burn_insufficient + (holder : Address) (amount : Uint256) (s : ContractState) + (hFrom : (holder != zeroAddress) = true) + (hInit : s.storageMap 2 holder ≠ 0) + (hInsufficient : ¬(s.storageMap 1 holder >= amount)) + (hAmount64 : amount < UINT64_MOD) + (hFromBal64 : s.storageMap 1 holder < UINT64_MOD) + (hSupply64 : s.storage 0 < UINT64_MOD) : + let s' := ((ERC7984.burn holder amount).run s).snd + burn_insufficient_spec holder amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold burn_insufficient_spec + grind [ERC7984.burn, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintIncreasesSupply.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintIncreasesSupply.lean new file mode 100644 index 00000000..04e58939 --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintIncreasesSupply.lean @@ -0,0 +1,31 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +Successful mint increases totalSupply and receiver balance by amount. + +When totalSupply + amount does not overflow uint64 (tryIncrease64 succeeds), +minting produces exactly `amount` new tokens: totalSupply increases by amount +and balances[to] increases by amount (mod 2^64). 
+-/ +theorem mint_increases_supply + (to : Address) (amount : Uint256) (s : ContractState) + (hTo : (to != zeroAddress) = true) + (hNoOverflow : (tryIncrease64 (s.storage 0) amount).1 = true) + (hAmount64 : amount < UINT64_MOD) + (hSupply64 : s.storage 0 < UINT64_MOD) + (hToBal64 : s.storageMap 1 to < UINT64_MOD) : + let s' := ((ERC7984.mint to amount).run s).snd + mint_increases_supply_spec to amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold mint_increases_supply_spec + grind [ERC7984.mint, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintOverflowProtection.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintOverflowProtection.lean new file mode 100644 index 00000000..89c22139 --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/MintOverflowProtection.lean @@ -0,0 +1,33 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +Mint overflow protection: when totalSupply + amount overflows uint64, +no tokens are minted. + +FHESafeMath.tryIncrease detects overflow by checking whether +(oldValue + delta) mod 2^64 >= oldValue. On overflow, the wrapped sum +is less than oldValue, so tryIncrease returns (false, oldValue). +Then FHE.select picks 0 as the transferred amount. 
+-/ +theorem mint_overflow_protection + (to : Address) (amount : Uint256) (s : ContractState) + (hTo : (to != zeroAddress) = true) + (hOverflow : (tryIncrease64 (s.storage 0) amount).1 = false) + (hAmount64 : amount < UINT64_MOD) + (hSupply64 : s.storage 0 < UINT64_MOD) + (hToBal64 : s.storageMap 1 to < UINT64_MOD) : + let s' := ((ERC7984.mint to amount).run s).snd + mint_overflow_protection_spec to amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold mint_overflow_protection_spec + grind [ERC7984.mint, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/SetOperatorUpdates.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/SetOperatorUpdates.lean new file mode 100644 index 00000000..64e8003c --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/SetOperatorUpdates.lean @@ -0,0 +1,27 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +setOperator(operator, expiry) writes `expiry` into `_operators[msg.sender][operator]` +and leaves all other operator entries unchanged. + +This is the functional-correctness property for the operator registration +function: the caller can set an expiry for a specific operator, but cannot +affect authorizations granted by other holders or to other operators. +-/ +theorem setOperator_updates + (operator : Address) (expiry : Uint256) (s : ContractState) : + let s' := ((ERC7984.setOperator operator expiry).run s).snd + setOperator_updates_spec s.sender operator expiry s s' := by + -- Grindset-first skeleton. 
See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold setOperator_updates_spec + grind [ERC7984.setOperator, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferConservation.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferConservation.lean new file mode 100644 index 00000000..6dfce253 --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferConservation.lean @@ -0,0 +1,35 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +Transfer conserves the sum of sender and receiver balances. + +After transfer(from, to, amount), `balances[from] + balances[to]` is unchanged. +This holds regardless of whether the sender has sufficient balance: +- Sufficient: from loses `amount`, to gains `amount` → sum preserved +- Insufficient: both balances unchanged → sum trivially preserved +-/ +theorem transfer_conservation + (sender recipient : Address) (amount : Uint256) (s : ContractState) + (hFrom : (sender != zeroAddress) = true) + (hTo : (recipient != zeroAddress) = true) + (hInit : s.storageMap 2 sender ≠ 0) + (hDistinct : sender ≠ recipient) + (hAmount64 : amount < UINT64_MOD) + (hFromBal64 : s.storageMap 1 sender < UINT64_MOD) + (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) + (hToNoWrap : s.storageMap 1 recipient + amount < UINT64_MOD) : + let s' := ((ERC7984.transfer sender recipient amount).run s).snd + transfer_conservation_spec sender recipient s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. 
+ -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold transfer_conservation_spec + grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferFromConservation.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferFromConservation.lean new file mode 100644 index 00000000..af1b6a27 --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferFromConservation.lean @@ -0,0 +1,40 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +Operator-gated transferFrom preserves balance conservation. + +When the caller is authorized (either `holder == msg.sender` or +`block.timestamp <= operators[holder][msg.sender]`), transferFrom +preserves the sum `balances[holder] + balances[recipient]`. + +This ensures that delegating transfer authority via the operator +pattern does not allow creation or destruction of tokens. 
+-/ +theorem transferFrom_conservation + (holder recipient : Address) (amount blockTimestamp : Uint256) + (s : ContractState) + (hFrom : (holder != zeroAddress) = true) + (hTo : (recipient != zeroAddress) = true) + (hInit : s.storageMap 2 holder ≠ 0) + (hDistinct : holder ≠ recipient) + (hAuthorized : + holder == s.sender ∨ blockTimestamp <= s.storageMap2 3 holder s.sender) + (hAmount64 : amount < UINT64_MOD) + (hHolderBal64 : s.storageMap 1 holder < UINT64_MOD) + (hRecipientBal64 : s.storageMap 1 recipient < UINT64_MOD) + (hToNoWrap : s.storageMap 1 recipient + amount < UINT64_MOD) : + let s' := ((ERC7984.transferFrom holder recipient amount blockTimestamp).run s).snd + transferFrom_conservation_spec holder recipient s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold transferFrom_conservation_spec + grind [ERC7984.transferFrom, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferInsufficient.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferInsufficient.lean new file mode 100644 index 00000000..f3ca6c04 --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferInsufficient.lean @@ -0,0 +1,34 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +When the sender has insufficient balance, no tokens move. + +If `balances[from] < amount`, then both balances are unchanged. +This is the defining semantic difference from ERC-20: insufficient +balance causes a silent 0-transfer (via FHE.select) instead of a revert. 
+-/ +theorem transfer_insufficient + (sender recipient : Address) (amount : Uint256) (s : ContractState) + (hFrom : (sender != zeroAddress) = true) + (hTo : (recipient != zeroAddress) = true) + (hInit : s.storageMap 2 sender ≠ 0) + (hDistinct : sender ≠ recipient) + (hInsufficient : ¬(s.storageMap 1 sender >= amount)) + (hAmount64 : amount < UINT64_MOD) + (hFromBal64 : s.storageMap 1 sender < UINT64_MOD) + (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) : + let s' := ((ERC7984.transfer sender recipient amount).run s).snd + transfer_insufficient_spec sender recipient amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold transfer_insufficient_spec + grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferNoBalanceRevert.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferNoBalanceRevert.lean new file mode 100644 index 00000000..f90273f7 --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferNoBalanceRevert.lean @@ -0,0 +1,39 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +Transfer never reverts based on balance sufficiency. + +Given that all plaintext preconditions hold (non-zero addresses, +initialized sender balance), the transfer always succeeds — it +returns `ContractResult.success`, never `ContractResult.revert`. + +This is the contract-level non-leakage invariant for ERC-7984: +an on-chain observer cannot learn whether the sender had sufficient +balance by checking if the transaction reverted. 
+ +Note: NO hypothesis about `fromBalance >= amount` is provided. +The theorem must hold for BOTH sufficient and insufficient balances. +-/ +theorem transfer_no_balance_revert + (sender recipient : Address) (amount : Uint256) (s : ContractState) + (hFrom : (sender != zeroAddress) = true) + (hTo : (recipient != zeroAddress) = true) + (hInit : s.storageMap 2 sender ≠ 0) + (hDistinct : sender ≠ recipient) + (hAmount64 : amount < UINT64_MOD) + (hFromBal64 : s.storageMap 1 sender < UINT64_MOD) + (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) : + transfer_no_balance_revert_spec sender recipient amount s := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold transfer_no_balance_revert_spec + grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferPreservesSupply.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferPreservesSupply.lean new file mode 100644 index 00000000..d6b43503 --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferPreservesSupply.lean @@ -0,0 +1,32 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +Transfer does not modify totalSupply. + +The transfer function only writes to balances (storageMap slot 1) and +balanceInitialized (storageMap slot 2). It never touches slot 0 (totalSupply). +Only mint and burn paths modify totalSupply.
+-/ +theorem transfer_preserves_supply + (sender recipient : Address) (amount : Uint256) (s : ContractState) + (hFrom : (sender != zeroAddress) = true) + (hTo : (recipient != zeroAddress) = true) + (hInit : s.storageMap 2 sender ≠ 0) + (hAmount64 : amount < UINT64_MOD) + (hFromBal64 : s.storageMap 1 sender < UINT64_MOD) + (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) : + let s' := ((ERC7984.transfer sender recipient amount).run s).snd + transfer_preserves_supply_spec s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold transfer_preserves_supply_spec + grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferSufficient.lean b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferSufficient.lean new file mode 100644 index 00000000..f0c775ab --- /dev/null +++ b/Benchmark/GeneratedPreview/Zama/ERC7984ConfidentialToken/Tasks/TransferSufficient.lean @@ -0,0 +1,34 @@ +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs +import Benchmark.Grindset + +namespace Benchmark.Cases.Zama.ERC7984ConfidentialToken + +open Verity +open Verity.EVM.Uint256 + +/-- +When the sender has sufficient balance, transfer moves exactly `amount` tokens. 
+ +If `balances[from] >= amount`, then: +- `balances[from]` decreases by `amount` +- `balances[to]` increases by `amount` (mod 2^64) +-/ +theorem transfer_sufficient + (sender recipient : Address) (amount : Uint256) (s : ContractState) + (hFrom : (sender != zeroAddress) = true) + (hTo : (recipient != zeroAddress) = true) + (hInit : s.storageMap 2 sender ≠ 0) + (hDistinct : sender ≠ recipient) + (hSufficient : s.storageMap 1 sender >= amount) + (hAmount64 : amount < UINT64_MOD) + (hFromBal64 : s.storageMap 1 sender < UINT64_MOD) + (hToBal64 : s.storageMap 1 recipient < UINT64_MOD) : + let s' := ((ERC7984.transfer sender recipient amount).run s).snd + transfer_sufficient_spec sender recipient amount s s' := by + -- Grindset-first skeleton. See harness/PROOF_PATTERNS.md. + -- Try `grind` with contract symbol hints; fall back to `simp` / + -- `by_cases` if grind leaves goals. Use `grind?` for hints. + unfold transfer_sufficient_spec + grind [ERC7984.transfer, ERC7984.totalSupply, ERC7984.balances, ERC7984.balanceInitialized, ERC7984.operators] + +end Benchmark.Cases.Zama.ERC7984ConfidentialToken diff --git a/Benchmark/Grindset.lean b/Benchmark/Grindset.lean new file mode 100644 index 00000000..fed535f1 --- /dev/null +++ b/Benchmark/Grindset.lean @@ -0,0 +1,28 @@ +import Benchmark.Grindset.Invariants +import Benchmark.Grindset.Reach +import Benchmark.Grindset.Attr +import Benchmark.Grindset.Monad +import Benchmark.Grindset.Core +import Benchmark.Grindset.Tests +import Benchmark.Grindset.Arith + +/-! +# Benchmark.Grindset — umbrella module + +Single entry point for the Verity grindset. Downstream proofs can write +`import Benchmark.Grindset` and immediately use `grind` to discharge +slot-write, monad-bind, and spec-unfolding obligations. + +Contents: +- `Grindset.Attr` (S1): `grind_norm` simp set attribute. +- `Grindset.Monad` (S1): `Verity.bind` / `ContractResult.snd` / `Contract.run` + normalization lemmas. +- `Grindset.Core` (S1): storage + mapping operational lemmas. 
+- `Grindset.Tests` (S1): three demo proofs closed by `grind`. +- `Grindset.Invariants` (A1): 118 `@[grind =] / @[grind →] / @[grind]` + tagged invariant lemmas across all benchmark contracts. +- `Grindset.Reach` (A3): reachability lemma pack and the + `verity_reach_grind` tactic for `safe/owner_manager_reach` chain proofs. +- `Grindset.Arith` (A4): arithmetic grind pack for `lido/vaulthub_locked` + — ceilDiv unfolding, sandwich, monotonicity, Uint256↔Nat wrappers. +-/ diff --git a/Benchmark/Grindset/Arith.lean b/Benchmark/Grindset/Arith.lean new file mode 100644 index 00000000..1bed835e --- /dev/null +++ b/Benchmark/Grindset/Arith.lean @@ -0,0 +1,236 @@ +/- + Benchmark.Grindset.Arith — arithmetic grind pack for Lido VaulthubLocked. + + Mission A4: provide `@[grind_norm, simp]`-tagged lemmas + that help `grind` and `omega` close the three supporting arithmetic obligations + in the `lido/vaulthub_locked` case: + + 1. `ceildiv_sandwich_spec` — ceilDiv(x,d) * d ≥ x + 2. `shares_conversion_monotone_spec` — getPooledEthBySharesRoundUp is monotone + 3. `locked_funds_solvency_spec` — solvency after syncLocked + + Lemma inventory: + • `mul_val_of_no_overflow` — Uint256 mul → Nat mul under overflow guard + • `sub_val_of_le` — Uint256 sub → Nat sub when b ≤ a + • `div_val` — Uint256 div → Nat div when b ≠ 0 + • `add_val_of_no_overflow` — Uint256 add → Nat add under overflow guard + • `ceilDiv_val_eq` — ceilDiv a b = (a.val + b.val - 1) / b.val (Nat level) + • `ceilDiv_le_numerator` — ceilDiv a b ≤ a (Nat-val level) + • `ceilDiv_mul_ge` — ceilDiv(x,d) * d ≥ x (the sandwich, key lemma) + • `ceilDiv_monotone` — a ≥ b → ceilDiv a d ≥ ceilDiv b d + + All public lemmas above carry `@[grind_norm, simp]` so that downstream proofs can + write `simp only [grind_norm]; grind` or `omega`. + + Status: zero `sorry`, zero new axioms.
+-/ + +import Benchmark.Cases.Lido.VaulthubLocked.Specs +import Benchmark.Grindset.Attr + +namespace Benchmark.Grindset.Arith + +open Verity +open Benchmark.Cases.Lido.VaulthubLocked + +/-! ## Uint256 → Nat wrapper lemmas -/ + +/-- Uint256 multiplication reduces to Nat multiplication when no overflow. -/ +@[grind_norm, simp] +theorem mul_val_of_no_overflow (a b : Uint256) + (h : a.val * b.val < Verity.Core.Uint256.modulus) : + (Verity.EVM.Uint256.mul a b).val = a.val * b.val := by + simp [HMul.hMul, Verity.Core.Uint256.mul, Verity.Core.Uint256.ofNat] + exact Nat.mod_eq_of_lt h + +/-- Uint256 subtraction reduces to Nat subtraction when b ≤ a. -/ +@[grind_norm, simp] +theorem sub_val_of_le (a b : Uint256) + (h : b.val ≤ a.val) : + (Verity.EVM.Uint256.sub a b).val = a.val - b.val := by + have hlt : a.val - b.val < Verity.Core.Uint256.modulus := + Nat.lt_of_le_of_lt (Nat.sub_le _ _) a.isLt + simp [HSub.hSub, Verity.Core.Uint256.sub, h, Verity.Core.Uint256.ofNat] + exact Nat.mod_eq_of_lt hlt + +/-- Uint256 division reduces to Nat division when divisor is nonzero. -/ +@[grind_norm, simp] +theorem div_val (a b : Uint256) (hb : b.val ≠ 0) : + (Verity.EVM.Uint256.div a b).val = a.val / b.val := by + have hlt : a.val / b.val < Verity.Core.Uint256.modulus := + Nat.lt_of_le_of_lt (Nat.div_le_self _ _) a.isLt + simp [HDiv.hDiv, Verity.Core.Uint256.div, hb, Verity.Core.Uint256.ofNat] + exact Nat.mod_eq_of_lt hlt + +/-- Uint256 addition reduces to Nat addition when no overflow. -/ +@[grind_norm, simp] +theorem add_val_of_no_overflow (a b : Uint256) + (h : a.val + b.val < Verity.Core.Uint256.modulus) : + (Verity.EVM.Uint256.add a b).val = a.val + b.val := by + simp [HAdd.hAdd, Verity.Core.Uint256.add, Verity.Core.Uint256.ofNat] + exact Nat.mod_eq_of_lt h + +/-! ## ceilDiv val-level unfolding -/ + +/-- Natural-number identity: for a > 0, b > 0, (a-1)/b + 1 = (a+b-1)/b. 
-/ +private theorem ceildiv_identity (a b : Nat) (ha : a > 0) (hb : b > 0) : + (a - 1) / b + 1 = (a + b - 1) / b := by + have h : a + b - 1 = (a - 1) + b := by omega + rw [h, Nat.add_div_right _ hb] + +/-- Nat-level: (a+b-1)/b ≤ a when b ≥ 1. -/ +private theorem ceilDiv_nat_le (a b : Nat) (hb : b ≥ 1) : + (a + b - 1) / b ≤ a := by + by_cases ha : a = 0 + · subst ha; simp + right; exact Nat.sub_lt (by omega) (by decide) + · have haPos : a > 0 := Nat.pos_of_ne_zero ha + have hRw : a + b - 1 = (a - 1) + b := by omega + rw [hRw, Nat.add_div_right _ (by omega : b > 0)] + have := Nat.div_le_self (a - 1) b; omega + +/-- ceilDiv(a,b).val = (a.val + b.val - 1) / b.val when b > 0. -/ +@[grind_norm, simp] +theorem ceilDiv_val_eq (a b : Uint256) (hb : b.val > 0) : + (ceilDiv a b).val = (a.val + b.val - 1) / b.val := by + by_cases ha : a.val = 0 + · -- a = 0 case + have haEq : a = 0 := Verity.Core.Uint256.ext (by simp [ha, Verity.Core.Uint256.val_zero]) + rw [haEq] + simp only [ceilDiv, ↓reduceIte, Verity.Core.Uint256.val_zero, Nat.zero_add] + exact (Nat.div_eq_of_lt (by omega)).symm + · -- a > 0 case + have haPos : a.val > 0 := Nat.pos_of_ne_zero ha + have haNe : a ≠ 0 := by + intro h; rw [h] at haPos; simp [Verity.Core.Uint256.val_zero] at haPos + simp only [ceilDiv, haNe, ↓reduceIte] + -- sub a 1 + have h1le : (1 : Uint256).val ≤ a.val := by + simp [Verity.Core.Uint256.val_one]; omega + have hSubVal : (Verity.EVM.Uint256.sub a 1).val = a.val - 1 := by + have := Verity.Core.Uint256.sub_eq_of_le h1le + simp [Verity.Core.Uint256.val_one] at this + exact this + -- div (sub a 1) b + have hbne : b.val ≠ 0 := by omega + have hDivVal : (Verity.EVM.Uint256.div (Verity.EVM.Uint256.sub a 1) b).val = (a.val - 1) / b.val := by + simp only [HDiv.hDiv, Verity.Core.Uint256.div, hbne, ↓reduceIte, Verity.Core.Uint256.ofNat, hSubVal] + have hDivLt : (a.val - 1) / b.val < Verity.Core.Uint256.modulus := by + calc (a.val - 1) / b.val ≤ a.val - 1 := Nat.div_le_self _ _ + _ < a.val := by omega + _ 
< Verity.Core.Uint256.modulus := a.isLt + exact Nat.mod_eq_of_lt hDivLt + -- add (div ...) 1 + have hAddLt : (a.val - 1) / b.val + 1 < Verity.Core.Uint256.modulus := by + have hCeil := ceilDiv_nat_le a.val b.val (by omega) + calc (a.val - 1) / b.val + 1 + ≤ a.val := by rw [ceildiv_identity a.val b.val haPos hb]; exact hCeil + _ < Verity.Core.Uint256.modulus := a.isLt + simp only [HAdd.hAdd, Verity.Core.Uint256.add, Verity.Core.Uint256.ofNat, hDivVal, + Verity.Core.Uint256.val_one] + rw [Nat.mod_eq_of_lt hAddLt] + exact ceildiv_identity a.val b.val haPos hb + +/-- ceilDiv(a,b) ≤ a (Nat val level) when b ≥ 1. -/ +@[grind_norm, simp] +theorem ceilDiv_le_numerator (a b : Uint256) (hb : b.val ≥ 1) : + (ceilDiv a b).val ≤ a.val := by + rw [ceilDiv_val_eq a b (by omega)] + exact ceilDiv_nat_le a.val b.val hb + +/-! ## The sandwich: ceilDiv(x,d) * d ≥ x -/ + +/-- ceilDiv(x,d) * d ≥ x when the product does not overflow. Core sandwich lemma. -/ +@[grind_norm, simp] +theorem ceilDiv_mul_ge (x d : Uint256) (hd : d.val > 0) + (hNoOverflow : (ceilDiv x d).val * d.val < Verity.Core.Uint256.modulus) : + (Verity.EVM.Uint256.mul (ceilDiv x d) d).val ≥ x.val := by + have hMulEq : (Verity.EVM.Uint256.mul (ceilDiv x d) d).val = (ceilDiv x d).val * d.val := by + simp [HMul.hMul, Verity.Core.Uint256.mul, Verity.Core.Uint256.ofNat] + exact Nat.mod_eq_of_lt hNoOverflow + rw [hMulEq, ceilDiv_val_eq x d hd] + let q := (x.val + d.val - 1) / d.val + let r := (x.val + d.val - 1) % d.val + show x.val ≤ q * d.val + have hEuclid : d.val * q + r = x.val + d.val - 1 := Nat.div_add_mod .. + have hRem : r < d.val := Nat.mod_lt _ hd + have hComm : q * d.val = d.val * q := Nat.mul_comm q d.val + omega + +/-! ## Monotonicity of ceilDiv in the numerator -/ + +/-- ceilDiv is monotone in the numerator: a ≥ b → ceilDiv a d ≥ ceilDiv b d. 
-/ +@[grind_norm, simp] +theorem ceilDiv_monotone (a b d : Uint256) (hd : d.val > 0) + (hab : a.val ≥ b.val) : + (ceilDiv a d).val ≥ (ceilDiv b d).val := by + rw [ceilDiv_val_eq a d hd, ceilDiv_val_eq b d hd] + exact Nat.div_le_div_right (by omega) + +/-! ## Spec-level convenience lemmas -/ + +/-- ceildiv_sandwich_spec stated directly for grind consumption. -/ +@[grind_norm, simp] +theorem ceildiv_sandwich_spec_holds (x d : Uint256) + (hd : d > 0) + (hNoOverflow : (ceilDiv x d).val * d.val < Verity.Core.Uint256.modulus) : + ceildiv_sandwich_spec x d := by + unfold ceildiv_sandwich_spec + intro _ _ + simp [Verity.Core.Uint256.le_def] + exact ceilDiv_mul_ge x d (by simp [Verity.Core.Uint256.lt_def] at hd; exact hd) hNoOverflow + +/-- shares_conversion_monotone_spec stated directly for grind consumption. -/ +@[grind_norm, simp] +theorem shares_conversion_monotone_spec_holds + (a b totalPooledEther totalShares : Uint256) + (hTS : totalShares.val > 0) + (hNoOverflow : a.val * totalPooledEther.val < Verity.Core.Uint256.modulus) : + shares_conversion_monotone_spec a b totalPooledEther totalShares := by + unfold shares_conversion_monotone_spec + intro hab hNoOv + unfold getPooledEthBySharesRoundUp + simp [Verity.Core.Uint256.le_def] + have habVal : b.val ≤ a.val := by + simp [Verity.Core.Uint256.le_def] at hab; exact hab + have hBNoOverflow : b.val * totalPooledEther.val < Verity.Core.Uint256.modulus := + Nat.lt_of_le_of_lt (Nat.mul_le_mul_right _ habVal) hNoOverflow + have hMulA : (Verity.EVM.Uint256.mul a totalPooledEther).val = a.val * totalPooledEther.val := by + simp [HMul.hMul, Verity.Core.Uint256.mul, Verity.Core.Uint256.ofNat] + exact Nat.mod_eq_of_lt hNoOverflow + have hMulB : (Verity.EVM.Uint256.mul b totalPooledEther).val = b.val * totalPooledEther.val := by + simp [HMul.hMul, Verity.Core.Uint256.mul, Verity.Core.Uint256.ofNat] + exact Nat.mod_eq_of_lt hBNoOverflow + rw [ceilDiv_val_eq (Verity.EVM.Uint256.mul a totalPooledEther) totalShares hTS, + ceilDiv_val_eq 
(Verity.EVM.Uint256.mul b totalPooledEther) totalShares hTS, + hMulA, hMulB] + exact Nat.div_le_div_right (by + have : b.val * totalPooledEther.val ≤ a.val * totalPooledEther.val := + Nat.mul_le_mul_right _ habVal + omega) + +/-! ## Demo theorems -/ + +/-- Demo: ceildiv_sandwich_spec is closable with the grindset. -/ +theorem demo_ceildiv_sandwich (x d : Uint256) + (hd : d > 0) + (hNoOverflow : (ceilDiv x d).val * d.val < Verity.Core.Uint256.modulus) : + ceildiv_sandwich_spec x d := + ceildiv_sandwich_spec_holds x d hd hNoOverflow + +/-- Demo: shares_conversion_monotone_spec is closable with the grindset. -/ +theorem demo_shares_conversion_monotone + (a b totalPooledEther totalShares : Uint256) + (hTS : totalShares.val > 0) + (hNoOverflow : a.val * totalPooledEther.val < Verity.Core.Uint256.modulus) : + shares_conversion_monotone_spec a b totalPooledEther totalShares := + shares_conversion_monotone_spec_holds a b totalPooledEther totalShares hTS hNoOverflow + +/-- Demo: ceilDiv_mul_ge directly yields the sandwich inequality. -/ +theorem demo_sandwich_direct (x d : Uint256) + (hd : d.val > 0) + (hNoOverflow : (ceilDiv x d).val * d.val < Verity.Core.Uint256.modulus) : + (Verity.EVM.Uint256.mul (ceilDiv x d) d).val ≥ x.val := + ceilDiv_mul_ge x d hd hNoOverflow + +end Benchmark.Grindset.Arith diff --git a/Benchmark/Grindset/Attr.lean b/Benchmark/Grindset/Attr.lean new file mode 100644 index 00000000..0f272e88 --- /dev/null +++ b/Benchmark/Grindset/Attr.lean @@ -0,0 +1,26 @@ +/- + Benchmark.Grindset.Attr — registers the `grind_norm` simp attribute. + + Kept in a separate file because Lean 4 does not allow using an attribute in + the same file where it is registered. +-/ + +import Lean.Meta.Tactic.Simp.SimpTheorems +import Lean.Meta.Tactic.Simp.RegisterCommand + +/-- Simp set for the Verity grindset. 
Unfolds the `Contract` monad + scaffolding (`bind`, `pure`, `Contract.run`, `ContractResult.snd`, + `ContractResult.fst`) and the primitive `*_run` reductions so that a + benchmark task goal of shape + + ((Contract.f args).run s).snd.storage n = v + + collapses to plain record-update reasoning over `s`. Usage: + + ``` + simp only [grind_norm] + ``` + + Members are registered across `Benchmark.Grindset.Monad`, + `Benchmark.Grindset.Core` and `Benchmark.Grindset.Arith`. -/ +register_simp_attr grind_norm diff --git a/Benchmark/Grindset/Core.lean b/Benchmark/Grindset/Core.lean new file mode 100644 index 00000000..e61e1771 --- /dev/null +++ b/Benchmark/Grindset/Core.lean @@ -0,0 +1,214 @@ +/- + Benchmark.Grindset.Core — operational lemmas tagged for `grind`. + + The lemmas here are the stock facts needed to close a slot-write / + spec-unfolding obligation in one line once the monadic scaffolding has been + collapsed (see `Benchmark.Grindset.Monad`). They rewrite the shape + + { s with storage := fun k => if k == slot then v else s.storage k }.storage n + + into either `v` (when `n = slot`) or `s.storage n` (when `n ≠ slot`). The + same pattern is covered for `storageMap`, `storageAddr`, and the mapping + variants. + + Every lemma in this module carries both `@[simp]` and `@[grind_norm]`. None + of them is additionally tagged `@[grind =]` in this file. + + Status: zero `sorry`, zero new axioms. +-/ + +import Verity.Core +import Benchmark.Grindset.Monad + +namespace Benchmark.Grindset + +open Verity + +/-! ## Uint256 slot storage -/ + +/-- Reading the slot just written returns the written value. -/ +@[grind_norm, simp] +theorem storage_setStorage_eq + (s : ContractState) (slot : Nat) (v : Uint256) : + ({ s with + storage := fun k => if k == slot then v else s.storage k } : ContractState).storage slot + = v := by + simp + +/-- Reading a different slot from a `setStorage`-style update ignores the + update. 
-/ +@[grind_norm, simp] +theorem storage_setStorage_ne + (s : ContractState) (slot n : Nat) (v : Uint256) (h : n ≠ slot) : + ({ s with + storage := fun k => if k == slot then v else s.storage k } : ContractState).storage n + = s.storage n := by + have : (n == slot) = false := by + simpa [Nat.beq_eq_true_eq] using h + simp [this] + +/-! ## Address slot storage -/ + +@[grind_norm, simp] +theorem storageAddr_setStorageAddr_eq + (s : ContractState) (slot : Nat) (v : Address) : + ({ s with + storageAddr := fun k => if k == slot then v else s.storageAddr k } : ContractState).storageAddr slot + = v := by + simp + +@[grind_norm, simp] +theorem storageAddr_setStorageAddr_ne + (s : ContractState) (slot n : Nat) (v : Address) (h : n ≠ slot) : + ({ s with + storageAddr := fun k => if k == slot then v else s.storageAddr k } : ContractState).storageAddr n + = s.storageAddr n := by + have : (n == slot) = false := by + simpa [Nat.beq_eq_true_eq] using h + simp [this] + +/-! ## Mapping storage (Address → Uint256) -/ + +@[grind_norm, simp] +theorem storageMap_setMapping_eq + (s : ContractState) (slot : Nat) (key : Address) (v : Uint256) : + ({ s with + storageMap := fun sl addr => + if sl == slot && addr == key then v else s.storageMap sl addr, + knownAddresses := fun sl => + if sl == slot then (s.knownAddresses sl).insert key + else s.knownAddresses sl } : ContractState).storageMap slot key + = v := by + simp + +/-- Writing `setMapping` at `(slot, key)` and reading the same slot at a + different key yields the pre-state value at that key. 
-/ +@[grind_norm, simp] +theorem storageMap_setMapping_ne_key + (s : ContractState) (slot : Nat) (key key' : Address) (v : Uint256) + (h : key' ≠ key) : + ({ s with + storageMap := fun sl addr => + if sl == slot && addr == key then v else s.storageMap sl addr, + knownAddresses := fun sl => + if sl == slot then (s.knownAddresses sl).insert key + else s.knownAddresses sl } : ContractState).storageMap slot key' + = s.storageMap slot key' := by + have : (key' == key) = false := by + simpa [beq_iff_eq] using h + simp [this] + +@[grind_norm, simp] +theorem storageMap_setMapping_ne_slot + (s : ContractState) (slot n : Nat) (key key' : Address) (v : Uint256) + (h : n ≠ slot) : + ({ s with + storageMap := fun sl addr => + if sl == slot && addr == key then v else s.storageMap sl addr, + knownAddresses := fun sl => + if sl == slot then (s.knownAddresses sl).insert key + else s.knownAddresses sl } : ContractState).storageMap n key' + = s.storageMap n key' := by + have : (n == slot) = false := by + simpa [Nat.beq_eq_true_eq] using h + simp [this] + +/-! +## Specialised helper for the "set-mapping-under-sender" pattern + +Every bench task that uses a mapping keyed by `s.sender` reads back the +mapping at `s.sender` afterwards. This specialised rewrite collapses the +pattern in a single step. -/ + +@[grind_norm, simp] +theorem storageMap_setMapping_sender_eq + (s : ContractState) (slot : Nat) (v : Uint256) : + ({ s with + storageMap := fun sl addr => + if sl == slot && addr == s.sender then v else s.storageMap sl addr, + knownAddresses := fun sl => + if sl == slot then (s.knownAddresses sl).insert s.sender + else s.knownAddresses sl } : ContractState).storageMap slot s.sender + = v := by + simp + +/-! +## `sender` is preserved by every primitive storage write. + +These are implicit record-update facts, but tagging them means `simp` does +not have to fight the elaborator to see that the final state's `.sender` +field is still the original `.sender`. 
-/ + +@[grind_norm, simp] +theorem sender_after_setStorage + (s : ContractState) (slot : Nat) (v : Uint256) : + ({ s with + storage := fun k => if k == slot then v else s.storage k } : ContractState).sender + = s.sender := rfl + +@[grind_norm, simp] +theorem sender_after_setMapping + (s : ContractState) (slot : Nat) (key : Address) (v : Uint256) : + ({ s with + storageMap := fun sl addr => + if sl == slot && addr == key then v else s.storageMap sl addr, + knownAddresses := fun sl => + if sl == slot then (s.knownAddresses sl).insert key + else s.knownAddresses sl } : ContractState).sender + = s.sender := rfl + +@[grind_norm, simp] +theorem sender_after_setStorageAddr + (s : ContractState) (slot : Nat) (v : Address) : + ({ s with + storageAddr := fun k => if k == slot then v else s.storageAddr k } : ContractState).sender + = s.sender := rfl + +/-! +## Cross-type preservation — reading `storage` after a mapping write, etc. + +These are trivial by `rfl`, but they help `simp`/`grind` traverse +multi-write contracts without getting lost in record syntax. -/ + +@[grind_norm, simp] +theorem storage_after_setMapping + (s : ContractState) (n slot : Nat) (key : Address) (v : Uint256) : + ({ s with + storageMap := fun sl addr => + if sl == slot && addr == key then v else s.storageMap sl addr, + knownAddresses := fun sl => + if sl == slot then (s.knownAddresses sl).insert key + else s.knownAddresses sl } : ContractState).storage n + = s.storage n := rfl + +@[grind_norm, simp] +theorem storageMap_after_setStorage + (s : ContractState) (slot n : Nat) (v : Uint256) (addr : Address) : + ({ s with + storage := fun k => if k == slot then v else s.storage k } : ContractState).storageMap n addr + = s.storageMap n addr := rfl + +/-! ## `require` reductions tied to a hypothesis -/ + +/-- When the condition of `require` is definitely `true`, the monadic step + reduces to `pure ()`. Useful for branch-heavy contracts where the + precondition fires a `require`. 
-/ +@[grind_norm, simp] +theorem require_of_true_run (s : ContractState) (msg : String) : + (require true msg).run s = ContractResult.success () s := rfl + +@[grind_norm, simp] +theorem require_of_false_run (s : ContractState) (msg : String) : + (require false msg).run s = ContractResult.revert msg s := rfl + +/-! +## `StorageSlot` slot-projection equalities + +The macro-generated storage field identifiers (e.g. `SideEntrance.poolBalance`) +are `StorageSlot`s whose `.slot` literal is the slot number. -/ + +@[grind_norm, simp] +theorem StorageSlot.slot_mk (n : Nat) : + ({ slot := n } : StorageSlot Uint256).slot = n := rfl + +end Benchmark.Grindset diff --git a/Benchmark/Grindset/INVARIANTS_AUDIT.md b/Benchmark/Grindset/INVARIANTS_AUDIT.md new file mode 100644 index 00000000..6fbee764 --- /dev/null +++ b/Benchmark/Grindset/INVARIANTS_AUDIT.md @@ -0,0 +1,431 @@ +# Mission A1 — Verity Invariants / Spec Helpers Grind Audit + +**Author:** grindset-a1-worker +**Scope:** read-only audit of `Verity` library (`.lake/packages/verity/Verity/**`) and case-local +`Benchmark/Cases/**/Specs.lean`. Goal: identify **invariant-style lemmas and domain predicates** +worth exposing to the `grind` tactic via `attribute [grind …]`, complementary to sibling worker S1 +(who is tagging core operational primitives in `Benchmark/Grindset`). + +**Ground rules followed:** + +- No file under `.lake/packages/verity/**` was modified. +- No `Benchmark/Cases/**/Proofs.lean` was opened. +- `Benchmark/Cases/**/Specs.lean` **content** was not modified; tags are applied solely via + `attribute [grind …] Benchmark.Cases.…` in `Benchmark/Grindset/Invariants.lean`. +- Grind is orthogonal to simp: tagging a `@[simp]` lemma with `[grind =]` is not a double-tag + conflict (they feed different automation pipes). However, we are conservative: for ubiquitous + already-simp lemmas whose shape is a trivial identity (e.g. 
`mem_def : a ∈ s ↔ a ∈ s.elements`) + we skip the extra `grind` tag because simp + basic grind reasoning already normalize them. + +## Legend + +| Attribute form | Meaning | +|---|---| +| `@[grind]` | Default bundle — equations as bidirectional rewrites, implications as match rules. Only safe for non-looping shapes. | +| `@[grind =]` | Equation, oriented left-to-right — the LHS of the conclusion is the E-matching pattern, so it must mention every bound parameter (`@[grind =_]` uses the RHS, `@[grind _=_]` both sides). Good for LHS = RHS where neither side contains the other's head pattern. | +| `@[grind →]` | Forward implication / directional — premise patterns match the hypotheses in the goal; conclusion is introduced. Use when backward direction would loop or introduces too many variables. | +| `@[grind ←]` | Backward — conclusion drives matching (useful for existentials and disjunctions). | +| `NOT TAGGED` | Deliberately left alone: E-match loop risk, overly specific preconditions, constant, or redundant with existing `@[simp]`. | + +## Executive summary + +Final numbers after the `lake build Benchmark.Grindset.Invariants` iteration loop. The initial +candidate list was trimmed twice when grind's E-matcher rejected tags (either because hypotheses +lacked matchable patterns, because the conclusion was a non-equality inequality incompatible +with `[grind =]`, or because the equation's LHS didn't mention every bound parameter). + +| Bucket | Scanned | Candidates surfaced | Tagged in `Invariants.lean` | Deliberately rejected / dropped | +|---|---|---|---|---| +| Verity core (Uint256 / FiniteSet / Address / Semantics) | ~1100 lines | 17 | **2** | 15 (already `@[simp]` or trivial rfl) | +| Verity Proofs.Stdlib.Math (ceil/floor div, wad, safe*) | 909 lines | 65 | **55** | 10 (commutativity → E-match loop traps; a handful of overly-specific shapes) | +| Verity Proofs.Stdlib.ListSum | 161 lines | 7 | **4** | 3 (`map_sum_point_update/decrease/transfer_eq` — LHS of equation doesn't mention bound `delta`/`src`/`dst`; grind refuses to register. Use manually via `grind [map_sum_transfer_eq]`.) 
| +| Verity Proofs.Stdlib.MappingAutomation | 371 lines | ~50 | **25** | ~25 (context-preservation lemmas covered or redundant; we cherry-pick the core shapes per mapping family) | +| Verity Specs.Common / Specs.Common.Sum | ~470 lines | 5 | **2** | 3 (`sumBalances_insert_new`, `sumBalances_update_existing`, `balancesFinite_preserved_deposit` — fresh parameters not covered by pattern LHS; use manually) | +| Case-local `Specs.lean` defs (predicates/accessors across 10 cases) | ~1200 lines | 22 definitions worth unfolding | **17** | 5 (loop risk — `acyclic`, `freshInList`, `reachable`, multi-branch `calculateBuyReserve/SellReserve`, `spotPrices`) | +| **Totals** | | **~166 candidates** | **118 tagged** | **48 rejected / dropped** | + +**Tag-kind breakdown:** 49 × `[grind =]`, 48 × `[grind →]`, 21 × `[grind]` +(plain — for δ-unfold on case `def`s and for the 4 mulDivDown inequality lemmas whose +conclusions are `≤` / `<` rather than `=`). + +### Top 5 most impactful tagged invariants (by expected obligation coverage) + +1. **`Verity.Proofs.Stdlib.MappingAutomation.setMapping{,Uint,2}_getMapping{,Uint,2}_same`** — + store-load identity across all three mapping families (Addr→Uint256, Uint256→Uint256, + Addr→Addr→Uint256). Every case with an obligation of the form "after setting mapping[k] := v, + reading mapping[k] = v" reduces to one of these three shapes. All tagged `[grind =]`. +2. **`Verity.Proofs.Stdlib.MappingAutomation.setMapping{,Uint,2}_getMapping{,Uint,2}_diff*`** — + cross-key non-interference. Paired with (1), these form the "mapping core" that drives the + bulk of post-write state reasoning. Tagged `[grind =]` (the `≠` antecedent lacks an extractable + pattern for `→`, but the conclusion still rewrites). +3. **`Verity.Specs.Common.sumBalances_insert_existing` & `sumBalances_zero_of_all_zero`** — + the two sum-preservation identities whose LHS captures every bound parameter. + Directly usable by ERC20/ERC7984 balance-conservation obligations. +4. 
**`Verity.Proofs.Stdlib.Math.mulDivUp_mul_ge` / `wDivUp_mul_ge`** — `a * b ≤ mulDivUp a b c * c` + and `a * WAD ≤ wDivUp a b * b`. The "ceiling multiplies back up" sandwich used by Lido's + `locked_funds_solvency_spec`, NexusMutual price-band monotonicity, and Morpho-style + collateralization. Tagged `[grind →]`. +5. **Case-local `Benchmark.Cases.Safe.OwnerManagerReach.{next,isOwner,ownerListInvariant,isChain,inListReachable}`** — + all tagged plain `[grind]` so grind unfolds them opportunistically. Safe/OwnerManager proofs + hinge on unfolding `next` to a `storageMap 0 a` read and peeling `isChain`/`ownerListInvariant`. + Without these, grind cannot see the reachability structure. + +--- + +## Part I — Verity core library (read-only) + +### I.1 `Verity/Core/Uint256.lean` + +Almost every algebraic lemma (`add_comm`, `add_assoc`, `mul_comm`, `mul_one`, `sub_self`, +`sub_add_cancel_left`, `zero_add`, …) is already `@[simp]`. Tagging them with `grind` again would +be redundant noise. **Skipped.** + +| Lemma | Line | Shape | Existing attr | Grind decision | +|---|---|---|---|---| +| `add_comm`, `add_assoc`, `add_left_comm`, `zero_add`, `add_zero` | 198-262 | `+` identities | `@[simp]` | SKIP (simp already normalizes) | +| `sub_zero`, `sub_self`, `sub_add_cancel_left` | 269-357 | `-` identities | `@[simp]` | SKIP | +| `mul_comm`, `mul_one`, `one_mul`, `zero_mul`, `mul_zero`, `add_mul` | 289-339 | `*` identities | `@[simp]` | SKIP | +| `div_one`, `zero_div` | 412-425 | `/` identities | `@[simp]` | SKIP | +| **`sub_add_cancel`** (line **538**) | 538 | `(a + b) - b = a` | (none) | **`[grind =]`** — directly cancels the common Uint256 wrap-sub shape that simp sometimes misses because of normal-form ordering. | +| `add_right_cancel` | 549 | `a + c = b + c → a = b` | (none) | `[grind →]` — useful cancellation, forward-only to avoid grind trying to re-introduce `+ c` on both sides. 
| + +→ **2 tagged from Uint256.** (`sub_add_cancel` as `grind =`, `add_right_cancel` as `grind →`.) + +### I.2 `Verity/Core/FiniteSet.lean` + +Every `mem_insert / mem_inter / mem_union / mem_diff / mem_symmDiff / contains_eq_true / +contains_eq_false / isSubset_eq_{true,false}` is already `@[simp]`. These are pure `Iff` +definitions that simp handles perfectly; grind already invokes simp. **No additional tags.** + +One exception — `mem_elements_insert` (line 112) is **not** simp because on Lists it introduces a +head comparison. Since `FiniteAddressSet.mem_insert` (line 258) at the set level IS simp, we rely on +it in practice. **Skipped.** + +### I.3 `Verity/Core/Address.lean`, `Verity/Core/Semantics.lean`, `Verity/EVM/Uint256.lean` + +Scanned; almost entirely `def`s and `inductive`s. No plain lemmas beyond what already carries +`@[simp]`. **Nothing to tag.** + +### I.4 `Verity/Specs/Common.lean` + +Exclusively `*_rfl` lemmas that are already `@[simp]`. **Nothing to tag.** + +### I.5 `Verity/Specs/Common/Sum.lean` + +Five non-simp theorems — all **bona-fide invariants over `FiniteAddressSet`-indexed sums of +storage-mapping balances**. These are precisely the shapes balance-conservation obligations reduce +to. 
+ +| Lemma | Line | Signature (abridged) | Category | Grind | +|---|---|---|---|---| +| `sumBalances_insert_existing` | 69 | `addr ∈ addrs → sumBalances slot (addrs.insert addr) b = sumBalances slot addrs b` | sum preserved by redundant insert | **`[grind →]`** (premise drives rewrite; reverse direction would lose info) | +| `sumBalances_insert_new` | 77 | `addr ∉ addrs → b slot addr = 0 → sumBalances slot (addrs.insert addr) (b[addr := amt]) = add (sumBalances slot addrs b) amt` | sum increment on fresh insert | `[grind →]` attempted, **dropped** (use manually) | +| `sumBalances_update_existing` | 179 | `addr ∈ addrs → sumBalances slot addrs (b[addr := new]) = add (sub (sumBalances slot addrs b) old) new` | sum delta on point-update | `[grind →]` attempted, **dropped** (use manually) | +| `sumBalances_zero_of_all_zero` | 212 | `(∀ a ∈ addrs, b slot a = 0) → sumBalances slot addrs b = 0` | zero-sum collapse | **`[grind →]`** | +| `balancesFinite_preserved_deposit` | 221 | `balancesFinite s → balancesFinite (…deposit state…)` | storage-set finiteness preservation | `[grind →]` attempted, **dropped** (use manually) | + +→ **5 candidates, 2 tagged in the final build** (`sumBalances_insert_existing`, `sumBalances_zero_of_all_zero`). All five were first tried as directional because the preconditions (`addr ∈ addrs`, `addr ∉ addrs`, …) are +driving; grind rejected the other three because their fresh parameters are not covered by the pattern, so pass them explicitly (e.g. `grind [sumBalances_insert_new]`). + +### I.6 `Verity/Proofs/Stdlib/ListSum.lean` + +``` +countOcc_cons_eq, countOcc_cons_ne, countOccU_cons_eq, countOccU_cons_ne +map_sum_point_update, map_sum_point_decrease, map_sum_transfer_eq +``` + +The `countOcc*` recurrences: LHS `countOcc target (target :: rest)` unfolds to `1 + countOcc target +rest`. The RHS pattern is a strict sub-term of the LHS, so these are safe as `[grind =]`. + +The three big preservation theorems (`map_sum_point_{update,decrease}`, `map_sum_transfer_eq`) are +heavily-premised: they take pointwise hypotheses like `f' target = f target + delta` and +`∀ addr, addr ≠ target → f' addr = f addr`. For `grind`, tagging these as plain `@[grind]` would +make grind try to e-match on `(addrs.map ?f').sum` everywhere, which occurs **very** often and would +blow up backward search. 
We first tried to tag them as `[grind →]`, so that grind would use them forward once the pointwise +hypotheses are in context, which is the exact usage pattern in the benchmark proofs; grind refused to register that tag (the equation's LHS does not mention every bound parameter), so they are left untagged and must be passed explicitly. + +| Lemma | Line | Shape | Grind | +|---|---|---|---| +| `countOcc_cons_eq` | 27 | `countOcc t (t :: rest) = 1 + countOcc t rest` | **`[grind =]`** | +| `countOcc_cons_ne` | 31 | `a ≠ t → countOcc t (a :: rest) = countOcc t rest` | **`[grind →]`** (conditional eq) | +| `countOccU_cons_eq` | 35 | Uint256 variant of above | **`[grind =]`** | +| `countOccU_cons_ne` | 39 | conditional Uint256 variant | **`[grind →]`** | +| `map_sum_point_update` | 58 | sum eq after pointwise add at target | `[grind →]` attempted, **dropped** (use manually) | +| `map_sum_point_decrease` | 85 | sum eq after pointwise sub at target | `[grind →]` attempted, **dropped** (use manually) | +| `map_sum_transfer_eq` | 117 | sum eq after transfer src → dst | `[grind →]` attempted, **dropped** (use manually) | + +→ **7 candidates, 4 tagged** (the `countOcc*` recurrences); the three `map_sum_*` theorems are invoked manually, e.g. `grind [map_sum_transfer_eq]`. + +### I.7 `Verity/Proofs/Stdlib/MappingAutomation.lean` — 40+ theorems, tag the core shapes + +This file is ~370 lines of `setX_getX_{same,diff}` and `setX_preserves_{storage,events,…}` for the +three mapping families (`Address → Uint256`, `Uint256 → Uint256`, `Address → Address → Uint256`), +plus `setStorage/setStorageAddr` cross-family preservations. + +**Rejected pattern — `setMapping_knownAddresses_*`**: these deal with a separate `knownAddresses` +field that only a subset of cases use; tagging them broadly would add grind noise for cases that +never touch it. 
+ +**Tagged core shapes (`[grind =]` for the "same" identities, `[grind →]` for disequality-gated +"diff" / "preserves"):** + +| Lemma | Line | Shape | Grind | +|---|---|---|---| +| `getMapping_runValue` | 32 | `(getMapping slot key).runValue s = s.storageMap slot.slot key` | `[grind =]` | +| `setMapping_getMapping_same` | 52 | set-then-get-same-key → value | `[grind =]` | +| `setMapping_getMapping_diff` | 57 | `k₁ ≠ k₂ → get after set = original` | `[grind →]` | +| `setMapping_preserves_other_slot` | 66 | cross-slot preservation | `[grind →]` | +| `getMappingUint_runValue` | 110 | Uint256-keyed accessor | `[grind =]` | +| `setMappingUint_getMappingUint_same` | 125 | store-load identity | `[grind =]` | +| `setMappingUint_getMappingUint_diff` | 131 | disjoint-key preservation | `[grind →]` | +| `setMappingUint_preserves_storage` | 140 | cross-field preservation | `[grind →]` | +| `setMappingUint_preserves_storageAddr` | 146 | cross-field preservation | `[grind →]` | +| `setMappingUint_preserves_storageMap` | 152 | cross-field preservation | `[grind →]` | +| `setMappingUint_preserves_storageMap2` | 158 | cross-field preservation | `[grind →]` | +| `setMappingUint_preserves_sender` | 164 | context preservation | `[grind →]` | +| `setMappingUint_preserves_thisAddress` | 170 | context preservation | `[grind →]` | +| `getMapping2_runValue` | 189 | 2-key accessor | `[grind =]` | +| `setMapping2_getMapping2_same` | 204 | 2-key store-load identity | `[grind =]` | +| `setMapping2_getMapping2_diff_key1` | 210 | disjoint-key1 preservation | `[grind →]` | +| `setMapping2_getMapping2_diff_key2` | 219 | disjoint-key2 preservation | `[grind →]` | +| `setMapping2_preserves_storage` | 228 | cross-field | `[grind →]` | +| `setMapping2_preserves_storageAddr` | 234 | cross-field | `[grind →]` | +| `setMapping2_preserves_storageMap` | 240 | cross-field | `[grind →]` | +| `setMapping2_preserves_storageMapUint` | 246 | cross-field | `[grind →]` | +| `setMappingUint_preserves_events` | 360 
| event preservation | `[grind →]` | +| `setMapping2_preserves_events` | 366 | event preservation | `[grind →]` | +| `setMapping_preserves_storageMapUint` | 314 | cross-family | `[grind →]` | +| `setMapping_preserves_storageMap2` | 320 | cross-family | `[grind →]` | + +→ **25 tagged** (the "same" equalities + "preserves" directionals; skipping `_msgValue / +_blockTimestamp / _blockNumber / _knownAddresses` which are adequately covered by a weaker set and +would duplicate the context-preservation cluster without adding coverage). + +### I.8 `Verity/Proofs/Stdlib/Math.lean` — 65 theorems + +Triage: + +- **`*_comm` (commutativity) lemmas** (`mulDivDown_comm`, `mulDivUp_comm`, `wMulDown_comm`, + `safeAdd_comm`, `safeMul_comm`): **NOT tagged as `[grind =]`** — commutativity rules under + e-matching can drive unbounded rewriting if the RHS normal form isn't fixed. These are + traditionally `@[simp]` in other libraries for AC-normalization, but here they are not simp. + Tagging them `[grind]` is an E-match loop trap. **Skipped.** + +- **`*_nat_eq` bridging lemmas** (`mulDivDown_nat_eq`, `mulDivUp_nat_eq`, `wMulDown_nat_eq`, + `wDivUp_nat_eq`): exact equality of Uint256 op with Nat op, gated by a "fits within MAX" hypothesis. + Tagged `[grind →]`: when grind has the fits-within hypothesis, it can substitute the Nat form. + +- **`*_zero_{left,right}` / `*_one_{left,right}` / `*_by_wad` / `*_by_one`**: clean identity + rewrites, tagged `[grind =]` when they have no preconditions, `[grind →]` when gated. + +- **Monotonicity / antitonicity** (`mulDivDown_monotone_left`, `mulDivUp_antitone_divisor`, + `wMulDown_monotone_*`, `wDivUp_monotone_left`, `wDivUp_antitone_right`): preconditions are + driving; tagged `[grind →]`. 
+ +- **Bound lemmas** (`mulDivDown_mul_le`, `mulDivUp_mul_ge`, `mulDivDown_mul_lt_add`, + `mulDivUp_mul_lt_add`, `wMulDown_mul_le`, `wMulDown_mul_lt_add`, `wDivUp_mul_ge`, + `wDivUp_mul_lt_add`, `mulDivDown_le_mulDivUp`, `mulDivUp_le_mulDivDown_add_one`): tagged + `[grind →]` — pure inequalities, no LHS ↔ RHS. + +- **Cancellation lemmas** (`mulDivDown_cancel_{left,right}`, `mulDivUp_cancel_{left,right}`): + tagged `[grind →]` — cancellations are gated by `c ≠ 0` + fits-within; forward only. + +- **Exactness disjunction** (`mulDivUp_eq_mulDivDown_or_succ`): tagged `[grind →]` — grind will + case-split on the disjunction. + +- **Safe-op lemmas** (`safeAdd_{some,none,zero_left,zero_right,result_bounded}`, + `safeSub_{some,none,zero,self,result_le}`, `safeMul_{some,none,zero_left,zero_right,one_left,one_right,result_bounded}`, + `safeDiv_{some,none,zero_numerator,by_one,self,result_le_numerator}`): **tagged `[grind →]`** — + these discharge option-elimination of the safe ops when the overflow hypothesis is present. 
+ +Concrete tagged list: + +| Lemma | Grind | +|---|---| +| `mulDivDown_nat_eq`, `mulDivUp_nat_eq`, `wMulDown_nat_eq`, `wDivUp_nat_eq` | `[grind →]` (4) | +| `mulDivDown_zero_left`, `mulDivDown_zero_right`, `mulDivUp_zero_left`, `mulDivUp_zero_right`, `wMulDown_zero_left`, `wMulDown_zero_right`, `wDivUp_zero` | `[grind =]` (7) | +| `wMulDown_one_left`, `wMulDown_one_right`, `wDivUp_by_wad` | `[grind →]` (3) — gated by fits-within | +| `mulDivDown_monotone_left/right`, `mulDivUp_monotone_left/right`, `wMulDown_monotone_left/right`, `wDivUp_monotone_left`, `wDivUp_antitone_right`, `mulDivDown_antitone_divisor`, `mulDivUp_antitone_divisor` | `[grind →]` (10) | +| `mulDivDown_mul_le`, `mulDivUp_mul_ge`, `mulDivDown_mul_lt_add`, `mulDivUp_mul_lt_add`, `wMulDown_mul_le`, `wMulDown_mul_lt_add`, `wDivUp_mul_ge`, `wDivUp_mul_lt_add`, `mulDivDown_le_mulDivUp`, `mulDivUp_le_mulDivDown_add_one` | `[grind →]` (10) | +| `mulDivUp_eq_mulDivDown_of_dvd`, `mulDivUp_eq_mulDivDown_add_one_of_not_dvd`, `mulDivUp_eq_mulDivDown_or_succ` | `[grind →]` (3) | +| `mulDivDown_cancel_left/right`, `mulDivUp_cancel_left/right` | `[grind →]` (4) — conditional cancellation | +| `mulDivDown_pos`, `mulDivUp_pos`, `wMulDown_pos`, `wDivUp_pos` | `[grind →]` (4) — positivity entailment | +| `safeAdd_some/none/zero_left/zero_right/result_bounded` | `[grind →]` (5) | +| `safeSub_some/none/zero/self/result_le` | `[grind →]` (5) | +| `safeMul_some/none/zero_left/zero_right/one_left/one_right/result_bounded` | `[grind →]` (7) | +| `safeDiv_some/none/zero_numerator/by_one/self/result_le_numerator` | `[grind →]` (6) | + +→ **~68 tagged** (approximately; exact count in `Invariants.lean`). + +**Deliberately skipped:** +- `safeAdd_comm`, `safeMul_comm`, `mulDivDown_comm`, `mulDivUp_comm`, `wMulDown_comm` — **E-match loop risk**. Grind + commutativity in a rewrite bundle leads to swapping back and forth. 
+ +--- + +## Part II — Case-local `Specs.lean` + +Per-case namespace summary (all live under `Benchmark.Cases.*`): + +| Case file | Namespace(s) | +|---|---| +| `DamnVulnerableDeFi/SideEntrance/Specs.lean` | `Benchmark.Cases.DamnVulnerableDeFi.SideEntrance` | +| `Ethereum/DepositContractMinimal/Specs.lean` | `Benchmark.Cases.Ethereum.DepositContractMinimal` | +| `Kleros/SortitionTrees/Specs.lean` | `Benchmark.Cases.Kleros.SortitionTrees` | +| `Lido/VaulthubLocked/Specs.lean` | `Benchmark.Cases.Lido.VaulthubLocked` | +| `NexusMutual/RammPriceBand/Specs.lean` | `Benchmark.Cases.NexusMutual.RammPriceBand` + `Benchmark.Cases.NexusMutual.RammSpotPrice` | +| `OpenZeppelin/ERC4626VirtualOffsetDeposit/Specs.lean` | `Benchmark.Cases.OpenZeppelin.ERC4626VirtualOffsetDeposit` | +| `PaladinVotes/StreamRecoveryClaimUsdc/Specs.lean` | `Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc` | +| `Safe/OwnerManagerReach/Specs.lean` | `Benchmark.Cases.Safe.OwnerManagerReach` | +| `UniswapV2/PairFeeAdjustedSwap/Specs.lean` | `Benchmark.Cases.UniswapV2.PairFeeAdjustedSwap` | +| `Zama/ERC7984ConfidentialToken/Specs.lean` | `Benchmark.Cases.Zama.ERC7984ConfidentialToken` | + +**Important clarification:** the Specs files contain `def`-based predicates rather than `theorem` +lemmas. For grind, tagging a `def` with `@[grind]` registers it as an **unfolding candidate** — when +grind sees the definition applied at the head of a term, it can β/δ-reduce it. This is exactly what +we want for the invariant predicates (e.g. `ownerListInvariant`, `isOwner`, `balanceOf`, `supply`, +`computedClaimAmount`, `next`, `isChain`, `ceilDiv`, `getPooledEthBySharesRoundUp`, …): grind needs +to peel the definition to reach the storage-level equations. 
+ +### II.1 Kleros / SortitionTrees + +| Name | Kind | Purpose | Grind | +|---|---|---|---| +| `leaf_sum` | `def` (Uint256) | sum of 4 leaf weights | `[grind]` unfold | +| `parent_equals_sum_of_children_spec` | `def` (Prop) | tree balance between parents/children | SKIP — it IS the main obligation, better not auto-unfold | +| `root_equals_sum_of_leaves_spec` | `def` (Prop) | root invariant | SKIP — main obligation | +| `draw_selects_valid_leaf_spec` | `def` (Prop) | bounds 3 ≤ selected ≤ 6 | SKIP — main obligation | +| `node_id_bijection_spec` | `def` (Prop) | id-mapping bijection | SKIP — main obligation | +| `root_minus_left_equals_right_subtree_spec` | `def` (Prop) | right = root - left | SKIP — main obligation | + +→ **1 tagged:** `leaf_sum` (auxiliary aggregator that appears inside `root_equals_sum_of_leaves_spec`). + +### II.2 Lido / VaulthubLocked + +Helpers live in the adjacent `Contract.lean` (readable — not `Proofs.lean`). + +| Name | Kind | Purpose | Grind | +|---|---|---|---| +| `TOTAL_BASIS_POINTS` | `def` (Uint256 constant) | 10000 | SKIP (constant) | +| `ceilDiv` | `def` (Uint256 → Uint256 → Uint256) | ceil-div helper | `[grind]` unfold | +| `getPooledEthBySharesRoundUp` | `def` | share → ether round-up | `[grind]` unfold | +| `ceildiv_sandwich_spec` | `def` (Prop) | `ceilDiv(x,d) * d ≥ x` when no overflow | SKIP — main obligation | +| `shares_conversion_monotone_spec` | `def` (Prop) | share conversion monotonicity | SKIP — main obligation | +| `locked_funds_solvency_spec` | `def` (Prop) | solvency invariant | SKIP — main obligation | + +→ **2 tagged:** `ceilDiv`, `getPooledEthBySharesRoundUp`. 
+ +### II.3 Zama / ERC7984ConfidentialToken + +| Name | Kind | Purpose | Grind | +|---|---|---|---| +| `balanceOf` | `def` (accessor) | `s.storageMap 1 addr` | `[grind]` unfold | +| `supply` | `def` (accessor) | `s.storage 0` | `[grind]` unfold | +| `operatorExpiry` | `def` (accessor) | `s.storageMap2 3 holder spender` | `[grind]` unfold | +| other specs | `def` (Prop) | main obligations | SKIP | + +→ **3 tagged.** + +### II.4 PaladinVotes / StreamRecoveryClaimUsdc + +| Name | Kind | Purpose | Grind | +|---|---|---|---| +| `computedClaimAmount` | `def` (Uint256) | `shareWad * s.storage 0 / 1e18` | `[grind]` unfold | +| `computedWethClaimAmount` | `def` (Uint256) | WETH analog | `[grind]` unfold | + +→ **2 tagged.** + +### II.5 Safe / OwnerManagerReach — the rich one + +| Name | Kind | Purpose | Grind | +|---|---|---|---| +| `next` | `def` (accessor) | `wordToAddress (s.storageMap 0 a)` | `[grind]` unfold | +| `isChain` | `def` (List → Prop, recursive) | pairwise-next consistency | `[grind]` unfold | +| `reachable` | `def` (Prop, ∃ chain …) | existential chain | **NOT TAGGED** — unfolding an existential makes grind try to fabricate chains; leads to loop. Keep opaque. | +| `inListReachable` | `def` (Prop) | Certora-style list invariant | `[grind]` unfold | +| `reachableInList` | `def` (Prop) | inverse invariant | `[grind]` unfold | +| `ownerListInvariant` | `def` (Prop) | bundled iff invariant | `[grind]` unfold | +| `noDuplicates` | `def` (List → Prop, recursive) | list is nodup | `[grind]` unfold | +| `acyclic` | `def` (Prop, ∀ chain …) | universal over chains | **NOT TAGGED** — universally quantified over chain structures; unfolding inside grind explodes. Keep opaque. | +| `uniquePredecessor` | `def` (Prop) | at-most-one incoming edge | `[grind]` unfold | +| `freshInList` | `def` (Prop, ∀ chain …) | absence from any chain | **NOT TAGGED** — same reason as `acyclic`. 
| +| `noSelfLoops` | `def` (Prop) | no self-edges | `[grind]` unfold | +| `isOwner` | `def` (Prop) | non-zero successor + ≠ SENTINEL | `[grind]` unfold | + +→ **9 tagged, 3 intentionally left opaque** (`reachable`, `acyclic`, `freshInList`). + +### II.6 NexusMutual / RammPriceBand + +Contract.lean has `PRICE_BUFFER`, `PRICE_BUFFER_DENOMINATOR`, `ONE_ETHER` (constants — SKIP) and +`calculateBuyReserve`, `calculateSellReserve`, `spotPrices` (multi-branch functions — SKIP because +unfolding them inside grind would thrash on case splits). + +Specs.lean predicates are main obligations (SKIP). + +→ **0 tagged.** (Documented reasoning: multi-branch computational helpers are antipattern for +grind.) + +### II.7 DamnVulnerableDeFi, Ethereum/DepositContractMinimal, OpenZeppelin, UniswapV2 + +These Specs.lean files contain only **main obligation predicates** (`deposit_sets_pool_balance_spec`, +`deposit_increments_deposit_count_spec`, etc.) — no auxiliary helpers. Tagging them for grind unfold +would be circular (we'd unfold the obligation into its body). **0 tagged** from these cases. + +--- + +## Part III — Rationale for rejections and "NOT TAGGED" entries + +1. **Already `@[simp]` on trivial shapes** — FiniteSet membership lemmas, `Specs.Common *_rfl`. + Simp runs inside grind, so double-tagging is redundant noise. + +2. **Commutativity rewrites** — `*_comm` lemmas are E-match loop magnets. Skip. + +3. **Existentially- or universally-quantified predicates over chains** (`reachable`, `acyclic`, + `freshInList`) — unfolding them mid-grind creates a witness search that cannot be bounded. + +4. **Multi-branch computation functions** (`calculateBuyReserve`, `spotPrices`) — unfolding + explodes the proof state with case splits that grind has no oracle for. + +5. **Plain numeric constants** (`TOTAL_BASIS_POINTS`, `PRICE_BUFFER`, `ONE_ETHER`) — no domain + content; simp-unfolding when needed is cheaper than grind tagging. + +6. 
**Main obligation predicates** (everything named `*_spec` that is a top-level proof + obligation) — these are the theorems we prove; we should not make grind unfold them when proving + something else. + +--- + +## Part IV — Coordination with worker S1 + +S1 is building `Benchmark/Grindset/` on branch `grindset/s1-verity-grindset` and tagging **core +operational primitives** (likely: Uint256 arithmetic, FiniteSet ops, storage context manipulation, +Free monad step semantics). Our A1 coverage is complementary: + +- A1 owns **invariant-level** lemmas (`sumBalances_*`, `map_sum_*`, `setMapping*_same/diff`, + mulDivUp/Down bound + cancellation + monotonicity, safe-op Option elimination). +- A1 owns **case-local predicate unfolding** for the 7 active cases with non-trivial helpers. +- S1 presumably owns operational primitives (`.runState`, `.runValue`, basic Uint256 `add/mul/sub` + identities). + +If both branches tag the same lemma, Lean will accept the second tag as a no-op (attribute is +idempotent for `grind` equal-orientation); if S1 tags the Uint256 commutativity set as `grind` we +rely on S1's choice (we document this as deferred). + +The stub `Benchmark/Grindset.lean` on A1's branch imports only `Benchmark.Grindset.Invariants`; S1 +will merge later. + +--- + +## Build verification + +`lake build Benchmark.Grindset.Invariants` must succeed. The `attribute [grind …] X` syntax +requires `X` to already be imported. We import: + +- `Verity.Core.Uint256` +- `Verity.Core.FiniteSet` *(transitively)* +- `Verity.Proofs.Stdlib.Math` +- `Verity.Proofs.Stdlib.ListSum` +- `Verity.Proofs.Stdlib.MappingAutomation` +- `Verity.Specs.Common` and `Verity.Specs.Common.Sum` +- `Benchmark.Cases.*.Specs` for the 5 cases with tagged helpers (Kleros, Lido, PaladinVotes, Safe, Zama) + +See `Benchmark/Grindset/Invariants.lean` for the complete, grouped attribute application. 
diff --git a/Benchmark/Grindset/Invariants.lean b/Benchmark/Grindset/Invariants.lean new file mode 100644 index 00000000..71a17221 --- /dev/null +++ b/Benchmark/Grindset/Invariants.lean @@ -0,0 +1,321 @@ +/- + Benchmark.Grindset.Invariants + + Mission A1 (grindset/a1-invariant-tags): re-export and tag domain-level invariant lemmas and + case-local spec helpers with `@[grind …]` so the `grind` tactic can use them during proof search. + + Complementary to sibling worker S1 (`grindset/s1-verity-grindset`), who tags core operational + primitives. A1 focuses on: + + • Verity sum-preservation invariants (Verity.Proofs.Stdlib.ListSum, + Verity.Specs.Common.Sum) + • Verity mapping store/load identities (Verity.Proofs.Stdlib.MappingAutomation) + • Verity ceil/floor-div + wad + safe-op bounds + (Verity.Proofs.Stdlib.Math) + • Two Uint256 cancellation lemmas (Verity.Core.Uint256.{sub_add_cancel, add_right_cancel}) + • Case-local predicate unfolding (Benchmark.Cases.*.Specs) + + See Benchmark/Grindset/INVARIANTS_AUDIT.md for per-entry rationale and rejection notes. + + Constraints honoured: + - No Verity library file (`.lake/packages/verity/**`) is modified. + - No `Benchmark/Cases/**/Specs.lean` or `Proofs.lean` is modified. + - Only `attribute [grind …] Name` re-exports are applied here. + + Orientation choices: + - `[grind =]` for equality lemmas; the left-hand side of the conclusion is the E-matching pattern (the + safer default when the hypotheses lack matchable patterns or are non-propositional). + - `[grind →]` reserved for implications whose antecedents contain genuinely matchable + patterns distinct from the conclusion (`safeAdd_some`, `*_monotone_*` that ship with + `≤` antecedents containing the same `mulDiv` terms as the conclusion, etc.). + - Case-local `def`s get plain `[grind]` which registers them as δ-unfold candidates. 
+-/ + +import Verity.Core.Uint256 +import Verity.Proofs.Stdlib.Math +import Verity.Proofs.Stdlib.ListSum +import Verity.Proofs.Stdlib.MappingAutomation +import Verity.Specs.Common +import Verity.Specs.Common.Sum + +import Benchmark.Cases.Kleros.SortitionTrees.Specs +import Benchmark.Cases.Lido.VaulthubLocked.Specs +import Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.Specs +import Benchmark.Cases.Safe.OwnerManagerReach.Specs +import Benchmark.Cases.Zama.ERC7984ConfidentialToken.Specs + +namespace Benchmark.Grindset.Invariants + +/-! ## 1. Core Uint256 cancellations + +Almost all of `Verity.Core.Uint256`'s algebraic lemmas are already `@[simp]`. Two are not but are +genuinely useful for proof automation: the wrap-safe `sub_add_cancel` and the forward-only +`add_right_cancel`. -/ + +attribute [grind =] Verity.Core.Uint256.sub_add_cancel +attribute [grind →] Verity.Core.Uint256.add_right_cancel + + +/-! ## 2. ListSum — point-update / transfer conservation + +Core balance-conservation invariants. The `_eq` countOcc lemmas tag cleanly as `[grind =]`; the +conditional `_ne` variants (with an `a ≠ t` antecedent) are forward-only and tagged `[grind →]`. +The three `map_sum_*` preservation theorems can't be tagged with either `→` (antecedent patterns +aren't extractable) or `=` (the LHS of the concluding equality doesn't mention every bound +parameter like `delta`/`src`/`dst`, so grind can't instantiate them from an E-match). Callers +should pull them in manually (e.g. `grind [map_sum_point_update]`); NOT TAGGED here to avoid a +loud-but-useless global registration. -/ + +attribute [grind =] + Verity.Proofs.Stdlib.ListSum.countOcc_cons_eq + Verity.Proofs.Stdlib.ListSum.countOccU_cons_eq +-- Conditional (`a ≠ t → …`) equalities: forward-only per the audit. +attribute [grind →] + Verity.Proofs.Stdlib.ListSum.countOcc_cons_ne + Verity.Proofs.Stdlib.ListSum.countOccU_cons_ne + + +/-! ## 3. 
sumBalances preservation over FiniteAddressSet + +Namespace is `Verity.Specs.Common` (the file lives under Sum.lean but opens no sub-namespace). + +Only the two "pure rewrite" theorems (`sumBalances_insert_existing`, `sumBalances_zero_of_all_zero`) +tag cleanly as `[grind =]` — grind can E-match their LHS to the goal without unknown parameters. +The other three (`_insert_new`, `_update_existing`, `balancesFinite_preserved_deposit`) mention +fresh parameters (`amount`, `old_amount`, record-update on `knownAddresses`) that don't appear on +the pattern LHS, so grind refuses to register them. Callers invoke these manually. -/ + +attribute [grind =] + Verity.Specs.Common.sumBalances_insert_existing + Verity.Specs.Common.sumBalances_zero_of_all_zero + + +/-! ## 4. Mapping store/load identities (MappingAutomation) + +These are the single highest-impact cluster: every benchmark obligation of the form "after +`setMappingX slot k v`, reading back at the same key equals `v`, and reading at a distinct key +preserves the original" reduces to these core four shapes per mapping family. + +All tagged `[grind =]`: + - the `_same` / `_runValue` lemmas are pure equations; + - the `_diff` lemmas have an antecedent (`k1 ≠ k2`) whose pattern can't be extracted by grind →, + but tagging `=` still lets grind rewrite the `getMapping …` term and side-check the ineq; + - the `_preserves_*` lemmas have no propositional hypothesis at all, so `=` is the only + orientation accepted. +-/ + +-- 4a. Address → Uint256 mappings +attribute [grind =] + Verity.Proofs.Stdlib.MappingAutomation.getMapping_runValue + Verity.Proofs.Stdlib.MappingAutomation.setMapping_getMapping_same + Verity.Proofs.Stdlib.MappingAutomation.setMapping_getMapping_diff + Verity.Proofs.Stdlib.MappingAutomation.setMapping_preserves_other_slot + Verity.Proofs.Stdlib.MappingAutomation.setMapping_preserves_storageMapUint + Verity.Proofs.Stdlib.MappingAutomation.setMapping_preserves_storageMap2 + +-- 4b. 
Uint256 → Uint256 mappings +attribute [grind =] + Verity.Proofs.Stdlib.MappingAutomation.getMappingUint_runValue + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_getMappingUint_same + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_getMappingUint_diff + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_storage + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_storageAddr + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_storageMap + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_storageMap2 + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_sender + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_thisAddress + Verity.Proofs.Stdlib.MappingAutomation.setMappingUint_preserves_events + +-- 4c. Address → Address → Uint256 (nested) mappings +attribute [grind =] + Verity.Proofs.Stdlib.MappingAutomation.getMapping2_runValue + Verity.Proofs.Stdlib.MappingAutomation.setMapping2_getMapping2_same + Verity.Proofs.Stdlib.MappingAutomation.setMapping2_getMapping2_diff_key1 + Verity.Proofs.Stdlib.MappingAutomation.setMapping2_getMapping2_diff_key2 + Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_storage + Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_storageAddr + Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_storageMap + Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_storageMapUint + Verity.Proofs.Stdlib.MappingAutomation.setMapping2_preserves_events + + +/-! ## 5. Ceil / floor division + wad + safe ops + +All of `Verity.Proofs.Stdlib.Math` except commutativity rewrites (which are E-match loop traps). + +Groups: + • `*_nat_eq` — bridge Uint256 op to Nat op (forward, gated on the fits-within + hypothesis; tagged `[grind →]` in 5a). + • `*_zero_*` — identities with no precondition (equational). + • `*_one_{left,right}` / `wDivUp_by_wad` — gated identities (forward, the gate has patterns). 
+ • `*_monotone_*`, `*_antitone_*` — monotonicity (forward, except `mulDivDown_monotone_*`, + which use plain `[grind]`). + • `*_mul_le / _mul_ge / _mul_lt_add` — sandwich bounds (mixed; the `mulDivDown_mul_*` + variants use plain `[grind]`, the rest are forward). + • `mulDivUp_eq_mulDivDown_*` — exactness disjunctions (forward). + • `*_cancel_*` — conditional cancellation (forward). + • `*_pos` — positivity entailment (forward). + • `safe{Add,Sub,Mul,Div}_*` — Option-elimination and result bounds (mix of `=` for identities + and `→` for bound-producing lemmas). +-/ + +-- 5a. Nat bridges (conditional on a `fits_within` hypothesis, forward-only per the audit). +attribute [grind →] + Verity.Proofs.Stdlib.Math.mulDivDown_nat_eq + Verity.Proofs.Stdlib.Math.mulDivUp_nat_eq + Verity.Proofs.Stdlib.Math.wMulDown_nat_eq + Verity.Proofs.Stdlib.Math.wDivUp_nat_eq + +-- 5b. Unconditional zero identities +attribute [grind =] + Verity.Proofs.Stdlib.Math.mulDivDown_zero_left + Verity.Proofs.Stdlib.Math.mulDivDown_zero_right + Verity.Proofs.Stdlib.Math.mulDivUp_zero_left + Verity.Proofs.Stdlib.Math.mulDivUp_zero_right + Verity.Proofs.Stdlib.Math.wMulDown_zero_left + Verity.Proofs.Stdlib.Math.wMulDown_zero_right + Verity.Proofs.Stdlib.Math.wDivUp_zero + +-- 5c. Gated identity rewrites +attribute [grind →] + Verity.Proofs.Stdlib.Math.wMulDown_one_left + Verity.Proofs.Stdlib.Math.wMulDown_one_right + Verity.Proofs.Stdlib.Math.wDivUp_by_wad + +-- 5d. Monotonicity / antitonicity (`mulDivDown_monotone_*` variants: antecedents lack patterns AND the +-- conclusion is `≤` not `=`, so neither `→` nor `=` works. Use plain `[grind]`.) 
+attribute [grind] + Verity.Proofs.Stdlib.Math.mulDivDown_monotone_left + Verity.Proofs.Stdlib.Math.mulDivDown_monotone_right +attribute [grind →] + Verity.Proofs.Stdlib.Math.mulDivUp_monotone_left + Verity.Proofs.Stdlib.Math.mulDivUp_monotone_right + Verity.Proofs.Stdlib.Math.wMulDown_monotone_left + Verity.Proofs.Stdlib.Math.wMulDown_monotone_right + Verity.Proofs.Stdlib.Math.wDivUp_monotone_left + Verity.Proofs.Stdlib.Math.wDivUp_antitone_right + Verity.Proofs.Stdlib.Math.mulDivDown_antitone_divisor + Verity.Proofs.Stdlib.Math.mulDivUp_antitone_divisor + +-- 5e. Sandwich bounds (mulDivDown variants: conclusions are `≤` / `<`, so use plain `[grind]`) +attribute [grind] + Verity.Proofs.Stdlib.Math.mulDivDown_mul_le + Verity.Proofs.Stdlib.Math.mulDivDown_mul_lt_add +attribute [grind →] + Verity.Proofs.Stdlib.Math.mulDivUp_mul_ge + Verity.Proofs.Stdlib.Math.mulDivUp_mul_lt_add + Verity.Proofs.Stdlib.Math.wMulDown_mul_le + Verity.Proofs.Stdlib.Math.wMulDown_mul_lt_add + Verity.Proofs.Stdlib.Math.wDivUp_mul_ge + Verity.Proofs.Stdlib.Math.wDivUp_mul_lt_add + Verity.Proofs.Stdlib.Math.mulDivDown_le_mulDivUp + Verity.Proofs.Stdlib.Math.mulDivUp_le_mulDivDown_add_one + +-- 5f. Exactness disjunctions +attribute [grind →] + Verity.Proofs.Stdlib.Math.mulDivUp_eq_mulDivDown_of_dvd + Verity.Proofs.Stdlib.Math.mulDivUp_eq_mulDivDown_add_one_of_not_dvd + Verity.Proofs.Stdlib.Math.mulDivUp_eq_mulDivDown_or_succ + +-- 5g. Conditional cancellations +attribute [grind →] + Verity.Proofs.Stdlib.Math.mulDivDown_cancel_left + Verity.Proofs.Stdlib.Math.mulDivDown_cancel_right + Verity.Proofs.Stdlib.Math.mulDivUp_cancel_left + Verity.Proofs.Stdlib.Math.mulDivUp_cancel_right + +-- 5h. Positivity +attribute [grind →] + Verity.Proofs.Stdlib.Math.mulDivDown_pos + Verity.Proofs.Stdlib.Math.mulDivUp_pos + Verity.Proofs.Stdlib.Math.wMulDown_pos + Verity.Proofs.Stdlib.Math.wDivUp_pos + +-- 5i. 
safeAdd +attribute [grind →] + Verity.Proofs.Stdlib.Math.safeAdd_some + Verity.Proofs.Stdlib.Math.safeAdd_none + Verity.Proofs.Stdlib.Math.safeAdd_zero_left + Verity.Proofs.Stdlib.Math.safeAdd_zero_right + Verity.Proofs.Stdlib.Math.safeAdd_result_bounded + +-- 5j. safeSub (zero/self are no-hypothesis identities → `=`) +attribute [grind =] + Verity.Proofs.Stdlib.Math.safeSub_zero + Verity.Proofs.Stdlib.Math.safeSub_self +attribute [grind →] + Verity.Proofs.Stdlib.Math.safeSub_some + Verity.Proofs.Stdlib.Math.safeSub_none + Verity.Proofs.Stdlib.Math.safeSub_result_le + +-- 5k. safeMul (zero identities → `=`, rest → `→`) +attribute [grind =] + Verity.Proofs.Stdlib.Math.safeMul_zero_left + Verity.Proofs.Stdlib.Math.safeMul_zero_right +attribute [grind →] + Verity.Proofs.Stdlib.Math.safeMul_some + Verity.Proofs.Stdlib.Math.safeMul_none + Verity.Proofs.Stdlib.Math.safeMul_one_left + Verity.Proofs.Stdlib.Math.safeMul_one_right + Verity.Proofs.Stdlib.Math.safeMul_result_bounded + +-- 5l. safeDiv (none/by_one are no-hypothesis identities, some/zero_num/self lack antecedent +-- patterns → all to `=`) +attribute [grind =] + Verity.Proofs.Stdlib.Math.safeDiv_some + Verity.Proofs.Stdlib.Math.safeDiv_none + Verity.Proofs.Stdlib.Math.safeDiv_zero_numerator + Verity.Proofs.Stdlib.Math.safeDiv_by_one + Verity.Proofs.Stdlib.Math.safeDiv_self +attribute [grind →] + Verity.Proofs.Stdlib.Math.safeDiv_result_le_numerator + + +/-! ## 6. Case-local predicate / accessor unfolding + +These are `def`s (not theorems) in the Specs.lean files of the 7 active cases. Tagging a `def` +with `@[grind]` registers it as an unfolding candidate for grind — it will δ-reduce the head +when it appears in the goal. This is essential so grind can see the underlying +`storage`/`storageMap`/… reads that the definitions abbreviate. 
+ +Rejected on purpose: + • `reachable` / `acyclic` / `freshInList` (Safe.OwnerManagerReach) — existential / universal + over chain lists; unfolding inside grind creates unbounded witness search. + • `calculateBuyReserve`, `calculateSellReserve`, `spotPrices` (NexusMutual/RammPriceBand in + Contract.lean) — multi-branch computation, unfolding thrashes on case splits. + • Plain numeric constants — simp handles them better. + • Main obligation predicates (`*_spec` at top level) — we prove these, we don't unfold them. +-/ + +-- Kleros / SortitionTrees +attribute [grind] Benchmark.Cases.Kleros.SortitionTrees.leaf_sum + +-- PaladinVotes / StreamRecoveryClaimUsdc +attribute [grind] + Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.computedClaimAmount + Benchmark.Cases.PaladinVotes.StreamRecoveryClaimUsdc.computedWethClaimAmount + +-- Lido / VaulthubLocked (defs live in the adjacent Contract module) +attribute [grind] + Benchmark.Cases.Lido.VaulthubLocked.ceilDiv + Benchmark.Cases.Lido.VaulthubLocked.getPooledEthBySharesRoundUp + +-- Zama / ERC7984ConfidentialToken — storage accessors +attribute [grind] + Benchmark.Cases.Zama.ERC7984ConfidentialToken.balanceOf + Benchmark.Cases.Zama.ERC7984ConfidentialToken.supply + Benchmark.Cases.Zama.ERC7984ConfidentialToken.operatorExpiry + +-- Safe / OwnerManagerReach — linked-list reachability / invariant predicates +attribute [grind] + Benchmark.Cases.Safe.OwnerManagerReach.next + Benchmark.Cases.Safe.OwnerManagerReach.isChain + Benchmark.Cases.Safe.OwnerManagerReach.inListReachable + Benchmark.Cases.Safe.OwnerManagerReach.reachableInList + Benchmark.Cases.Safe.OwnerManagerReach.ownerListInvariant + Benchmark.Cases.Safe.OwnerManagerReach.noDuplicates + Benchmark.Cases.Safe.OwnerManagerReach.uniquePredecessor + Benchmark.Cases.Safe.OwnerManagerReach.noSelfLoops + Benchmark.Cases.Safe.OwnerManagerReach.isOwner + +end Benchmark.Grindset.Invariants diff --git a/Benchmark/Grindset/Monad.lean b/Benchmark/Grindset/Monad.lean new file 
mode 100644 index 00000000..f7cfc6a4 --- /dev/null +++ b/Benchmark/Grindset/Monad.lean @@ -0,0 +1,136 @@ +/- + Benchmark.Grindset.Monad — simp/grind normalization of the Contract monad + scaffolding. + + The Verity DSL elaborates `verity_contract` function bodies into do-notation + over the `Contract` monad, which in turn desugars to chains of + `Verity.bind`/`Verity.pure` wrapped by `Contract.run` and projected through + `ContractResult.snd` / `ContractResult.fst`. + + We register these identifiers in the dedicated `grind_norm` simp set + (lemmas stated in this file are additionally `@[simp]`) so that a + `simp only [grind_norm]` pass can unfold / rewrite them. The goal is that typical benchmark task + obligations of shape + + ((Contract.f arg).run s).snd.storage n = ... + + normalize down to plain record updates over `s`, at which point `grind` + can finish with the tagged storage/mapping simp-lemmas in `Core.lean`. +-/ + +import Verity.Core +import Benchmark.Grindset.Attr + +namespace Benchmark.Grindset + +open Verity + +/-! +## `grind_norm` simp set + +Unfolds the bind/pure/run scaffolding so that `Contract.run (do …) s` +collapses into nested applications of the underlying `*_run` lemmas. + +Downstream tactics can invoke these lemmas via: + +``` +simp only [grind_norm] at * +``` + +NOTE(review): no declaration in this file carries `@[grind]`/`@[grind =]`, so `grind` alone +will not apply these rules unless they are tagged elsewhere — confirm. +-/ + + +/-! ### Bind and pure -/ + +@[grind_norm, simp] +theorem bind_def {α β : Type} (m : Contract α) (f : α → Contract β) : + (m >>= f) = Verity.bind m f := rfl + +@[grind_norm, simp] +theorem pure_def {α : Type} (a : α) : + (Pure.pure a : Contract α) = Verity.pure a := rfl + +@[grind_norm, simp] +theorem bind_success {α β : Type} (a : α) (s : ContractState) + (f : α → Contract β) : + Verity.bind (fun state => ContractResult.success a state) f s = + f a s := rfl + +/-! 
### `Contract.run` against constructors -/ + +@[grind_norm, simp] +theorem Contract_run_success {α : Type} (a : α) (s : ContractState) : + Contract.run (fun state => ContractResult.success a state) s = + ContractResult.success a s := rfl + +/-! ### Projection-through-constructor lemmas + +The two core structural facts used by every spec-unfolding proof: after +reducing the monadic body to a `ContractResult.success a s'`, projecting out +`.snd` gives back `s'`. These are already `@[simp]` upstream, but we re-tag +them for `grind` so the tactic can apply them directly. -/ + +attribute [grind_norm] ContractResult.snd_success ContractResult.snd_revert +attribute [grind_norm] ContractResult.fst_success +attribute [grind_norm] Contract.bind_pure_left Contract.bind_pure_right +attribute [grind_norm] Contract.bind_assoc + +/-! ### Primitive operation `.run` lemmas. + +These are `@[simp]` upstream. Re-tagging into `grind_norm` keeps everything +accessible via one attribute when running the normalization pass. -/ + +attribute [grind_norm] getStorage_run setStorage_run +attribute [grind_norm] getStorageAddr_run setStorageAddr_run +attribute [grind_norm] getMapping_run setMapping_run +attribute [grind_norm] getMapping2_run setMapping2_run +attribute [grind_norm] getMappingUint_run setMappingUint_run +attribute [grind_norm] msgSender_run contractAddress_run msgValue_run +attribute [grind_norm] blockTimestamp_run blockNumber_run chainid_run +attribute [grind_norm] require_true require_false +attribute [grind_norm] pure_run + +/-! +### Definitional unfolds + +The Verity monadic primitives are ordinary `def`s; we need the simp set to +be able to unfold them so `Verity.bind (setStorage … …) f s` can reduce to +a `ContractResult.success …` pattern that the `*_run` lemmas (and the `.snd` +projection lemmas) can finish. 
-/ + +attribute [grind_norm] Verity.bind Verity.pure +attribute [grind_norm] Verity.Contract.run +attribute [grind_norm] Verity.getStorage Verity.setStorage +attribute [grind_norm] Verity.getStorageAddr Verity.setStorageAddr +attribute [grind_norm] Verity.getMapping Verity.setMapping +attribute [grind_norm] Verity.getMapping2 Verity.setMapping2 +attribute [grind_norm] Verity.getMappingUint Verity.setMappingUint +attribute [grind_norm] Verity.msgSender Verity.contractAddress +attribute [grind_norm] Verity.msgValue +attribute [grind_norm] Verity.blockTimestamp Verity.blockNumber Verity.chainid +attribute [grind_norm] Verity.require + +/-! ### `require` branch discharge + +The `verity_contract` macro elaborates `require (a <= b) msg` into +`Verity.require (decide (a ≤ b)) msg`, which after unfolding becomes +`fun s => if decide (a ≤ b) = true then ContractResult.success () s else …`. +A proof-side hypothesis `h : a ≤ b` passed into `simp only […, h]` rewrites +the inner `Prop` to `True`, leaving the residual guard +`if decide True = true then success … else revert …`. The ground +`simp only [grind_norm, …]` simp set does not include a rule that collapses +this guard — without it the enclosing `Verity.bind` / `Contract.run` matches +cannot commit to their success branch and `grind` is handed a large +unreduced term whose storage projection it cannot see through. + +The lemma below is the missing rewrite. It discharges the `require` in one +step, unblocking the rest of the monadic normalisation. -/ + +@[grind_norm, simp] +theorem ite_decide_True {α : Sort _} (a b : α) : + (if decide True = true then a else b) = a := by + simp + +end Benchmark.Grindset diff --git a/Benchmark/Grindset/REACH_NOTES.md b/Benchmark/Grindset/REACH_NOTES.md new file mode 100644 index 00000000..2bb78ba4 --- /dev/null +++ b/Benchmark/Grindset/REACH_NOTES.md @@ -0,0 +1,180 @@ +# Grindset Reach extension — design notes + +Worker **A3** (branch `grindset/a3-reach-grind-ext`). 
+ +## TL;DR + +- **Reach shape in the benchmark is not inductive** — the one case + that genuinely uses reachability (`Safe/OwnerManagerReach`) encodes + it as an *existential over a witness list* (`List Address`), not as + `Relation.ReflTransGen` or a custom `inductive Reach` step closure. +- `Benchmark/Grindset/Reach.lean` ships **both** flavours of closure + lemmas (inductive `Relation.ReflTransGen` and witness-based + `Reachable`/`IsChain`) so the extension is future-proof. +- `@[grind]` tagging of the witness-based lemmas is **deliberately conservative**: only refl / + one-step / base facts are tagged. `reachable_trans` and `reachable_snoc` are not tagged + globally because they are too productive and cause E-matching to + explode on innocuous terms like `f (f (f a))`. (The inductive `reach_*` wrappers in Part 1, including `reach_tail` and `reach_trans`, are all tagged.) +- The `verity_reach_grind` macro handles the actual closure + obligations by `apply`-ing `reachable_preserves_invariant` / + `reach_preserves_invariant` before handing off to `grind`. + +## The four flagged cases, reach-wise + +| Case | Reach? | +| ------------------------------------------------- | ---------------------------------------- | +| `Kleros/SortitionTrees` | No — sum/storage arithmetic only | +| `Safe/OwnerManagerReach` | **Yes — list-witness `reachable`** | +| `Lido/VaulthubLocked` | No — solvency arithmetic (F-01 / P-VH-*) | +| `PaladinVotes/StreamRecoveryClaimUsdc` | No — claim-state updates only | + +So only `Safe/OwnerManagerReach` actually benefits from a reach pack. +The other three were presumably flagged by keyword match alone. 
+ +## The concrete Reach shape in `Safe/OwnerManagerReach` + +From `Benchmark/Cases/Safe/OwnerManagerReach/Specs.lean` (paraphrased): + +```lean +-- Linked-list next-pointer reader +def next (s : ContractState) (a : Address) : Address := + wordToAddress (s.storageMap 0 a) + +-- A list of addresses that walks the linked list correctly +def isChain (s : ContractState) : List Address → Prop + | [] | [_] => True + | a :: b :: rest => next s a = b ∧ isChain s (b :: rest) + +-- Reachability via a witness chain +def reachable (s : ContractState) (a b : Address) : Prop := + ∃ chain, chain.head? = some a ∧ chain.getLast? = some b ∧ isChain s chain +``` + +Key observation: **reach induction here is list induction**, not +inductive-predicate induction. This is a deliberate choice — Certora's +`reach` predicate was replaced with a witness-style existential +because the Safe linked list is naturally finite and the witness is a +first-class object proofs can manipulate. + +## What `Reach.lean` provides + +### Part 1 — Inductive reach (`Relation.ReflTransGen`) + +For future cases that *do* use the inductive formulation (none of the +four flagged cases do, but it's a common pattern). Lemmas tagged +`@[grind]`: + +| Lemma | Role | +| --------------------------- | --------------------------------------------- | +| `reach_refl` | `ReflTransGen r a a` | +| `reach_of_step` | single step ⇒ reach | +| `reach_tail` / `reach_head` | snoc / cons extension | +| `reach_trans` | transitivity | + +Plus an un-tagged closure lemma: + +| Lemma | Role | +| --------------------------- | --------------------------------------------- | +| `reach_preserves_invariant` | `(∀ x y, r x y → P x → P y) → ∀ a b, R* a b → P a → P b` | + +### Part 2 — Witness-based reach (`Reachable` / `IsChain`) + +Generic over `σ` (state) and `α` (node). Definitions mirror the Safe +case verbatim. Lemmas: + +| Lemma | Tagged `@[grind]`? 
| Role | +| ------------------------------ | ------------------ | -------------------------------------- | +| `isChain_nil`, `isChain_singleton` | yes | base cases | +| `isChain_cons_cons` | `@[simp]` only | Iff unfolding (pattern too generic for grind) | +| `isChain_tail` | no | structural lemma | +| `reachable_refl` | yes | `Reachable step s a a` | +| `reachable_step` | yes | `Reachable step s a (step s a)` | +| `reachable_of_step` | yes | alias of `reachable_step` | +| `reachable_snoc` | **no** (loops) | extend reach by one step | +| `reachable_trans` | **no** (loops) | transitivity | +| `reachable_preserves_invariant`| no | the canonical closure lemma | + +### Part 3 — The `verity_reach_grind` tactic + +A macro that: + +1. First tries `apply reachable_preserves_invariant <;> grind` — this + is the canonical shape of nearly every reach-closure obligation. +2. Falls back to `apply reach_preserves_invariant <;> grind` for the + inductive `ReflTransGen` variant. +3. Falls back to plain `grind` (base facts are already tagged). +4. As a last resort, retries `grind` with `snoc`/`trans` as explicit + hints (will usually time out — only useful for tiny chains). + +## Why trans/snoc are **not** globally `@[grind]` + +Empirically, tagging `reachable_trans` and `reachable_snoc` makes +`grind`'s E-matcher produce thousands of spurious instances such as + +``` +Reachable chainStep f (chainStep f (chainStep f (chainStep f b))) (chainStep f (chainStep f a)) +``` + +because every existing `Reachable …` fact matches their first hypothesis +pattern and every `chainStep _ _` term plausibly matches the step +pattern. The E-matching "maximum rounds" threshold is hit in <1s. + +Leaving them as explicit hints (or arguments to +`verity_reach_grind`'s inner `grind`) scopes them to situations where +a manual `apply` has already fixed the relevant endpoints. + +## Demo proofs + +`Benchmark/Grindset/ReachTests.lean` contains: + +1. 
`demo_reach_preserves_P` — `Relation.ReflTransGen`-style invariant + preservation, closed by `verity_reach_grind`. +2. `demo_chain_reach_preserves_membership` — the witness-based analogue + (`Reachable chainStep f a b → a ∈ S → b ∈ S` assuming `S` + step-closed), also closed by `verity_reach_grind`. This is the + exact shape used in the Safe case. + +Both are authored from the specs + contract side only — no peeking at +`Proofs.lean`. + +There is also a concrete three-step chain example using +`reachable_step` + `reachable_trans` to sanity-check composition. + +## Applicability estimate + +| Case | Helps via this pack? | +| -------------------------------------- | ------------------------------------------------------- | +| `Safe/OwnerManagerReach` | **Partially.** `reachable_preserves_invariant` closes generic closure obligations (e.g. `reachableInList` propagation), but the *non-trivial* Safe theorems (`inListReachable`, acyclicity, unique predecessor after `addOwner`/`removeOwner`/`swapOwner`) require case-specific reasoning about how `next` is mutated at a handful of specific keys. The pack turns "induction on reach" into one-liner `verity_reach_grind`, but the surrounding `next`-mutation algebra is still the hard part. Estimate: closes ≤ 30–40% of obligations end-to-end. | +| `Kleros/SortitionTrees` | No — no reach relation. Needs S1's arithmetic grindset. | +| `Lido/VaulthubLocked` | No — no reach relation. Needs S1's arithmetic grindset. | +| `PaladinVotes/StreamRecoveryClaimUsdc` | No — no reach relation. Needs S1's arithmetic grindset. | + +So exactly **one** of the four cases actually benefits from the reach +pack. The other three were misclassified as reach-heavy. + +## Limitations + +- The witness-based lemmas are generic over `step : σ → α → α`. 
Safe's + `next s a = wordToAddress (s.storageMap 0 a)` fits this shape, but + any case using a *relational* step (`next s a = b` as an arbitrary + predicate, not a function) would need a small adapter to bridge to + `Relation.ReflTransGen`. Not currently needed. +- `verity_reach_grind` will happily spin on goals that are **not** + reach-closure shaped (plain `grind` will then hit limits); it is not + a universal solver. +- The E-matching patterns for `reachable_trans`/`reachable_snoc` are + intentionally omitted — re-adding them as `@[grind →]` would loop. + If a future need arises, attach an explicit `grind_pattern` tied to + a unique top-level symbol. +- `isChain_cons_cons` is only `@[simp]`, not `@[grind]` — its pattern + is too unconstrained for the E-matcher (matches every cons-cons + expression). + +## Open questions for S1 + +- If the merged grindset adds a general `Verity.Specs`-level + `Reachable` alias, `Benchmark.Grindset.Reach.Reachable` can be + re-expressed as a direct `attribute [grind]` re-tag rather than a + new namespaced definition. +- Worth checking whether mathlib's `Relation.TransGen`/`EqvGen` need + analogous packs — not currently exercised by any benchmark case. diff --git a/Benchmark/Grindset/Reach.lean b/Benchmark/Grindset/Reach.lean new file mode 100644 index 00000000..f5cd0be5 --- /dev/null +++ b/Benchmark/Grindset/Reach.lean @@ -0,0 +1,345 @@ +import Verity.Specs.Common +import Mathlib.Logic.Relation +import Mathlib.Data.List.Basic + +/-! +# Grindset: Reach closure extension + +Custom `grind` attribute pack and a bespoke tactic (`verity_reach_grind`) +for discharging reachability / reach-closure obligations that recur +across several Verity benchmark cases. + +## Reach shapes actually found in the benchmark + +We inspected the four cases flagged as reachability-heavy. 
Only one of +them uses a real reach relation; the others turned out to be arithmetic +or ownership specs with no transitive closure: + +* `Benchmark/Cases/Safe/OwnerManagerReach` — **does** use reach. The + shape is *witness-based*, not inductive: + + ``` + def isChain (s : ContractState) : List Address → Prop + | [] => True + | [_] => True + | a :: b :: t => next s a = b ∧ isChain s (b :: t) + + def reachable (s : ContractState) (a b : Address) : Prop := + ∃ chain, chain.head? = some a + ∧ chain.getLast? = some b + ∧ isChain s chain + ``` + +* `Benchmark/Cases/Kleros/SortitionTrees` — storage arithmetic + invariants, no reach relation. +* `Benchmark/Cases/Lido/VaulthubLocked` — solvency arithmetic (F-01), + no reach relation. +* `Benchmark/Cases/PaladinVotes/StreamRecoveryClaimUsdc` — claim-state + updates, no reach relation. + +Because only `Safe/OwnerManagerReach` is genuinely reach-heavy we focus +on its shape. We *also* provide a generic pack for +`Relation.ReflTransGen` (the standard mathlib inductive transitive +closure) so that future cases that pick the inductive formulation will +be covered out of the box. +-/ + +set_option linter.unusedSectionVars false + +namespace Benchmark.Grindset.Reach + +open Verity +open Verity.EVM.Uint256 + +/-! ## Part 1 — Generic inductive reach via `Relation.ReflTransGen` + +`Relation.ReflTransGen r a b` is the reflexive–transitive closure of a +step relation `r : α → α → Prop`. Useful closure lemmas are already +provided by mathlib; we re-export them under `@[grind]` so `grind` can +chain steps and preserve step-wise invariants automatically. +-/ + +section ReflTransGen +variable {α : Type*} {r : α → α → Prop} + +-- Reflexivity is the obvious "no step" base case. +@[grind] +theorem reach_refl (a : α) : Relation.ReflTransGen r a a := + Relation.ReflTransGen.refl + +-- One step is already reach. 
+@[grind] +theorem reach_of_step {a b : α} (h : r a b) : Relation.ReflTransGen r a b := + Relation.ReflTransGen.single h + +-- Snoc: extend a reach by a final step (native mathlib shape). +@[grind] +theorem reach_tail {a b c : α} + (h₁ : Relation.ReflTransGen r a b) (h₂ : r b c) : + Relation.ReflTransGen r a c := + Relation.ReflTransGen.tail h₁ h₂ + +-- Cons: prefix a reach by an initial step. +@[grind] +theorem reach_head {a b c : α} + (h₁ : r a b) (h₂ : Relation.ReflTransGen r b c) : + Relation.ReflTransGen r a c := + Relation.ReflTransGen.head h₁ h₂ + +-- Transitivity. +@[grind] +theorem reach_trans {a b c : α} + (h₁ : Relation.ReflTransGen r a b) (h₂ : Relation.ReflTransGen r b c) : + Relation.ReflTransGen r a c := + Relation.ReflTransGen.trans h₁ h₂ + +/-- +Invariant preservation under `ReflTransGen`. If `P` is preserved by +every `r`-step, then `P` is preserved by `ReflTransGen r`. + +This is the *canonical* "reach-closure" lemma and the thing `grind` +has the hardest time synthesising on its own, because it hides an +induction on the reach derivation. +-/ +theorem reach_preserves_invariant + {P : α → Prop} + (hStep : ∀ x y, r x y → P x → P y) + {a b : α} (hR : Relation.ReflTransGen r a b) (hP : P a) : P b := by + induction hR with + | refl => exact hP + | tail _ hrxy ih => exact hStep _ _ hrxy ih + +end ReflTransGen + +/-! ## Part 2 — Witness-based reach (`isChain` / `reachable` shape) + +This is the shape actually used in `Safe/OwnerManagerReach`. We don't +import that module (we want `Grindset.Reach` to be self-contained and +reusable), so we reproduce the shape generically over a *step function* +`step : σ → α → α` and derive the same closure theorems. A user who +has their own `reachable` and `isChain` can then just plumb through +these lemmas with a one-line adapter. +-/ + +section ChainReach +variable {σ : Type*} {α : Type*} + +/-- A chain is a list where consecutive elements are connected by +`step s`. 
Mirrors `Safe.OwnerManagerReach.isChain` generically. -/ +def IsChain (step : σ → α → α) (s : σ) : List α → Prop + | [] => True + | [_] => True + | a :: b :: t => step s a = b ∧ IsChain step s (b :: t) + +@[grind, simp] +theorem isChain_nil (step : σ → α → α) (s : σ) : + IsChain step s ([] : List α) := trivial + +@[grind, simp] +theorem isChain_singleton (step : σ → α → α) (s : σ) (a : α) : + IsChain step s [a] := trivial + +@[simp] +theorem isChain_cons_cons (step : σ → α → α) (s : σ) (a b : α) (t : List α) : + IsChain step s (a :: b :: t) ↔ + step s a = b ∧ IsChain step s (b :: t) := Iff.rfl + +/-- Tail of a chain is a chain. Useful for inducting over chain length. -/ +theorem isChain_tail (step : σ → α → α) (s : σ) : + ∀ {a : α} {t : List α}, IsChain step s (a :: t) → IsChain step s t + | _, [], _ => trivial + | _, _ :: _, h => h.2 + +/-- Append a `step s b` tail to a chain ending at `b`. -/ +private theorem isChain_append_step (step : σ → α → α) (s : σ) (b : α) : + ∀ (chain : List α), + IsChain step s chain → chain.getLast? = some b → + IsChain step s (chain ++ [step s b]) + | [], _, h => by simp [List.getLast?] at h + | [a], _, hlast => by + have ha : a = b := by simpa [List.getLast?] using hlast + subst ha + exact ⟨rfl, trivial⟩ + | a₁ :: a₂ :: t, hch, hlast => by + have hstep : step s a₁ = a₂ := hch.1 + have hrest : IsChain step s (a₂ :: t) := hch.2 + have hlast' : (a₂ :: t).getLast? = some b := by + simpa [List.getLast?] using hlast + have ih := isChain_append_step step s b (a₂ :: t) hrest hlast' + -- (a₁ :: a₂ :: t) ++ [step s b] = a₁ :: ((a₂ :: t) ++ [step s b]) + show IsChain step s (a₁ :: ((a₂ :: t) ++ [step s b])) + exact ⟨hstep, ih⟩ + +/-- Witness-based reachability: there is a chain from `a` to `b`. -/ +def Reachable (step : σ → α → α) (s : σ) (a b : α) : Prop := + ∃ chain : List α, + chain.head? = some a ∧ + chain.getLast? 
= some b ∧ + IsChain step s chain + +theorem reachable_refl (step : σ → α → α) (s : σ) (a : α) : + Reachable step s a a := + ⟨[a], rfl, rfl, isChain_singleton step s a⟩ + +theorem reachable_step (step : σ → α → α) (s : σ) (a : α) : + Reachable step s a (step s a) := + ⟨[a, step s a], rfl, rfl, ⟨rfl, trivial⟩⟩ + +/-- +A single forward step preserves reachability: if `Reachable s a b` +then `Reachable s a (step s b)`. This is the most common closure +lemma in practice (the Safe proofs repeatedly extend a witnessed +chain by one hop). +-/ +theorem reachable_snoc (step : σ → α → α) (s : σ) + {a b : α} (h : Reachable step s a b) : + Reachable step s a (step s b) := by + obtain ⟨chain, hhd, hlast, hch⟩ := h + refine ⟨chain ++ [step s b], ?_, ?_, ?_⟩ + · -- head of chain ++ [x] is head of chain when chain ≠ [] + cases chain with + | nil => simp [List.head?] at hhd + | cons c cs => simpa [List.head?] using hhd + · -- last of chain ++ [x] is x + simp + · exact isChain_append_step step s b chain hch hlast + +/-- Transitivity of chain-reachability (concatenation of witnesses). -/ +theorem reachable_trans (step : σ → α → α) (s : σ) + {a b c : α} (h1 : Reachable step s a b) (h2 : Reachable step s b c) : + Reachable step s a c := by + obtain ⟨chain₂, hhd₂, hlast₂, hch₂⟩ := h2 + -- Auxiliary: walk `chain₂` and repeatedly extend the prefix reach + -- witness by `reachable_snoc`. + suffices aux : ∀ (chain : List α) (a b c : α), + chain.head? = some b → chain.getLast? = some c → + IsChain step s chain → Reachable step s a b → Reachable step s a c from + aux chain₂ a b c hhd₂ hlast₂ hch₂ h1 + intro chain + induction chain with + | nil => + intros _ _ _ hhd _ _ _ + simp [List.head?] at hhd + | cons x xs ih => + intros a b c hhd hlast hch h1 + have hx : x = b := by simpa [List.head?] using hhd + cases xs with + | nil => + have hxc : x = c := by simpa [List.getLast?] 
using hlast + have hbc : b = c := hx ▸ hxc + exact hbc ▸ h1 + | cons y ys => + have hstep : step s x = y := hch.1 + have hrest : IsChain step s (y :: ys) := hch.2 + have hlast' : (y :: ys).getLast? = some c := by + simpa [List.getLast?] using hlast + have hhd' : (y :: ys).head? = some y := rfl + have hstep_b : step s b = y := hx ▸ hstep + have hay : Reachable step s a y := by + have := reachable_snoc step s h1 + rw [hstep_b] at this + exact this + exact ih a y c hhd' hlast' hrest hay + +/-- +**The** reach-closure lemma for the chain-witness shape: +an invariant preserved by every `step` is preserved by `Reachable`. + +This is the `reach_preserves_invariant` counterpart for witness-based +reach — see `REACH_NOTES.md` for discussion. +-/ +theorem reachable_preserves_invariant + {step : σ → α → α} {s : σ} {P : α → Prop} + (hStep : ∀ x, P x → P (step s x)) + {a b : α} (h : Reachable step s a b) (hP : P a) : P b := by + obtain ⟨chain, hhd, hlast, hch⟩ := h + -- Auxiliary: for any chain with head = some a, last = some b, and + -- `IsChain`, `P a → P b`. Proven by induction on the chain. + suffices aux : ∀ (chain : List α) (a b : α), + chain.head? = some a → chain.getLast? = some b → + IsChain step s chain → P a → P b from aux chain a b hhd hlast hch hP + intro chain + induction chain with + | nil => + intros a b hhd _ _ _ + simp [List.head?] at hhd + | cons x xs ih => + intros a b hhd hlast hch hP + have hx : x = a := by simpa [List.head?] using hhd + cases xs with + | nil => + have hxb : x = b := by simpa [List.getLast?] using hlast + have hab : a = b := hx ▸ hxb + exact hab ▸ hP + | cons y ys => + have hstep : step s x = y := hch.1 + have hrest : IsChain step s (y :: ys) := hch.2 + have hlast' : (y :: ys).getLast? = some b := by + simpa [List.getLast?] using hlast + have hhd' : (y :: ys).head? 
= some y := rfl + have hstep_a : step s a = y := hx ▸ hstep + have hPy : P y := hstep_a ▸ hStep a hP + exact ih y b hhd' hlast' hrest hPy + +/-- Convenience: if reaching `a` from itself then extending by a step, +we land exactly at `step s a`. Useful sugar for `grind`. -/ +theorem reachable_of_step (step : σ → α → α) (s : σ) (a : α) : + Reachable step s a (step s a) := reachable_step step s a + +end ChainReach + +-- We intentionally do NOT tag `reachable_snoc` or `reachable_trans` +-- globally with `@[grind]` — they are too productive (each instance +-- fires on any reachability fact in context and can loop the +-- E-matcher). They are still handed to `grind` as explicit hints +-- inside the `verity_reach_grind` macro in controlled situations. +attribute [grind] reachable_refl +attribute [grind] reachable_step +attribute [grind] reachable_of_step + +/-! ## Part 3 — The `verity_reach_grind` tactic + +`grind`'s E-matcher is strong at rewriting and propagating equalities, +but it cannot synthesise inductions on reach derivations on its own. +The lemmas above ship the induction *result* as ordinary theorems, so +most concrete obligations of the form + + `Reachable step s a b → Inv a → Inv b` + +close via `reachable_preserves_invariant` plus `grind`'s usual +unfolding. For trickier goals we expose a tactic macro that applies +the closure lemmas first (closing subgoals with `grind`), then falls +back to plain `grind`. + +We deliberately use a simple `macro` (not parameterised by extra +`grind` hints) — extra hypotheses can always be introduced by the user +before calling `verity_reach_grind` and `grind` will pick them up. +-/ + +/-- +`verity_reach_grind` is a small wrapper over `grind` that makes the +standard reach-closure lemmas available as hints. It first applies +`reachable_preserves_invariant` / `reach_preserves_invariant` (closing +subgoals by `assumption` or `grind`), then falls back to plain `grind`.
+-/ +macro (name := verity_reach_grind) "verity_reach_grind" : tactic => + `(tactic| + first + -- 1. Try the canonical reach-preservation closure first. This + -- handles the overwhelmingly common "Reach … → Inv … → Inv …" + -- shape by applying `*_preserves_invariant` and dispatching + -- the step-preservation subgoal by `grind`. + | (apply Benchmark.Grindset.Reach.reachable_preserves_invariant <;> + first | assumption | grind) + | (apply Benchmark.Grindset.Reach.reach_preserves_invariant <;> + first | assumption | grind) + -- 2. Plain `grind` (no snoc/trans, to avoid E-matcher loops). The + -- cheap closure facts (`refl`, `step`, `of_step`) are already + -- globally tagged `@[grind]` and will fire automatically. + | grind + -- 3. Last-ditch: include the productive lemmas explicitly. Only + -- useful for tiny finite chains; usually hits thresholds. + | grind [reach_trans, reach_tail, reach_head, + reachable_snoc, reachable_trans]) + +end Benchmark.Grindset.Reach diff --git a/Benchmark/Grindset/ReachTests.lean b/Benchmark/Grindset/ReachTests.lean new file mode 100644 index 00000000..78e5bdb7 --- /dev/null +++ b/Benchmark/Grindset/ReachTests.lean @@ -0,0 +1,102 @@ +import Benchmark.Grindset.Reach +import Mathlib.Logic.Relation + +/-! +# Grindset: Reach closure — demo proofs + +These two tests demonstrate that the `Reach.lean` extension really +does close reach-closure obligations. They are *independent* of any +case's `Proofs.lean` — both theorems are authored from scratch using +only the specs side (an abstract `step` / `next` function and a +user-supplied step-preservation hypothesis). + +Both tests are closed using `verity_reach_grind`, the macro defined in +`Benchmark.Grindset.Reach`. +-/ + +set_option linter.unusedSectionVars false + +namespace Benchmark.Grindset.Reach.Tests + +open Benchmark.Grindset.Reach + +/-! 
## Demo 1 — inductive `ReflTransGen` invariant preservation + +A small linked-list style state: the state is a function `Nat → Nat` +mapping each slot to the "next" slot. The step relation says `a` can +step to `b` in state `f` iff `f a = b`. We prove that any invariant +which is preserved by one step is preserved under the full transitive +closure — the standard "reach-preserves-invariant" shape. + +This closes via the generic `ReflTransGen`-tagged lemmas. +-/ + +def stepRel (f : Nat → Nat) (a b : Nat) : Prop := f a = b + +/-- +If `P` is closed under `stepRel f` then `P` is closed under +`Relation.ReflTransGen (stepRel f)`. Closed by `verity_reach_grind`. +-/ +theorem demo_reach_preserves_P + (f : Nat → Nat) (P : Nat → Prop) + (hStep : ∀ x, P x → P (f x)) + (a b : Nat) (hR : Relation.ReflTransGen (stepRel f) a b) (hPa : P a) : + P b := by + have hStep' : ∀ x y, stepRel f x y → P x → P y := by + intro x y hxy hPx + -- stepRel f x y unfolds to f x = y + have : f x = y := hxy + exact this ▸ hStep x hPx + -- Our macro tries the closure lemmas first, then plain grind. + verity_reach_grind + +/-! ## Demo 2 — chain-witness reach preserves a set-membership invariant + +Here we mirror the exact shape used in `Safe/OwnerManagerReach`: a +witnessed chain `Reachable step s a b`, a state-dependent step +function, and an invariant (membership in a step-closed set) that must +propagate along the chain. + +The proof is closed using `verity_reach_grind`, which invokes +`reachable_preserves_invariant` under the hood. +-/ + +/-- +State type: a function from `Nat` (a node) to its successor. The step +function is just state application. +-/ +def chainStep (f : Nat → Nat) (a : Nat) : Nat := f a + +/-- +If a set `S` is closed under `chainStep f` (i.e. `x ∈ S → f x ∈ S`) +and `Reachable (chainStep) f a b` holds, then `a ∈ S → b ∈ S`.
+ +This is the *exact* reach-closure obligation pattern from the Safe +OwnerManagerReach specs (once one specialises `σ := ContractState`, +`α := Address`, `chainStep := next`, and takes `S` to be any +`next`-closed address set such as "nodes reachable from SENTINEL"). +-/ +theorem demo_chain_reach_preserves_membership + (f : Nat → Nat) (S : Set Nat) + (hClosed : ∀ x, x ∈ S → f x ∈ S) + (a b : Nat) (hR : Reachable chainStep f a b) (hA : a ∈ S) : + b ∈ S := by + -- `chainStep f x = f x` by definition, so membership-closure under + -- `f` is exactly membership-closure under `chainStep`. + have hStep : ∀ x, x ∈ S → chainStep f x ∈ S := hClosed + verity_reach_grind + +/-! ## Sanity: the closure lemmas also let `grind` chain concrete steps -/ + +/-- Three-step chain: builds a reach by stacking `reachable_step`. -/ +example (f : Nat → Nat) (a : Nat) : + Reachable chainStep f a (f (f (f a))) := by + -- Each `reachable_step` gives one hop; the trans lemma chains them. + have h1 : Reachable chainStep f a (f a) := reachable_step chainStep f a + have h2 : Reachable chainStep f (f a) (f (f a)) := + reachable_step chainStep f (f a) + have h3 : Reachable chainStep f (f (f a)) (f (f (f a))) := + reachable_step chainStep f (f (f a)) + exact reachable_trans chainStep f (reachable_trans chainStep f h1 h2) h3 + +end Benchmark.Grindset.Reach.Tests diff --git a/Benchmark/Grindset/Tests.lean b/Benchmark/Grindset/Tests.lean new file mode 100644 index 00000000..89dbe044 --- /dev/null +++ b/Benchmark/Grindset/Tests.lean @@ -0,0 +1,89 @@ +/- + Benchmark.Grindset.Tests — demonstration proofs closed by a single `grind`. + + These proofs are written from scratch against `Specs.lean` + `Contract.lean`. + They deliberately do NOT import any `Proofs.lean` from under + `Benchmark/Cases/` — the held-out ground truth is never consulted. 
+ + Each demo theorem has the same shape as the sorry-stubs in + `Benchmark/Generated/.../Tasks/*.lean`, and is discharged by a single + invocation of `grind` (plus, where needed, an `unfold` of the spec + predicate). +-/ + +import Benchmark.Grindset.Core +import Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.Specs +import Benchmark.Cases.Lido.VaulthubLocked.Specs + +namespace Benchmark.Grindset.Tests + +open Verity +open Verity.EVM.Uint256 + +/-! ## SideEntrance.deposit: slot-write spec -/ + +/-- +Demo #1: `deposit` writes `add oldPoolBalance amount` to `poolBalance`. +Closed by a single `grind` call once we unfold the spec predicate and +the contract function. +-/ +theorem demo_deposit_sets_pool_balance + (amount : Verity.Core.Uint256) + (s : ContractState) : + let s' := + ((Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.deposit amount).run s).snd + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.deposit_sets_pool_balance_spec + amount s s' := by + simp only [grind_norm, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.deposit_sets_pool_balance_spec, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.deposit, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.poolBalance, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.totalCredits, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.creditOf] + grind + +/-- +Demo #2: `deposit` credits the caller's mapping slot by `amount`. +This is the "mapping + sender" variant; we rely on +`storageMap_setMapping_sender_eq` (from `Core.lean`) plus `grind_norm` to +collapse the monadic do-block. 
+-/ +theorem demo_deposit_sets_sender_credit + (amount : Verity.Core.Uint256) + (s : ContractState) : + let s' := + ((Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.deposit amount).run s).snd + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.deposit_sets_sender_credit_spec + amount s s' := by + simp only [grind_norm, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.deposit_sets_sender_credit_spec, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.deposit, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.poolBalance, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.totalCredits, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.creditOf] + grind + +/-- +Demo #3: `flashLoanViaDeposit` preserves pool balance. This is a branchy +case because the function body starts with a `require (amount <= oldPoolBalance)`. +The precondition `hBorrow` discharges the branch; the remaining reasoning is +the same slot-write logic as `deposit`. 
+-/ +theorem demo_flashLoanViaDeposit_preserves_pool_balance + (amount : Verity.Core.Uint256) + (s : ContractState) + (hBorrow : amount <= s.storage 0) : + let s' := + ((Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.flashLoanViaDeposit + amount).run s).snd + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.flashLoanViaDeposit_preserves_pool_balance_spec + amount s s' := by + simp only [grind_norm, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.flashLoanViaDeposit_preserves_pool_balance_spec, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.flashLoanViaDeposit, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.poolBalance, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.totalCredits, + Benchmark.Cases.DamnVulnerableDeFi.SideEntrance.SideEntrance.creditOf, hBorrow] + grind + +end Benchmark.Grindset.Tests diff --git a/harness/PROMPT.md b/harness/PROMPT.md index a38026de..7a08a75d 100644 --- a/harness/PROMPT.md +++ b/harness/PROMPT.md @@ -6,6 +6,46 @@ Each task gives the agent: - one editable proof file - one theorem target -The agent must return the full proof file. It must not change specs, change implementations, or rely on hidden solved proofs. +The agent must return the full proof file. It must not change specs, change +implementations, or rely on hidden solved proofs. -The harness rejects placeholders, runs Lean in a temp workspace, and checks the target theorem. +The harness rejects placeholders, runs Lean in a temp workspace, and checks +the target theorem. + +## Proof strategy + +Every generated task skeleton already imports `Benchmark.Grindset` and starts +with a grind-first body of the form: + +```lean +theorem foo ... := by + unfold foo_spec + grind [ContractName.fn, ContractName.fieldA, ContractName.fieldB] +``` + +That is the pattern to keep. Your first attempt should always be: + +1. Keep `unfold <spec_name>` on the first line of the proof. +2. Call `grind [ContractName.fn, <storage fields>]`.
+ Include every storage field declared inside `verity_contract ContractName` + — extra hints are cheap, missing hints are expensive. Do NOT hint the + generic operational lemmas (`getStorage`, `setStorage`, `Verity.bind`, + `Contract.run`, `ContractResult.snd`, …); they are already tagged + `@[grind]` by `Benchmark.Grindset`. +3. If the goal has a case split, introduce the branch hypotheses with + `by_cases` BEFORE the `grind` call and pass each hypothesis into the + `grind [...]` list alongside the contract hints. +4. If `grind` leaves goals open, call `grind?` once on the stuck state. It + prints the concrete lemma set grind chose; copy any additions you see back + into your `grind [...]` hint list, then retry. +5. Only if `grind` still fails after the above, fall back to the simp-heavy + recipe in `harness/PROOF_PATTERNS.md` (`simp` / `simp_all` with the + operational lemmas enumerated explicitly, optionally finished with + `native_decide`). + +Do not remove `import Benchmark.Grindset`, do not remove `unfold <spec_name>`, and +do not revert to a pure `simp`-only pattern unless you have first tried +`grind` with a complete hint list and observed it fail. + +See `harness/PROOF_PATTERNS.md` for worked examples of both the grind-first +primary pattern and the simp/`by_cases` fallback. diff --git a/harness/PROOF_PATTERNS.md b/harness/PROOF_PATTERNS.md index 8d2c337d..a47ae971 100644 --- a/harness/PROOF_PATTERNS.md +++ b/harness/PROOF_PATTERNS.md @@ -2,61 +2,125 @@ Use public operational proof patterns, not hidden case solutions. -Verity execution proofs often reduce with `simp` once the execution path is fixed. -Typical symbols to unfold or simplify are: - -- `getStorage`, `setStorage`, `setMapping`, `setMappingUint` -- `Verity.require`, `Verity.bind`, `Bind.bind` -- `Verity.pure`, `Pure.pure` -- `Contract.run`, `ContractResult.snd` -- the contract's storage labels, such as `ContractName.counter` - -The simp set MUST include ALL storage field definitions from the contract.
Storage fields are declared as `fieldName : Uint256 := slot N` inside `verity_contract`. Include each one by name (e.g., `ContractName.depositCount`, `ContractName.chainStarted`) so that `.slot` reduces to the concrete slot number. Without these, simp leaves unresolved `if` expressions comparing `s.storage ContractName.field.slot` against constants. - -Common pattern for a successful-path slot-write theorem: +Lean 4.22's `grind` tactic is the primary closer for Verity execution proofs. +Every generated task skeleton imports `Benchmark.Grindset`, which bundles the +`@[grind]`-tagged operational lemmas (`getStorage`, `setStorage`, +`setMapping`, `setMappingUint`, `Verity.require`, `Verity.bind`, `Bind.bind`, +`Verity.pure`, `Pure.pure`, `Contract.run`, `ContractResult.snd`, and friends) +needed to reduce Verity execution terms. You should lean on `grind` first and +only fall back to `simp`/`by_cases` if grind leaves goals open. + +## Primary: grind-first pattern + +Start with `unfold` on the spec name followed by `grind [...]` passing the +contract function you are reasoning about and every storage field it touches. +Storage fields are declared as `fieldName : Uint256 := slot N` inside +`verity_contract`; hint each one by its fully-qualified name +(e.g. `ContractName.depositCount`, `ContractName.chainStarted`) so `grind` can +reduce `.slot` to the concrete slot number. ```lean -private theorem slot_write_helper +theorem slot_write_theorem (x : Uint256) (s : ContractState) (hGuard : ...) 
: let s' := ((ContractName.fn x).run s).snd - s'.storage slot = expected := by - simp [ContractName.fn, hGuard, ContractName.slotField, - getStorage, setStorage, Verity.require, Verity.bind, Bind.bind, - Verity.pure, Pure.pure, Contract.run, ContractResult.snd] + spec_name x s s' := by + unfold spec_name + grind [ContractName.fn, + ContractName.fieldA, ContractName.fieldB, ContractName.fieldC] ``` -Common pattern for a branch theorem: +Rules of thumb for the grind hint list: + +- Always include `ContractName.fn` for the contract function under test. +- Always include every storage field of `ContractName` that the function + reads or writes (when in doubt, include them all — extra hints are cheap). +- If the spec references another helper function (e.g. `computedClaimAmount`), + add that helper name too so `grind` can unfold it. +- You do NOT need to hint the operational lemmas (`getStorage`, `setStorage`, + `Verity.bind`, `Contract.run`, `ContractResult.snd`, ...). They are already + tagged `@[grind]` via `Benchmark.Grindset`. + +If `grind` leaves the goal visibly closer but not closed, use `grind?` once +to print the actual lemma set it chose; copy any useful additions back into +your `grind [...]` hint list, then retry. + +## Branching with grind + +When the contract has a case split (an `ite`, a `require` with a non-trivial +condition, or nested `if`s in the spec), prove the branch facts first and +pass them to `grind` along with the usual hints: ```lean -by_cases hBranch : condition -· simp [ContractName.fn, hBranch, ...] -· have hNotBranch : ¬ condition := hBranch - simp [ContractName.fn, hNotBranch, ...] +theorem branch_theorem ... 
:= by + by_cases hBranch : condition + · unfold spec_name + grind [ContractName.fn, ContractName.field, hBranch] + · have hNotBranch : ¬ condition := hBranch + unfold spec_name + grind [ContractName.fn, ContractName.field, hNotBranch] ``` -Do not use `split` on the final post-state goal unless the goal itself is explicitly a conjunction or a sum-type elimination. Generated Verity execution terms often simplify better if you first prove the exact branch facts used by the contract and then call `simp`. +For nested conditionals (e.g. a threshold check inside a deposit-size check), +nest `by_cases` the same way and put every branch hypothesis into the +`grind [...]` list: -For arithmetic threshold branches, the negated fact often needs to be restated in the comparator form used by the generated code. Example: +```lean +by_cases hBig : depositAmount >= 32000000000 +· by_cases hThresh : add (s.storage 1) 1 = 65536 + · grind [ContractName.fn, ContractName.field, hCount, hMin, hBig, hThresh] + · grind [ContractName.fn, ContractName.field, hCount, hMin, hBig, hThresh] +· grind [ContractName.fn, ContractName.field, hCount, hMin, hBig] +``` + +For arithmetic threshold branches, restate the negated fact in the comparator +form used by the generated code before handing it to `grind`: ```lean have hNotFull : ¬ 32000000000 ≤ depositAmount := Nat.not_le_of_lt hSmall -simp [ContractName.fn, hCount, hMin, hNotFull, ...] +grind [ContractName.fn, ContractName.field, hCount, hMin, hNotFull] ``` -If one theorem has to work for both sides of a branch, prove two private helpers first, one per branch, then use `by_cases` in the public theorem and `simpa using` the matching helper. +If one theorem has to work for both sides of a branch, prove two private +helpers first (one per branch, each closed by `grind`), then `by_cases` in +the public theorem and finish each branch with `exact helper_branch ...`. 
+ +## Fallback: simp + by_cases -If `simp` leaves nested `match`/`if` expressions with free variables, use `by_cases` on each unresolved condition BEFORE calling `simp`, not `split` after. Pass all case hypotheses to `simp`. For contracts with nested conditionals (e.g., a threshold check inside a deposit-size check), nest `by_cases`: +If `grind` still leaves goals after you have unfolded the spec and hinted the +contract function plus every storage field, fall back to the pre-grindset +simp-heavy recipe. This is strictly a fallback; prefer to extend the `grind` +hint list first. ```lean -by_cases hBig : depositAmount >= 32000000000 -· by_cases hThresh : add (s.storage 1) 1 = 65536 - · simp [ContractName.fn, getStorage, setStorage, ..., hCount, hMin, hBig, hThresh] - · simp [ContractName.fn, getStorage, setStorage, ..., hCount, hMin, hBig, hThresh] -· simp [ContractName.fn, getStorage, setStorage, ..., hCount, hMin, hBig] +-- Fallback when grind alone does not close: +by_cases hBranch : condition +· simp [ContractName.fn, hBranch, ContractName.slotField, + getStorage, setStorage, Verity.require, Verity.bind, Bind.bind, + Verity.pure, Pure.pure, Contract.run, ContractResult.snd] +· have hNotBranch : ¬ condition := hBranch + simp [ContractName.fn, hNotBranch, ContractName.slotField, + getStorage, setStorage, Verity.require, Verity.bind, Bind.bind, + Verity.pure, Pure.pure, Contract.run, ContractResult.snd] ``` -If `simp` leaves unsolved goals because a hypothesis uses a spec helper name (e.g., `computedClaimAmount`) while the goal has the definition already unfolded, use `simp_all` instead of `simp`. `simp_all` rewrites hypotheses into the goal context, resolving name mismatches automatically. Pattern: +The simp set MUST include every storage field definition from the contract. +Without them, `simp` leaves unresolved `if` expressions comparing +`s.storage ContractName.field.slot` against constants. 
+ +Do not use `split` on the final post-state goal unless the goal itself is +explicitly a conjunction or a sum-type elimination. Generated Verity +execution terms often simplify better if you first prove the exact branch +facts used by the contract and then call `simp`. + +If `simp` leaves nested `match`/`if` expressions with free variables, use +`by_cases` on each unresolved condition BEFORE calling `simp`, not `split` +after. Pass all case hypotheses to `simp`. + +If `simp` leaves unsolved goals because a hypothesis uses a spec helper name +(e.g., `computedClaimAmount`) while the goal has the definition already +unfolded, use `simp_all` instead of `simp`. `simp_all` rewrites hypotheses +into the goal context, resolving name mismatches automatically. ```lean unfold specName @@ -66,9 +130,9 @@ simp_all [ContractName.fn, getStorage, setStorage, getMapping, setMapping, specHelper] ``` -If `simp` reduces the goal to concrete slot equalities or a finite `if` over concrete slot numbers, `native_decide` or `decide` often closes the remaining goal. - -Typical shape: +If `simp` reduces the goal to concrete slot equalities or a finite `if` over +concrete slot numbers, `native_decide` or `decide` often closes the remaining +goal: ```lean have hSlot : s'.storage slot = expected := by @@ -76,7 +140,8 @@ have hSlot : s'.storage slot = expected := by native_decide ``` -If `simp` already solves the goal, do not leave a trailing `decide`, `exact`, or extra tactic line after it; Lean will report `no goals to be solved`. +If `simp` already solves the goal, do not leave a trailing `decide`, `exact`, +or extra tactic line after it; Lean will report `no goals to be solved`. 
If the public theorem is just a named spec, it is often cleaner to: diff --git a/harness/README.md b/harness/README.md index 71cb8a40..997bed04 100644 --- a/harness/README.md +++ b/harness/README.md @@ -23,10 +23,12 @@ Core files: - `harness/agents/*.json`: bundled profiles Bundled profiles: -- `default`: repo reference profile -- `interactive`: minimal-tool interactive profile +- `default`: repo reference profile (strict, builtin/fast via proxy) - `openai-compatible`: generic external OpenAI-compatible profile - `openai-proxy-fast`: pinned proxy profile +- `interactive-gpt`: interactive, OpenRouter `openai/gpt-5.4` +- `interactive-opus`: interactive, OpenRouter `anthropic/claude-opus-4.7` +- `interactive-smart`: interactive, `builtin/smart` via configured proxy Runtime modes: - `strict`: no agent tools diff --git a/harness/agents/builtin-smart.json b/harness/agents/builtin-smart.json deleted file mode 100644 index 1cf2d9e5..00000000 --- a/harness/agents/builtin-smart.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "schema_version": 1, - "agent_id": "verity-benchmark-builtin-smart", - "track": "reference", - "run_slug": "builtin-smart", - "adapter": "openai_compatible", - "base_url": "https://agent-backend.thomas.md/v1", - "base_url_env": null, - "model": "builtin/smart", - "model_env": null, - "api_key": null, - "api_key_env": "VERITY_BENCHMARK_AGENT_API_KEY", - "chat_completions_path": "/chat/completions", - "models_path": "/models", - "system_prompt_files": [ - "harness/PROMPT.md", - "harness/POLICY.md", - "harness/TOOLS.md", - "harness/PROOF_PATTERNS.md" - ], - "mode": "strict", - "temperature": 0.0, - "max_completion_tokens": 2000, - "max_attempts": 8, - "max_tool_calls": 24, - "headers": {}, - "header_envs": {}, - "extra_body": { - "thinking": { - "type": "disabled" - } - }, - "request_timeout_seconds": 120 -} diff --git a/harness/agents/combined-lean-tools.json b/harness/agents/interactive-gpt.json similarity index 67% rename from 
harness/agents/combined-lean-tools.json rename to harness/agents/interactive-gpt.json index e8fcfa17..2c1b823e 100644 --- a/harness/agents/combined-lean-tools.json +++ b/harness/agents/interactive-gpt.json @@ -1,11 +1,12 @@ { "schema_version": 1, - "agent_id": "combined-lean-tools", + "agent_id": "interactive-gpt", + "mode": "interactive", "track": "custom", - "run_slug": "combined-lean-tools", + "run_slug": "interactive-gpt-5-4", "adapter": "openai_compatible", "base_url": "https://openrouter.ai/api/v1", - "model": "google/gemini-3.1-flash-lite-preview", + "model": "openai/gpt-5.4", "api_key_env": "OPENROUTER_API_KEY", "chat_completions_path": "/chat/completions", "models_path": "/models", @@ -15,13 +16,12 @@ "harness/TOOLS.md", "harness/PROOF_PATTERNS.md" ], - "mode": "interactive", "temperature": 0.0, - "max_completion_tokens": 2000, - "max_attempts": 12, - "max_tool_calls": 24, + "max_completion_tokens": 4096, + "max_attempts": 32, + "max_tool_calls": 80, "headers": {}, "header_envs": {}, "extra_body": {}, - "request_timeout_seconds": 120 + "request_timeout_seconds": 180 } diff --git a/harness/agents/interactive-candidate.json b/harness/agents/interactive-opus.json similarity index 54% rename from harness/agents/interactive-candidate.json rename to harness/agents/interactive-opus.json index 217809e4..2e2d4a9f 100644 --- a/harness/agents/interactive-candidate.json +++ b/harness/agents/interactive-opus.json @@ -1,16 +1,13 @@ { "schema_version": 1, - "agent_id": "openai-interactive", + "agent_id": "interactive-opus", "mode": "interactive", "track": "custom", - "run_slug": "interactive-candidate", + "run_slug": "interactive-opus-4-7", "adapter": "openai_compatible", - "base_url": null, - "base_url_env": "VERITY_BENCHMARK_AGENT_BASE_URL", - "model": null, - "model_env": "VERITY_BENCHMARK_AGENT_MODEL", - "api_key": null, - "api_key_env": "VERITY_BENCHMARK_AGENT_API_KEY", + "base_url": "https://openrouter.ai/api/v1", + "model": "anthropic/claude-opus-4.7", + 
"api_key_env": "OPENROUTER_API_KEY", "chat_completions_path": "/chat/completions", "models_path": "/models", "system_prompt_files": [ @@ -20,9 +17,9 @@ "harness/PROOF_PATTERNS.md" ], "temperature": 0.0, - "max_completion_tokens": 3000, - "max_attempts": 16, - "max_tool_calls": 24, + "max_completion_tokens": 4096, + "max_attempts": 32, + "max_tool_calls": 80, "headers": {}, "header_envs": {}, "extra_body": {}, diff --git a/harness/agents/interactive-smart.json b/harness/agents/interactive-smart.json index 82d45275..b0095371 100644 --- a/harness/agents/interactive-smart.json +++ b/harness/agents/interactive-smart.json @@ -21,8 +21,8 @@ ], "temperature": 0.0, "max_completion_tokens": 2000, - "max_attempts": 16, - "max_tool_calls": 24, + "max_attempts": 32, + "max_tool_calls": 80, "headers": {}, "header_envs": {}, "extra_body": { diff --git a/harness/agents/interactive.json b/harness/agents/interactive.json deleted file mode 100644 index 8c9bf850..00000000 --- a/harness/agents/interactive.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "schema_version": 1, - "agent_id": "openai-interactive", - "mode": "interactive", - "track": "custom", - "run_slug": "interactive-proxy", - "adapter": "openai_compatible", - "base_url": null, - "base_url_env": "VERITY_BENCHMARK_AGENT_BASE_URL", - "model": null, - "model_env": "VERITY_BENCHMARK_AGENT_MODEL", - "api_key": null, - "api_key_env": "VERITY_BENCHMARK_AGENT_API_KEY", - "chat_completions_path": "/chat/completions", - "models_path": "/models", - "system_prompt_files": [ - "harness/PROMPT.md", - "harness/POLICY.md", - "harness/TOOLS.md", - "harness/PROOF_PATTERNS.md" - ], - "temperature": 0.0, - "max_completion_tokens": 2000, - "max_attempts": 16, - "max_tool_calls": 24, - "headers": {}, - "header_envs": {}, - "extra_body": { - "thinking": { - "type": "disabled" - } - }, - "request_timeout_seconds": 120 -} diff --git a/harness/agents/leanstral.json b/harness/agents/leanstral.json deleted file mode 100644 index a9a10779..00000000 --- 
a/harness/agents/leanstral.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "schema_version": 1, - "agent_id": "leanstral-completion", - "mode": "custom", - "track": "custom", - "run_slug": "leanstral", - "adapter": "command", - "base_url": "https://spark-de79.gazella-vector.ts.net", - "base_url_env": null, - "model": "mistralai_Leanstral-128x3.9B-2603-Q4_K_M.gguf", - "model_env": null, - "api_key": null, - "api_key_env": null, - "chat_completions_path": "/completion", - "models_path": "/models", - "system_prompt_files": [ - "harness/PROMPT.md", - "harness/POLICY.md", - "harness/TOOLS.md", - "harness/PROOF_PATTERNS.md" - ], - "temperature": 0.0, - "max_completion_tokens": 2000, - "max_attempts": 8, - "max_tool_calls": 24, - "headers": {}, - "header_envs": {}, - "extra_body": {}, - "command": [ - "python3", - "harness/leanstral_completion_adapter.py" - ], - "request_timeout_seconds": 120 -} diff --git a/harness/agents/openrouter-gemini-3.1-flash-lite-preview.json b/harness/agents/openrouter-gemini-3.1-flash-lite-preview.json deleted file mode 100644 index 2468ee7c..00000000 --- a/harness/agents/openrouter-gemini-3.1-flash-lite-preview.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "schema_version": 1, - "agent_id": "openrouter-gemini-3.1-flash-lite-preview", - "track": "custom", - "run_slug": "openrouter-gemini-3.1-flash-lite-preview", - "adapter": "openai_compatible", - "base_url": "https://openrouter.ai/api/v1", - "base_url_env": null, - "model": "google/gemini-3.1-flash-lite-preview", - "model_env": null, - "api_key": null, - "api_key_env": "OPENROUTER_API_KEY", - "chat_completions_path": "/chat/completions", - "models_path": "/models", - "system_prompt_files": [ - "harness/PROMPT.md", - "harness/POLICY.md", - "harness/TOOLS.md", - "harness/PROOF_PATTERNS.md" - ], - "mode": "strict", - "temperature": 0.0, - "max_completion_tokens": 2000, - "max_attempts": 8, - "max_tool_calls": 24, - "headers": {}, - "header_envs": {}, - "extra_body": { - "thinking": { - "type": "disabled" - } 
- }, - "request_timeout_seconds": 120 -} diff --git a/harness/default_agent.py b/harness/default_agent.py index fb237396..4d2b15ee 100644 --- a/harness/default_agent.py +++ b/harness/default_agent.py @@ -8,6 +8,7 @@ import re import subprocess import sys +import random import time from dataclasses import dataclass from datetime import datetime, timezone @@ -16,7 +17,14 @@ from urllib import error, request from benchmark_config import load_benchmark_agent_defaults -from interactive_runtime import TaskProofRuntime, tool_result_json, extract_contract_simp_terms, classify_failure +from interactive_runtime import ( + TaskProofRuntime, + classify_failure, + extract_contract_simp_terms, + prebuild_task_modules, + tool_result_json, + _PREFLIGHT_FAILURE_MODES as _RUNTIME_PREFLIGHT_FAILURE_MODES, +) from task_runner import ROOT, load_task_record, resolve_task_manifest AGENT_RESULTS_DIR = ROOT / "results" / "agent_runs" @@ -452,9 +460,49 @@ def resolve_config(path: Path, *, require_secrets: bool, profile: str | None = N ) +def _synthesized_interactive_tools_prompt() -> str: + """Render the real interactive tool surface from TaskProofRuntime.tool_specs(). + + Replaces the static harness/TOOLS.md which advertises `lake build`, `scripts/run_task.sh`, + and `scripts/run_all.sh` — none of which are actually callable in interactive mode. + """ + lines = [ + "# Interactive Tool Surface", + "", + "You have exactly these function tools. Call them; do NOT call shell commands:", + "", + ] + # Build a minimal task shim to get tool_specs without instantiating a real task. + # Note: tool_specs() uses self.paths.public_files for the read_public_file enum, + # so we enumerate generic names here instead of calling tool_specs() directly. + surface = [ + ("read_public_file(path)", "Read one of the task's public Lean files (impl/spec/editable)."), + ("write_editable_proof(content)", "Replace the editable proof file AND automatically run the Lean check. 
Response reports status (passed/failed), failure_mode, details, failure_class, and repair_hints. A separate run_lean_check call is not needed after this."), + ("run_lean_check()", "Re-run `lake env lean` without changing the file (redundant immediately after write_editable_proof)."), + ("inspect_lean_goals()", "Inspect goal state at explicit `?_` holes. Unsupported if no hole present."), + ("try_tactic_at_hole(tactic)", "Replace all `?_` holes with a tactic and check. Pass a raw tactic (e.g. `omega`, `simp_all`, `decide`); substitution auto-wraps as `(by tac)` at term positions like `exact ?_`. Preserves original proof on failure."), + ("search_public_defs(query)", "Search the task's public impl/spec files for def/theorem/lemma names. Does NOT search Lean core / Batteries / Mathlib — use `exact?`/`apply?`/`rw?` via `try_tactic_at_hole` for standard-library lemmas."), + ] + for name, desc in surface: + lines.append(f"- `{name}` — {desc}") + lines.extend([ + "", + "Typical loop: write_editable_proof (which runs Lean) → read repair_hints → iterate.", + "`?_` is a PROBE for `inspect_lean_goals` / `try_tactic_at_hole`, never a final proof — Lean rejects every submission containing `?_`.", + "Do NOT emit `lake build` or `scripts/...`; there is no shell tool.", + ]) + return "\n".join(lines) + + def build_system_prompt(config: ResolvedAgentConfig) -> str: sections = [] for rel_path in config.system_prompt_files: + # In interactive mode, replace the static TOOLS.md (which advertises shell + # commands that don't exist) with a synthesized description of the real + # function-tool surface. 
+ if config.mode == "interactive" and rel_path.endswith("TOOLS.md"): + sections.append(f"[{rel_path}]\n{_synthesized_interactive_tools_prompt()}") + continue path = ROOT / rel_path sections.append(f"[{rel_path}]\n{path.read_text(encoding='utf-8').strip()}") return "\n\n".join(sections).strip() @@ -559,9 +607,9 @@ def build_user_prompt(task: dict[str, Any], *, interactive: bool) -> str: "You are in interactive mode with verification tools.\n" "All implementation, specification, and editable proof files are already provided below. " "Do NOT re-read them with read_public_file — start working immediately.\n" - "Workflow: call write_editable_proof with your complete proof file, then call run_lean_check to verify.\n" + "Workflow: call write_editable_proof with your complete proof file — it returns the Lean check result directly, you do NOT need a separate run_lean_check call afterward.\n" "If the check fails, read the failure_class and repair_hints in the result.\n" - "For unknown_identifier errors: use search_public_defs to find correct names.\n" + "For unknown_identifier errors: read the repair_hints before searching — the missing name may be a tactic in term position (wrap in `by`), a local binder (call inspect_lean_goals instead), or a Mathlib lemma (this workspace has NO Mathlib; use `omega`/`ring`/`simp arith`). Only call search_public_defs for a genuine project-defined name from the implementation or spec file.\n" "For unsolved_goals: use inspect_lean_goals with a ?_ hole to see the exact goal, then write targeted tactics.\n" "Fix the specific error, write the corrected proof, and re-check. 
Do not rewrite from scratch unless the approach is fundamentally wrong.\n" "Only use read_public_file or search_public_defs if you need a definition not shown below.\n" @@ -860,7 +908,13 @@ def build_attempt_trace( "candidate_sha256": stable_digest(candidate_text), "status": status, "failure_mode": failure_mode, - "candidate_changed_from_previous": None if previous_attempt is None else candidate_text != previous_candidate, + # Treat the first non-empty candidate as a change (previously was None, which + # broke candidate_change_count analytics — every successful run showed 0). + "candidate_changed_from_previous": ( + bool(candidate_text.strip()) + if previous_attempt is None + else candidate_text != previous_candidate + ), "failure_mode_changed_from_previous": ( None if previous_attempt is None else failure_mode != previous_trace.get("failure_mode") ), @@ -942,21 +996,42 @@ def build_run_analysis( reasoning_attempts = 0 candidate_change_count = 0 failure_mode_change_count = 0 + distinct_candidate_hashes: set[str] = set() + previous_candidate = "" for attempt in attempts: - trace = attempt.get("trace", {}) - if not isinstance(trace, dict): - continue - if int(trace.get("provider_reasoning_chars") or 0) > 0: - reasoning_attempts += 1 - if trace.get("candidate_changed_from_previous") is True: - candidate_change_count += 1 - if trace.get("failure_mode_changed_from_previous") is True: - failure_mode_change_count += 1 + trace = attempt.get("trace", {}) or {} + if isinstance(trace, dict): + if int(trace.get("provider_reasoning_chars") or 0) > 0: + reasoning_attempts += 1 + if trace.get("candidate_changed_from_previous") is True: + candidate_change_count += 1 + if trace.get("failure_mode_changed_from_previous") is True: + failure_mode_change_count += 1 + candidate_hash = trace.get("candidate_sha256") + if isinstance(candidate_hash, str) and candidate_hash and int(trace.get("candidate_chars") or 0) > 0: + distinct_candidate_hashes.add(candidate_hash) + # Fallback for 
interactive-mode attempts that do not populate `trace`: + # derive candidate changes/hashes directly from candidate_file_contents. + # Count every transition (incl. reverts like A -> B -> A), and record + # each distinct hash separately. Skip this block entirely when `trace` + # is already populated, so non-interactive traces are not redundantly + # re-hashed (which would be harmless while digests match but fragile + # if the two derivation paths ever diverge). + trace_has_hash = isinstance(trace, dict) and bool(trace.get("candidate_sha256")) + if not trace_has_hash: + candidate_text = str(attempt.get("candidate_file_contents", "")) + if candidate_text.strip(): + candidate_hash = stable_digest(candidate_text) + distinct_candidate_hashes.add(candidate_hash) + if candidate_text != previous_candidate: + candidate_change_count += 1 + previous_candidate = candidate_text return { "attempt_count": len(attempts), "tool_calls_used": tool_calls_used, "reasoning_attempt_count": reasoning_attempts, "candidate_change_count": candidate_change_count, + "distinct_candidate_count": len(distinct_candidate_hashes), "failure_mode_change_count": failure_mode_change_count, "final_failure_mode": evaluation.get("failure_mode"), "final_status": evaluation.get("status"), @@ -984,47 +1059,208 @@ def build_finalization_messages( ] +RETRY_STATUS_CODES = frozenset({408, 409, 425, 429, 500, 502, 503, 504}) +MAX_CHAT_COMPLETION_RETRIES = 6 + + +def _parse_retry_after(value: str | None) -> float | None: + """Parse an HTTP `Retry-After` header. + + Accepts both forms permitted by RFC 7231: + - delta-seconds (e.g. "120") + - HTTP-date (e.g. "Wed, 21 Oct 2015 07:28:00 GMT") + + Returns the number of seconds to wait, or None if the value cannot be + parsed. A date in the past is clamped to 0. 
+ """ + if not value: + return None + value = value.strip() + if not value: + return None + try: + return max(0.0, float(value)) + except ValueError: + pass + try: + from email.utils import parsedate_to_datetime + import datetime as _dt + + parsed = parsedate_to_datetime(value) + if parsed is None: + return None + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=_dt.timezone.utc) + delta = (parsed - _dt.datetime.now(_dt.timezone.utc)).total_seconds() + return max(0.0, delta) + except (TypeError, ValueError): + return None + + +def _backoff_delay(attempt: int, retry_after: float | None) -> float: + if retry_after is not None: + # Honour the provider-requested wait. Clamp only at a safety ceiling + # (10 minutes) so a pathological header cannot stall the run + # indefinitely; the previous 60s clamp was too aggressive and caused + # retries to fire while the rate limit was still in force. Add a + # small additive jitter (up to 1s) so concurrent workers hitting the + # same Retry-After do not thunder back in lockstep. + clamped = min(retry_after, 600.0) + return clamped + random.random() + # Exponential backoff with jitter, capped at 30s. + base = min(30.0, 2.0 ** attempt) + return base * (0.5 + random.random() * 0.5) + + +def _post_chat_completion( + config: ResolvedAgentConfig, + payload: dict[str, Any], + model: str, +) -> dict[str, Any]: + """POST one chat completion request with retries on transient failures. + + Retries on HTTP 408/409/425/429/500/502/503/504 and URL-level errors (timeouts) + using exponential backoff with jitter, respecting Retry-After when present. 
+ """ + url = f"{config.base_url}{config.chat_completions_path}" + body_payload = dict(payload) + body_payload["model"] = model + req_body = json.dumps(body_payload).encode("utf-8") + headers = { + "Authorization": f"Bearer {config.api_key}", + "Content-Type": "application/json", + "User-Agent": "verity-benchmark/0.1", + **config.headers, + } + last_error: str | None = None + for attempt in range(MAX_CHAT_COMPLETION_RETRIES): + req = request.Request(url, data=req_body, headers=headers, method="POST") + try: + with request.urlopen(req, timeout=config.request_timeout_seconds) as response: + body = response.read().decode("utf-8") + try: + return json.loads(body) + except json.JSONDecodeError as exc: + # Non-JSON 200 responses (HTML error pages from a CDN or load + # balancer mid-deploy are common) must be treated as transient + # failures so the retry loop and fallback-model chain can take + # over, not as SystemExit which aborts the whole task. + last_error = f"non-JSON response: {body[:200]!r}" + if attempt == MAX_CHAT_COMPLETION_RETRIES - 1: + raise _ChatCompletionError(status=0, detail=last_error, model=model) from exc + time.sleep(_backoff_delay(attempt, None)) + continue + except error.HTTPError as exc: + detail = exc.read().decode("utf-8", errors="replace") + last_error = f"HTTP {exc.code}: {detail[:400]}" + if exc.code not in RETRY_STATUS_CODES or attempt == MAX_CHAT_COMPLETION_RETRIES - 1: + raise _ChatCompletionError(status=exc.code, detail=detail, model=model) from exc + retry_after = _parse_retry_after(exc.headers.get("Retry-After") if exc.headers else None) + time.sleep(_backoff_delay(attempt, retry_after)) + continue + except error.URLError as exc: + last_error = f"URL error: {exc}" + if attempt == MAX_CHAT_COMPLETION_RETRIES - 1: + raise _ChatCompletionError(status=0, detail=str(exc), model=model) from exc + time.sleep(_backoff_delay(attempt, None)) + continue + except TimeoutError as exc: + # Python 3.10+: socket.timeout during SSL read surfaces as + # 
TimeoutError rather than urllib.error.URLError. Treat it as + # a transient network failure and retry with backoff. + last_error = f"Read timeout: {exc}" + if attempt == MAX_CHAT_COMPLETION_RETRIES - 1: + raise _ChatCompletionError(status=0, detail=str(exc), model=model) from exc + time.sleep(_backoff_delay(attempt, None)) + continue + raise _ChatCompletionError(status=0, detail=last_error or "unknown", model=model) + + +class _ChatCompletionError(Exception): + def __init__(self, *, status: int, detail: str, model: str) -> None: + super().__init__(f"chat completion failed with status {status}: {detail[:400]}") + self.status = status + self.detail = detail + self.model = model + + def send_chat_completion( config: ResolvedAgentConfig, messages: list[dict[str, Any]], *, tools: list[dict[str, Any]] | None = None, max_tokens_override: int | None = None, + temperature_override: float | None = None, ) -> dict[str, Any]: - url = f"{config.base_url}{config.chat_completions_path}" - payload = { - "model": config.model, - "messages": messages, - "temperature": config.temperature, - "max_tokens": max_tokens_override or config.max_completion_tokens, - } + payload: dict[str, Any] = {"messages": messages} if tools: payload["tools"] = tools payload["tool_choice"] = "auto" + # Apply extra_body first so computed overrides below win over any + # temperature/max_tokens keys the user may have stashed in extra_body. payload.update(config.extra_body) - req = request.Request( - url, - data=json.dumps(payload).encode("utf-8"), - headers={ - "Authorization": f"Bearer {config.api_key}", - "Content-Type": "application/json", - "User-Agent": "verity-benchmark/0.1", - **config.headers, - }, - method="POST", + payload["temperature"] = ( + config.temperature if temperature_override is None else temperature_override + ) + payload["max_tokens"] = max_tokens_override or config.max_completion_tokens + # Allow configuring a fallback chain via extra_body.fallback_models (list of model ids). 
+ # This lets a rate-limited primary (e.g. "opus") degrade gracefully instead of failing the run. + # Normalize fallback_models: accept a list of strings (standard) or a + # single string (common operator shorthand). A bare string must not be + # iterated character-by-character, which would produce single-letter + # "models" like "g", "p", "t". + raw_fallback = config.extra_body.get("fallback_models") or [] + if isinstance(raw_fallback, str): + raw_fallback = [raw_fallback] + elif not isinstance(raw_fallback, (list, tuple)): + # extra_body is schema-free operator input; a truthy non-iterable + # (bool, int, dict, ...) must not blow up the iteration below. + raw_fallback = [] + # Trim each entry: the guard below already gates on `item.strip()` + # truthiness, but store the stripped form so leading/trailing whitespace + # in a config like `" gpt-4o-mini"` does not survive into the outbound + # request body (providers reject model ids they do not recognize, so an + # otherwise-valid fallback would fail with a 404 model-not-found). + fallback_models = [ + item.strip() + for item in raw_fallback + if isinstance(item, str) and item.strip() + ] + payload.pop("fallback_models", None) + # Benchmark-only knob consumed in execute_interactive_agent_task; strip + # it so providers don't reject the request with an unknown-field error. + payload.pop("length_retry_token_cap", None) + models_to_try: list[str] = [config.model, *fallback_models] + last_exc: _ChatCompletionError | None = None + # Status codes that are fatal for the whole chain — every model would + # get the same error, so no point in continuing to try fallbacks. + # 401 (bad/expired API key) and 403 (forbidden) are auth-level and + # apply account-wide; retrying a different model would just produce + # the same error. 
Every other non-transient 4xx is model-specific + # (404 model-not-found, 400 model-rejected-payload, 422 bad params + # for a model, 429 model-specific quota is in RETRY_STATUS_CODES + # already) and should fall through to the next fallback model. + _FATAL_AUTH_STATUSES = {401, 403} + for model in models_to_try: + try: + return _post_chat_completion(config, payload, model) + except _ChatCompletionError as exc: + last_exc = exc + # Fall back on the same transient statuses `_post_chat_completion` + # retries internally (plus status 0 for network/read errors), so a + # primary that keeps returning 408/409/425/429/5xx gets routed to + # the configured fallback chain instead of hard-failing. For a + # non-transient, non-auth error (e.g. 404 model-not-found on a + # typo'd fallback entry) keep trying later models — one bad + # fallback should not prevent subsequent configured backups. + if exc.status in _FATAL_AUTH_STATUSES: + break + continue + if last_exc is None: + raise SystemExit("chat completion request failed with no attempts") + raise SystemExit( + f"chat completion request failed with HTTP {last_exc.status} (model={last_exc.model}): {last_exc.detail[:400]}" ) - try: - with request.urlopen(req, timeout=config.request_timeout_seconds) as response: - body = response.read().decode("utf-8") - except error.HTTPError as exc: - detail = exc.read().decode("utf-8", errors="replace") - raise SystemExit(f"chat completion request failed with HTTP {exc.code}: {detail}") from exc - except error.URLError as exc: - raise SystemExit(f"chat completion request failed: {exc}") from exc - try: - return json.loads(body) - except json.JSONDecodeError as exc: - raise SystemExit(f"chat completion request returned non-JSON response: {body[:400]!r}") from exc def list_models(config: ResolvedAgentConfig) -> dict[str, Any]: @@ -1576,6 +1812,81 @@ def execute_strict_agent_task( return response, response_text, evaluation, attempts +# Set of failure_modes produced by write_editable_proof's 
preflight checks +# (before Lean ever runs). These are deterministic formatting/import/semantic +# rejects whose human-readable `details` classify as `other`, collapsing +# distinct failure modes into the same temperature-history bucket. Surface +# each preflight mode as its own history class so the repeated-class bump +# can fire correctly (and only) when the *same* preflight keeps recurring. +# Authoritative preflight failure-mode set lives in +# harness/interactive_runtime.py::_PREFLIGHT_FAILURE_MODES and is re-exported +# here so `_failure_history_class` can't drift out of sync with the runtime +# that actually produces these modes. An earlier duplicate definition lost +# `empty_response` during a refactor; importing removes that whole class of +# bug entirely. +_PREFLIGHT_FAILURE_MODES = _RUNTIME_PREFLIGHT_FAILURE_MODES + +# Canonical evaluation-contract keys, matching the top-level `evaluation` +# object in schemas/agent-run.schema.json (additionalProperties=false over +# {status, failure_mode, details, command, candidate_workspace}). Whenever +# the runtime returns a dict that will ultimately become a top-level or +# per-attempt `evaluation` record, filter it through these keys first so +# write-time metadata (path, bytes, lines, warnings, write_status, +# repair_hints) and tool-specific extras (e.g. try_tactic_at_hole's +# `tactic`) don't leak through and break JSON schema validation. +_EVAL_KEYS = ("status", "failure_mode", "details", "command", "candidate_workspace") + + +def _failure_history_class(result: dict) -> str: + """Return the failure-class label to append to temperature history. + + Empty string means "do not append" (no failure, or infra noise we filter). + Preflight failure_modes are surfaced with a `pf:` prefix so e.g. + `pf:placeholder_detected` does not collide with Lean-check classes like + `type_error`, while still allowing the repeated-class same-value + comparison to trigger when the same preflight recurs. 
+ """ + if not isinstance(result, dict) or result.get("status") != "failed": + return "" + failure_mode = result.get("failure_mode") or "" + if failure_mode in _PREFLIGHT_FAILURE_MODES: + return f"pf:{failure_mode}" + # Lean-check failure (or any unclassified failure): derive from details. + fc = result.get("failure_class") or classify_failure(str(result.get("details", ""))) + fc = str(fc) + # Environment errors are infra noise that would break the sliding-window + # same-class check (["type_error","environment_error","type_error"] looks + # like a class change). Filter out. + if fc == "environment_error": + return "" + return fc + + +def _append_failure_class( + history: list, + fc_entry: str, + candidate_text: str, + last_key: list, +) -> None: + """Append `fc_entry` to `history` unless it's empty or a same-candidate duplicate. + + Dedupe guards against double-counting when a single turn fires both + `write_editable_proof` (which now runs the Lean check internally) and a + follow-up `run_lean_check` against the same failed candidate — that + would push two identical entries for one actual failure and prematurely + trigger the same-class temperature bump. + """ + if not fc_entry: + return + candidate_hash = hashlib.sha1(candidate_text.encode("utf-8", "replace")).hexdigest()[:16] + key = (candidate_hash, fc_entry) + if last_key and last_key[0] == key: + return + history.append(fc_entry) + last_key[0] = key + + + def execute_interactive_agent_task( config: ResolvedAgentConfig, task: dict[str, Any], @@ -1593,13 +1904,71 @@ def execute_interactive_agent_task( consecutive_length_stops = 0 max_total_turns = config.max_attempts * 2 # hard cap to prevent infinite loops token_budget = config.max_completion_tokens + # Ceiling for the length-retry silent bump. 
Read from config.extra_body so + # operators can opt into larger bumps for providers that accept them, but + # default to `max_completion_tokens` so models with a hard cap at that value + # don't get HTTP 400 when the bump kicks in. Stripped from the request + # payload in `send_chat_completion` so it never leaks to the provider. + _cap_raw = config.extra_body.get("length_retry_token_cap", config.max_completion_tokens) + try: + length_retry_token_cap = int(_cap_raw) + except (TypeError, ValueError): + # Invalid operator-edited value (e.g. null, "12k", nested object). + # Fall back silently rather than aborting the run. + length_retry_token_cap = config.max_completion_tokens + if length_retry_token_cap < config.max_completion_tokens: + length_retry_token_cap = config.max_completion_tokens + # Temperature schedule: escalate after repeated same-class failures to break out + # of deterministic loops where temperature=0 reproduces byte-identical responses. + current_temperature = config.temperature + failure_class_history: list[str] = [] + # Dedupe key for `failure_class_history` appends: (candidate_hash, class). + # When a model does write_editable_proof then run_lean_check in the same + # turn against the same (failed) candidate, both tool calls produce the + # same class entry for the same candidate. Without dedupe the history + # gets two entries for one actual failure, and the repeated-class + # temperature bump fires a turn too early. + # Scope: reset at the top of each model turn (see loop below) so + # cross-turn repeats on an unchanged candidate still register as genuine + # failures for the repeated-class temperature escalation. 
+ _last_history_key: list = [None] # mutable cell so helper can update + # Track how many failures we have already applied the temperature-bump + # schedule to, so we don't keep escalating temperature on every iteration + # once the trigger condition is first met (it would otherwise run to the + # cap within a few turns regardless of intervening search/write activity). + temperature_schedule_applied_at = 0 turn = 0 while proof_attempts < config.max_attempts and turn < max_total_turns: turn += 1 + # Scope the failure-class dedupe to a single turn. The dedupe exists to + # coalesce same-candidate same-class duplicates emitted within one + # model turn (e.g. `write_editable_proof` + follow-up `run_lean_check` + # on the same candidate); it must not silence genuine cross-turn + # repeats where the candidate stays unchanged but the model tries + # again. Resetting here bounds the dedupe window to the current turn. + _last_history_key[0] = None + # Adjust temperature once per new failure entry when the last two + # proof attempts failed with the same class. + if ( + len(failure_class_history) > temperature_schedule_applied_at + and len(failure_class_history) >= 2 + and failure_class_history[-1] == failure_class_history[-2] + and failure_class_history[-1] not in ("", "environment_error") + ): + # Escalate toward 0.7 to break deterministic loops, but never + # DECREASE below the configured base temperature. A run with + # `config.temperature = 1.0` should stay at 1.0 (or higher) + # rather than dropping to 0.7 on the first stagnation trigger — + # the cap exists only to stop unbounded growth, not to override + # an operator who explicitly asked for a hotter sampler. 
+ escalated = max(current_temperature + 0.2, 0.2) + current_temperature = max(min(0.7, escalated), config.temperature) + temperature_schedule_applied_at = len(failure_class_history) response = send_chat_completion( config, transcript, tools=runtime.tool_specs(), max_tokens_override=token_budget if token_budget != config.max_completion_tokens else None, + temperature_override=current_temperature if current_temperature != config.temperature else None, ) response_text = extract_text(response) tool_calls = extract_tool_calls(response) @@ -1613,9 +1982,12 @@ def execute_interactive_agent_task( finish_reason = choices[0].get("finish_reason", "") if finish_reason == "length" and not tool_calls and not response_text.strip(): consecutive_length_stops += 1 - if consecutive_length_stops == 1: - # First length stop: bump token budget once and retry silently - token_budget = min(int(token_budget * 1.5), 4500) + # Up to 3 silent budget bumps before nudging the model to simplify. + # Cap bump at `config.max_completion_tokens` so we never exceed the + # provider-enforced per-response limit (some models hard-cap at the + # configured value and would return HTTP 400 on anything larger). + if consecutive_length_stops <= 3: + token_budget = min(int(token_budget * 1.5), length_retry_token_cap) continue # Subsequent length stops: inject a nudge to simplify and use tools transcript.append({"role": "assistant", "content": ""}) @@ -1623,16 +1995,19 @@ def execute_interactive_agent_task( "role": "user", "content": ( "Your response was cut off. Do not over-think. " - "Immediately call write_editable_proof with a simple proof attempt, " - "then call run_lean_check. Keep the proof short." + "Immediately call write_editable_proof with a simple proof attempt " + "(it runs the Lean check automatically). Keep the proof short." 
), }) - if consecutive_length_stops >= 3: - # Reset budget back to configured value after persistent overruns - token_budget = config.max_completion_tokens + # Reset budget back to configured value after persistent overruns + token_budget = config.max_completion_tokens continue else: + # Recovered from any length streak -- reset both the counter and + # the (possibly-elevated) token budget so we don't leak state into + # subsequent turns. consecutive_length_stops = 0 + token_budget = config.max_completion_tokens attempts.append( { @@ -1651,11 +2026,63 @@ def execute_interactive_agent_task( # Only overwrite the stored proof if the response looks like Lean code, # not natural-language explanation. if final_candidate.strip() and _looks_like_lean(final_candidate): - runtime.write_editable_proof(final_candidate) + # `write_editable_proof` already runs the Lean check + # internally (check=True default) and returns the merged + # write-metadata + run_lean_check result. Reuse that dict + # instead of calling `evaluate_current()` again — the + # previous double-invocation cost a second `lake env lean` + # per no-tool-calls attempt and pushed a spurious entry + # onto `_check_history`, which could trigger premature + # stagnation/temperature escalation. + # NOTE: local name is `write_payload` (not `write_result`) + # because `write_result` is a module-level function at + # line ~1530 (`write_result(task_ref, config, payload)`), + # and shadowing it with a local would silently break any + # future code in this function that tried to call the + # file-writer. The on-trace attempts record still exposes + # this payload under the `"write_result"` key for + # backward-compatible tooling. + write_payload = runtime.write_editable_proof(final_candidate) proof_attempts += 1 - evaluation = runtime.evaluate_current() + # `write_editable_proof` returns the full write payload + # merged with `run_lean_check` output (path, bytes, lines, + # warnings, write_status, repair_hints). 
These are not part + # of the top-level `evaluation` schema (which is strict: + # additionalProperties=false over {status, failure_mode, + # details, command, candidate_workspace}). Returning the + # raw dict upward — as was done before — made `build_result` + # forward it to `validate_result_payload` and fail schema + # validation with a SystemExit, aborting the entire run + # every time the model produced Lean text without tool + # calls (including successful proofs). Normalize here so + # both the nested `attempts[-1]["evaluation"]` record and + # the outward return have the contract shape, while + # preserving the rich write-time payload under a separate + # per-attempt key for debugging/analytics. + evaluation = { + k: write_payload[k] + for k in _EVAL_KEYS + if k in write_payload + } + evaluation.setdefault("failure_mode", None) + evaluation.setdefault("details", "") attempts[-1]["candidate_file_contents"] = runtime.current_proof_text attempts[-1]["evaluation"] = evaluation + attempts[-1]["write_result"] = write_payload + # Track model-driven failure classes for the temperature + # schedule's sliding window. `_failure_history_class` maps + # preflight modes (placeholder_detected, hidden_*_import, + # theorem_statement_mismatch) to distinct `pf:` labels + # so they don't all collapse into `other`, and filters out + # infra-noise environment errors that would break + # same-class detection. 
+ fc_entry = _failure_history_class(write_payload) + _append_failure_class( + failure_class_history, + fc_entry, + runtime.current_proof_text, + _last_history_key, + ) if evaluation["status"] == "passed": return response, response_text, runtime.current_proof_text, evaluation, attempts, tool_calls_used # Failed candidate without tool calls: feed error back @@ -1669,16 +2096,42 @@ def execute_interactive_agent_task( ) if guidance: repair_msg += f"\nRepair guidance:\n{guidance}\n" - repair_msg += "\nUse write_editable_proof to write a corrected proof, then run_lean_check to verify." + repair_msg += "\nUse write_editable_proof to write a corrected proof (it runs the Lean check automatically; no separate run_lean_check needed)." transcript.append({"role": "assistant", "content": response_text or ""}) transcript.append({"role": "user", "content": repair_msg}) - elif failure_mode in ("placeholder_detected", "theorem_statement_mismatch"): + elif failure_mode in ( + "placeholder_detected", + "theorem_statement_mismatch", + "hidden_proof_import_detected", + "hidden_case_import_detected", + ): + # Preflight rejections (placeholder_detected, + # theorem_statement_mismatch, hidden_*_import_detected) are + # all recoverable by the model: the candidate file made it + # through the write path but was rejected before Lean saw + # it. Surface the rejection and give the model another + # turn to produce a clean candidate, instead of bailing + # out on the first hidden-import mistake. + extra_hint = "" + if failure_mode == "hidden_proof_import_detected": + extra_hint = ( + "\nRemove any `import`, `open`, or `export` of a " + "`Benchmark.Cases.*.Proofs` module — those hold " + "held-out ground truth and are not available to " + "the model." + ) + elif failure_mode == "hidden_case_import_detected": + extra_hint = ( + "\nOnly the public specification / implementation " + "modules for this task may be imported. Drop any " + "other `Benchmark.Cases.*` imports." 
+ ) retry_msg = ( f"Your response did not produce a valid proof candidate (proof attempt {proof_attempts} of {config.max_attempts}, " f"failure: {failure_mode}).\n" - "Use the write_editable_proof tool to submit the complete editable Lean proof file, " - "then use run_lean_check to verify it.\n" - "Do not explain or analyze. Use the tools directly.\n" + "Use the write_editable_proof tool to submit the complete editable Lean proof file " + "(it runs the Lean check automatically; no separate run_lean_check needed).\n" + "Do not explain or analyze. Use the tools directly." + extra_hint + "\n" ) transcript.append({"role": "assistant", "content": response_text}) transcript.append({"role": "user", "content": retry_msg}) @@ -1687,8 +2140,8 @@ def execute_interactive_agent_task( else: # Empty response or no valid candidate: nudge model to use tools nudge_msg = ( - "You must use the write_editable_proof tool to submit your proof, " - "then call run_lean_check to verify it. Do not respond with text only.\n" + "You must use the write_editable_proof tool to submit your proof " + "(it runs the Lean check automatically). 
Do not respond with text only.\n" ) transcript.append({"role": "assistant", "content": response_text or ""}) transcript.append({"role": "user", "content": nudge_msg}) @@ -1701,7 +2154,6 @@ def execute_interactive_agent_task( "tool_calls": tool_calls, } ) - saw_lean_failure = False turn_had_proof_action = False for tool_call in tool_calls: if tool_calls_used >= config.max_tool_calls: @@ -1738,12 +2190,50 @@ def execute_interactive_agent_task( "result": result, } ) - if tool_name == "run_lean_check" and result.get("failure_mode") == "lean_check_failed": - saw_lean_failure = True - elif tool_name in ("run_lean_check", "try_tactic_at_hole") and result.get("status") == "passed": - # Normalize to evaluation schema (try_tactic_at_hole returns tactic/details without failure_mode) - evaluation = dict(result) + if tool_name in ("run_lean_check", "write_editable_proof") and result.get("status") == "failed": + # Track any write/check failure (Lean-check *and* preflight + # failures like placeholder_detected / + # hidden_case_import_detected). Previously only + # `failure_mode == "lean_check_failed"` was recorded, so a run + # stuck on repeated preflight failures never tripped the + # same-class temperature bump and stayed at deterministic + # temperature until attempt exhaustion. + fc_entry = _failure_history_class(result) + _append_failure_class( + failure_class_history, + fc_entry, + runtime.current_proof_text, + _last_history_key, + ) + # Persist candidate state even for failed proof-tool turns so + # `build_run_analysis` can hash intermediate drafts for the + # candidate_change_count / distinct_candidate_count analytics. + # Without this, only the last (passed or budget-exhausted) + # turn's candidate gets recorded and repeated unsuccessful + # edits look like zero churn. 
+ attempts[-1]["candidate_file_contents"] = runtime.current_proof_text + # Normalize to the evaluation schema (same _EVAL_KEYS filter as + # the passed path below) so the nested per-attempt evaluation + # records have a consistent shape across passed / failed / + # budget-exhausted branches. The raw tool result carries + # write-time metadata (path, bytes, lines, warnings, + # repair_hints) that isn't part of the evaluation contract. + _failed_eval = { + k: result[k] + for k in _EVAL_KEYS + if k in result + } + _failed_eval.setdefault("failure_mode", None) + _failed_eval.setdefault("details", "") + attempts[-1]["evaluation"] = _failed_eval + elif tool_name in ("run_lean_check", "try_tactic_at_hole", "write_editable_proof") and result.get("status") == "passed": + # Normalize to evaluation schema. `try_tactic_at_hole` returns + # extra keys like `tactic` that must be stripped, otherwise the + # final result fails schema validation (additionalProperties: + # false) and the whole task aborts with no result file. + evaluation = {k: result[k] for k in _EVAL_KEYS if k in result} evaluation.setdefault("failure_mode", None) + evaluation.setdefault("details", "") attempts[-1]["candidate_file_contents"] = runtime.current_proof_text attempts[-1]["evaluation"] = evaluation return response, response_text, runtime.current_proof_text, evaluation, attempts, tool_calls_used @@ -1760,7 +2250,7 @@ def execute_interactive_agent_task( "content": ( "Stop searching and write a proof now. The search_public_defs tool only searches " "this task's implementation and specification files, not the Lean standard library. " - "Use write_editable_proof to submit your best proof attempt, then run_lean_check to verify." + "Use write_editable_proof to submit your best proof attempt (it runs the Lean check automatically)." 
), } ) @@ -1808,7 +2298,12 @@ def execute_agent_task( return 0, result_path start = time.perf_counter() + # Pre-build implementation/specification modules so `lake env lean` inside + # TaskProofRuntime.evaluate_candidate does not race against on-the-fly + # compilation with fast agent retries. + prebuild_reports: list[dict[str, Any]] = [] if config.mode == "interactive": + prebuild_reports = prebuild_task_modules(task) response, response_text, candidate_text, evaluation, attempts, tool_calls_used = execute_interactive_agent_task( config, task, @@ -1850,6 +2345,8 @@ def execute_agent_task( result["attempts"] = attempts result["tool_calls_used"] = tool_calls_used result["analysis"] = build_run_analysis(attempts=attempts, evaluation=evaluation, tool_calls_used=tool_calls_used) + if prebuild_reports: + result["prebuild_reports"] = prebuild_reports validate_result_payload(result, task_ref) result_path = write_result(task_ref, config, result) return (0 if evaluation["status"] == "passed" else 1), result_path diff --git a/harness/interactive_runtime.py b/harness/interactive_runtime.py index 23420b59..fd3e99d5 100644 --- a/harness/interactive_runtime.py +++ b/harness/interactive_runtime.py @@ -1,5 +1,6 @@ from __future__ import annotations +import copy import json import os import re @@ -12,11 +13,68 @@ PLACEHOLDER_PATTERN = re.compile(r"\b(sorry|admit|axiom)\b") -HOLE_PATTERN = re.compile(r"\?(?:_|\w+)") +# Match standalone `?_` holes only (not `?x` metavariables used in valid tactics). +HOLE_PATTERN = re.compile(r"(? bool: + if name in _MATHLIB_SHAPE_EXACT: + return True + if _MATHLIB_SHAPE_PREFIX_RE.match(name): + return True + # `Nat.*` lemma guesses are overwhelmingly Mathlib-only in this corpus. 
+ if name.startswith("Nat."): + return True + return False + @dataclass(frozen=True) class RuntimePaths: @@ -37,6 +95,44 @@ def __init__(self, task: dict[str, Any]) -> None: self._task = task # store for hint escalation self._best_error_count: int | None = None self._best_first_error_line: int | None = None + # Fingerprints of hint texts already surfaced this session. Used to + # avoid echoing the same repair advice verbatim across consecutive + # failures — repeated identical hints are pure noise and train the + # model to ignore the list instead of acting on it. + self._emitted_hint_keys: set[str] = set() + # Normalised fingerprint of the previous failing Lean details text, + # plus a count of how many times the same fingerprint has repeated + # in a row. Used to detect "no-progress loops" where the model + # resubmits a proof that yields byte-identical errors — corpus + # analysis found 12/29 failing tasks hit this pattern. + self._last_details_fp: str | None = None + self._same_details_streak: int = 0 + # Cache of the most recent run_lean_check evaluation keyed by the + # exact proof text that produced it. A redundant run_lean_check call + # against unchanged content (corpus analysis found 201/201 — 100% — + # of run_lean_check calls were immediately after a write_editable_proof + # that had already run Lean) returns this cached result instantly + # plus a `cached: true` marker telling the model the call was + # redundant, saving a full Lean invocation and a round. + self._last_eval_cache: tuple[str, dict[str, Any]] | None = None + # Count of consecutive failed try_tactic_at_hole calls. Corpus analysis + # of 83 runs: try_tactic_at_hole has a 0/76 (0%) success rate across + # the entire interactive-proxy corpus, but failed runs average 3-7 + # calls per task (14/29 failed runs have a ≥3-streak of failures) + # vs passed runs which max at a 2-streak (and never succeed when + # they do call it — they just move on after 1-2 attempts). 
Firing + # a pivot warning at the 3rd consecutive failure catches the stuck- + # loop pattern with zero false positives on the passed side. + self._try_tactic_failure_streak: int = 0 + # Cache of prior search_public_defs calls keyed by (query, limit). + # Corpus analysis of 83 runs found failed runs averaged 41.9 + # search_public_defs calls vs 1.5 on passing runs; 94% of those + # calls in failed runs were byte-identical re-queries (e.g. the same + # `"removeOwner_ownerListInvariant"` query 26 times in one run). The + # index is read-only within a session, so a cached hit with a + # `cached: true` + note tells the model the query yielded nothing + # new and it should pivot instead of re-asking. + self._search_cache: dict[tuple[str, int], dict[str, Any]] = {} self.paths = RuntimePaths( editable_rel_path=editable_rel_path, theorem_name=str(task["theorem_name"]), @@ -75,19 +171,103 @@ def read_public_file(self, rel_path: str) -> dict[str, Any]: except FileNotFoundError: return {"status": "missing", "path": rel_path} - def write_editable_proof(self, content: str) -> dict[str, Any]: + def write_editable_proof(self, content: str, *, check: bool = True) -> dict[str, Any]: self.current_proof_text = content if content.endswith("\n") else f"{content}\n" - return { - "status": "ok", + # Invalidate the run_lean_check fast-path cache. The cache is keyed on + # `current_proof_text`, so a repeat write of identical content (common + # during stagnation loops) would otherwise hit a stale cached + # evaluation and return `cached: true` with a note claiming this was + # a redundant `run_lean_check` follow-up — even though the model's + # intent is a fresh write. Drop the cache unconditionally here; the + # downstream `execute_tool("run_lean_check", ...)` call re-populates + # it for genuine no-op follow-ups. 
+ self._last_eval_cache = None + warnings: list[dict[str, str]] = [] + if not self.current_proof_text.strip(): + warnings.append({"kind": "empty_content", "detail": "candidate is empty"}) + if PLACEHOLDER_PATTERN.search(self.current_proof_text): + warnings.append({ + "kind": "placeholder_detected", + "detail": "contains `sorry`/`admit`/`axiom`; Lean rejects these — replace with a real tactic or a `?_` hole.", + }) + if HIDDEN_PROOF_IMPORT_PATTERN.search(self.current_proof_text): + warnings.append({ + "kind": "hidden_proof_import_detected", + "detail": "remove Benchmark.Cases.*.Proofs import/open/export.", + }) + blocked = self._find_blocked_case_imports(self.current_proof_text) + if blocked: + warnings.append({ + "kind": "hidden_case_import_detected", + "detail": "non-public imports: " + ", ".join(blocked), + }) + if HOLE_PATTERN.search(self.current_proof_text): + warnings.append({ + "kind": "unfilled_hole", + "detail": "proof still contains `?_` holes; fill before submitting.", + }) + candidate_signature = self._extract_theorem_signature(self.current_proof_text) + if candidate_signature != self.expected_theorem_signature: + warnings.append({ + "kind": "theorem_statement_mismatch", + "detail": "editable theorem signature changed; revert to the original statement.", + }) + result: dict[str, Any] = { + "status": "ok_with_warnings" if warnings else "ok", "path": self.paths.editable_rel_path, "bytes": len(self.current_proof_text.encode("utf-8")), "lines": len(self.current_proof_text.splitlines()), } + if warnings: + result["warnings"] = warnings + # Fold the Lean check into the write. Each write+check used to cost + # two tool slots and two model round-trips; inlining saves one full + # round-trip (hundreds of ms to seconds of LLM latency per proof + # iteration) and doubles the effective budget for proof exploration. + # The caller can disable by passing check=False (kept for callers + # that only want to stage a draft without paying for Lean). 
+ if check: + # Reuse the full run_lean_check pipeline (auto-heal + annotation + + # repair hints) so downstream success/failure detection is + # identical to a bare run_lean_check call. Write-time metadata + # (path, bytes, lines, warnings) stays visible in the result so + # the model still sees format warnings like non_public_imports + # alongside the Lean verdict. + pre_check_status = result["status"] + result.update(self.execute_tool("run_lean_check", {})) + # `run_lean_check` overwrites the `status` field, which drops the + # pre-check `ok_with_warnings` verdict. Callers that look for + # write-phase warnings (unfilled `?_` holes, non_public_imports, + # theorem_statement_mismatch) need a stable signal, so expose the + # pre-check verdict on `write_status`. The main `status` still + # reflects the Lean check so existing `status == "passed"` and + # `status == "failed"` branches keep working unchanged. + if pre_check_status != "ok": + result["write_status"] = pre_check_status + return result def search_public_defs(self, query: str, *, limit: int = 20) -> dict[str, Any]: query_text = query.strip() if not query_text: return {"status": "rejected", "reason": "query_must_not_be_empty"} + # The set of public impl/spec files does not change within a session, + # so the same (query, limit) will always return the same matches. + # Short-circuit repeat queries with a cached response + explicit note + # so the agent stops looping on an identical search. + cache_key = (query_text.lower(), limit) + cached = self._search_cache.get(cache_key) + if cached is not None: + reused = copy.deepcopy(cached) + reused["cached"] = True + reused["note"] = ( + "You already ran search_public_defs with this exact query " + "earlier in the session; the public impl/spec files are " + "static, so the result is identical. Try a different query " + "(e.g. 
a substring, a related concept, or a parameter name) " + "or switch to inspect_lean_goals / try_tactic_at_hole — " + "do not resubmit the same query." + ) + return reused lowered = query_text.lower() matches: list[dict[str, Any]] = [] for rel_path in self.paths.implementation_files + self.paths.specification_files: @@ -110,16 +290,52 @@ def search_public_defs(self, query: str, *, limit: int = 20) -> dict[str, Any]: } ) if len(matches) >= limit: - return {"status": "ok", "query": query_text, "matches": matches, "truncated": True} - return {"status": "ok", "query": query_text, "matches": matches, "truncated": False} + result = {"status": "ok", "query": query_text, "matches": matches, "truncated": True} + self._search_cache[cache_key] = copy.deepcopy(result) + return result + if not matches: + # Corpus analysis (83 runs) found 55/75 (73%) of search_public_defs + # calls returned empty — overwhelmingly because agents searched for + # Mathlib / core Lean library names like `Nat.div_mul_le`, + # `add_zero`, `div_pos`, etc. This tool only searches the task's + # public impl/spec files, not the standard library. Surface that + # scope limit explicitly so the agent stops burning rounds on + # library searches. + result = { + "status": "ok", + "query": query_text, + "matches": matches, + "truncated": False, + "hint": ( + "No match in the task's public impl/spec files. " + "`search_public_defs` only indexes definitions inside " + "implementation_files and specification_files for this " + "task — it does NOT search Lean core, Batteries, or " + "Mathlib (Mathlib is not a dependency of this project). " + "For standard-library lemmas use `exact?` / `apply?` / " + "`rw?` via `try_tactic_at_hole`, or rely on `simp` / " + "`omega` / `decide` which already know common arithmetic " + "and boolean facts. Retry this tool only with names you " + "expect to be defined in the current task's spec/impl." 
+ ), + } + self._search_cache[cache_key] = copy.deepcopy(result) + return result + result = {"status": "ok", "query": query_text, "matches": matches, "truncated": False} + self._search_cache[cache_key] = copy.deepcopy(result) + return result def inspect_goals(self) -> dict[str, Any]: - holes = sorted(set(HOLE_PATTERN.findall(self.current_proof_text))) + # Detect `?_` AND named holes (`?h`, `?foo`). Named-hole detection was + # lost when HOLE_PATTERN was tightened for substitution safety; this + # tool is read-only so the broader pattern is safe and restores the + # recovery path for proofs that use named holes. + holes = sorted(set(ANY_HOLE_PATTERN.findall(self.current_proof_text))) if not holes: return { "status": "unsupported", "reason": "goal_inspection_requires_explicit_hole", - "details": "Write the proof with a `?_` or named hole first, then retry goal inspection.", + "details": "Write the proof with a `?_` or named hole (e.g. `?h`) first, then retry goal inspection.", } evaluation = self.evaluate_current(check_goals=True) return { @@ -138,8 +354,17 @@ def try_tactic_at_hole(self, tactic: str) -> dict[str, Any]: if not tactic.strip(): return {"status": "rejected", "reason": "tactic_must_not_be_empty"} original = self.current_proof_text - # Replace standalone ?_ holes (not named holes like ?_foo) - modified = re.sub(r"\?_(?!\w)", tactic.strip(), original) + # Substitute each `?_` with a context-adapted form of `tactic`. Corpus + # analysis of 72 failed try_tactic_at_hole calls found 47 (65%) passed + # a raw tactic (e.g. `omega`, `rfl`, `simp_all [...]`) into a proof + # where the hole sat at a TERM position like `exact ?_` — making the + # substituted proof read `exact omega`, which Lean rejects because + # `omega` is a tactic, not a term. Automatically wrap the substituted + # tactic with `(by ...)` at term-position holes, and strip an existing + # `by ` wrapper at tactic-position holes, so the model's intent + # survives context mismatches. 
Holes at other positions get the raw + # tactic. + modified = _substitute_holes(original, tactic.strip()) if modified == original: return { "status": "unsupported", @@ -148,18 +373,70 @@ def try_tactic_at_hole(self, tactic: str) -> dict[str, Any]: } evaluation = self.evaluate_candidate(modified) if evaluation.get("status") == "passed": + self._try_tactic_failure_streak = 0 self.current_proof_text = modified return { "status": "passed", "tactic": tactic.strip(), "details": "Tactic succeeded. Proof updated.", } - return { + self._try_tactic_failure_streak += 1 + # Produce the same class-based repair_hints as run_lean_check / + # write_editable_proof do on failure. Corpus analysis of 83 interactive + # runs found 76/76 (100%) of failed try_tactic_at_hole results returned + # no hints, even though the failure_class distribution (45 unknown_ + # identifier, 18 unsolved_goals, 7 type_mismatch, …) maps onto hints + # already produced by `_build_check_hints` when the same error comes + # from the other two tools. Reusing that helper keeps the advice + # consistent across the tool surface and gives the model a concrete + # next tactic to try instead of a bare error payload. + # `details` is already stripped of `linter.unusedSimpArgs` noise and + # capped at `_LEAN_OUTPUT_CAP_CHARS` (16 KB) by `evaluate_candidate`. + # Earlier code re-truncated to 2000 chars — a legacy band-aid from + # before the upstream cleanup pipeline existed. Corpus analysis of + # the 78 try_tactic_at_hole failures in the current corpus found + # 41/78 (53%) hit that 2000-char cap, chopping off already-cleaned + # diagnostic content (goal state, context, line numbers) that + # run_lean_check would have returned in full on the same failure. + # Drop the extra truncation so all three tools surface the same + # error fidelity; the 16 KB pipeline cap remains the backstop. 
+ details = str(evaluation.get("details", "")) + failure_class = classify_failure(details) + result = { "status": "failed", "tactic": tactic.strip(), - "details": evaluation.get("details", "")[:2000], - "failure_class": classify_failure(str(evaluation.get("details", ""))), + "details": details, + "failure_class": failure_class, } + hints = _build_check_hints(failure_class, details) + # After 3 consecutive failed try_tactic_at_hole calls, inject a + # "pivot" hint. Corpus analysis: passed runs never exceed a 2-streak; + # failed runs hit ≥3 in 14/29 (48%) tasks, with some stacking 5-7 + # attempts of increasingly speculative tactics. The tool has a + # 0/76 (0%) corpus-wide success rate, so further attempts on the + # same hole are almost certainly wasted budget — the pivot hint + # tells the model to switch to write_editable_proof with explicit + # multi-step tactics and inspect_lean_goals between steps. + if self._try_tactic_failure_streak >= 3: + hints = list(hints) if hints else [] + hints.insert( + 0, + f"You have now run {self._try_tactic_failure_streak} consecutive " + "`try_tactic_at_hole` calls with no success. This tool only " + "closes a goal when a SINGLE tactic discharges it entirely; " + "for goals that need BEq↔Prop bridging, case analysis on " + "residual `if`/`match` arms, monadic-trace unfolding, or " + "multi-step arithmetic rewriting, no single tactic will " + "close them no matter how many more you try. PIVOT: write a " + "full multi-line proof body with `write_editable_proof` " + "(leaving `?_` ONLY at positions where you then " + "`inspect_lean_goals` to see the reduced state), and make " + "progress one step at a time. Do NOT continue cycling " + "single-tactic guesses here." 
+ ) + if hints: + result["repair_hints"] = hints + return result def evaluate_current(self, *, check_goals: bool = False) -> dict[str, Any]: return self.evaluate_candidate(self.current_proof_text, check_goals=check_goals) @@ -231,6 +508,16 @@ def evaluate_candidate(self, candidate_text: str, *, check_goals: bool = False) ) command = ["lake", "env", "lean", "--root=.", str(check_path.relative_to(workspace))] code, output = lean_run_command(command, cwd=workspace) + # Strip the "This simp argument is unused" lint blocks from Lean + # output before returning. Corpus analysis of 37 failed-check + # detail blobs found 844/846 warnings (~99%) were this single + # linter, accounting for ~20 KB of the average 34 KB details + # blob. The noise drowns the real errors and trains the model + # to ignore the details block. Filtering preserves every real + # error and every other warning kind — only the known-useless + # linter goes away. + output = _strip_noise_warnings(output) + output = _cap_lean_output(output) if code != 0: return { "status": "failed", @@ -271,7 +558,7 @@ def tool_specs(self) -> list[dict[str, Any]]: "type": "function", "function": { "name": "write_editable_proof", - "description": "Replace the entire editable proof file with complete Lean code.", + "description": "Replace the entire editable proof file with complete Lean code and automatically run the Lean check. The response reports status (passed/failed/ok/ok_with_warnings) and, on failure, failure_mode, details, and failure_class. A separate run_lean_check call is not needed after this.", "parameters": { "type": "object", "additionalProperties": False, @@ -288,7 +575,7 @@ def tool_specs(self) -> list[dict[str, Any]]: "type": "function", "function": { "name": "run_lean_check", - "description": "Run the official harness Lean check for the current editable proof.", + "description": "Re-run the Lean check on the current editable proof without modifying it. 
Redundant immediately after `write_editable_proof`, which already runs the check — if the proof text is unchanged since the last evaluation, this call returns a cached result tagged `cached: true` rather than re-invoking Lean.", "parameters": { "type": "object", "additionalProperties": False, @@ -312,7 +599,7 @@ def tool_specs(self) -> list[dict[str, Any]]: "type": "function", "function": { "name": "search_public_defs", - "description": "Search public implementation/specification files for matching def/theorem/lemma names.", + "description": "Search the task's public implementation/specification files for matching def/theorem/lemma names. Scope is ONLY those task files — it does NOT search Lean core, Batteries, or Mathlib (Mathlib is not a dependency of this project). For standard-library lemmas, prefer `exact?` / `apply?` / `rw?` via `try_tactic_at_hole`, or tactics like `simp` / `omega` / `decide` that already know common arithmetic and boolean facts.", "parameters": { "type": "object", "additionalProperties": False, @@ -328,7 +615,7 @@ def tool_specs(self) -> list[dict[str, Any]]: "type": "function", "function": { "name": "try_tactic_at_hole", - "description": "Try replacing all `?_` holes in the current proof with a specific tactic and check if it compiles. Preserves the original proof if it fails. Useful for testing tactics like `simp_all [...]`, `omega`, `decide`, or `duper [...]`.", + "description": "Try replacing all `?_` holes in the current proof with a specific tactic and check if it compiles. Pass a raw tactic (e.g. `omega`, `simp_all [foo]`, `decide`, `exact h`); substitution auto-wraps as `(by tac)` when the hole is at a term position like `exact ?_`. 
Preserves the original proof if it fails.", "parameters": { "type": "object", "additionalProperties": False, @@ -350,19 +637,67 @@ def execute_tool(self, name: str, arguments: dict[str, Any]) -> dict[str, Any]: if name == "write_editable_proof": return self.write_editable_proof(str(arguments.get("content", ""))) if name == "run_lean_check": + # Short-circuit if the proof text is unchanged since the last + # evaluation. Corpus analysis of 83 interactive runs found that + # 201/201 (100%) of run_lean_check calls were made immediately + # after a write_editable_proof that had already run Lean on the + # same content. Returning the cached evaluation saves a full + # Lean invocation (seconds) and teaches the model the call was + # redundant via the `cached: true` marker + note. + if self._last_eval_cache is not None: + cached_text, cached_result = self._last_eval_cache + # Never serve an `environment_error` from cache. The write- + # side guard below already refuses to cache env errors, but + # treat the read side defensively too: if an env error ever + # ends up in the cache (e.g. via a future refactor), we + # must still re-run `evaluate_current` so `_attempt_lake_build` + # can retry the heal path instead of pinning the task to + # a stale infra failure that may have recovered. + cached_is_env_error = ( + isinstance(cached_result, dict) + and ( + cached_result.get("failure_class") == "environment_error" + or cached_result.get("environment_error") is True + ) + ) + if cached_text == self.current_proof_text and not cached_is_env_error: + reused = copy.deepcopy(cached_result) + reused["cached"] = True + reused["note"] = ( + "Proof text is unchanged since the last evaluation; " + "returning cached result without re-running Lean. " + "`write_editable_proof` already runs the Lean check — " + "a follow-up `run_lean_check` on unchanged content is " + "redundant." 
+ ) + return reused result = self.evaluate_current() + # Auto-heal environment errors (missing .olean) once before annotating. + if result.get("status") == "failed" and result.get("failure_mode") == "lean_check_failed": + details = str(result.get("details", "")) + if classify_failure(details) == "environment_error": + module_name = _missing_olean_module(details) + healed = _attempt_lake_build(module_name) + if healed: + result = self.evaluate_current() if result.get("status") == "failed": result = self._annotate_check_result(result) - # Also add structured repair hints from main's guidance - if result.get("failure_mode") == "lean_check_failed": - guidance = _build_repair_guidance(str(result.get("details", ""))) - if guidance: - existing = result.get("repair_hints", []) - if isinstance(existing, list): - existing.append(guidance) - result["repair_hints"] = existing - else: - result["repair_hints"] = [existing, guidance] if existing else [guidance] + # Cache the fresh evaluation against the current proof text so a + # follow-up run_lean_check on unchanged content hits the fast path. + # Exception: do NOT cache `environment_error` results. Those are + # transient infrastructure failures (missing .olean, lake build + # contention) that the heal path above tries to recover from via + # `_attempt_lake_build`. Caching them would short-circuit every + # subsequent `run_lean_check` on unchanged proof text back to the + # stale env error, preventing the heal path from being re-entered + # if infra recovers. Re-evaluate every time for env errors so the + # heal path keeps getting a chance. 
+ is_env_error = ( + result.get("failure_class") == "environment_error" + or result.get("environment_error") is True + ) + if not is_env_error: + self._last_eval_cache = (self.current_proof_text, copy.deepcopy(result)) return result if name == "inspect_lean_goals": return self.inspect_goals() @@ -380,11 +715,27 @@ def _annotate_check_result(self, result: dict[str, Any]) -> dict[str, Any]: # not preflight failures (empty_response, placeholder_detected, etc.) is_lean_failure = failure_mode == "lean_check_failed" details = str(result.get("details", "")) - failure_class = classify_failure(details) + # Preflight failures carry English-language details that classify_failure + # can't pattern-match, so they all collapse to "other" and the model gets + # no targeted hint. Map the failure_mode directly to a class name so the + # model sees e.g. "placeholder_detected" instead of "other" and + # _build_check_hints can dispatch a specific hint. + if not is_lean_failure and failure_mode in _PREFLIGHT_FAILURE_MODES: + failure_class = failure_mode + else: + failure_class = classify_failure(details) hints = _build_check_hints(failure_class, details) annotated = dict(result) annotated["failure_class"] = failure_class + # environment_error is infrastructure, not a proof problem. Don't track + # stagnation for it (retrying won't help) and tag the result clearly. + if failure_class == "environment_error": + annotated["environment_error"] = True + if hints: + annotated["repair_hints"] = hints + return annotated + if not is_lean_failure: if hints: annotated["repair_hints"] = hints @@ -402,6 +753,17 @@ def _annotate_check_result(self, result: dict[str, Any]) -> dict[str, Any]: else: break + # Detect true no-progress loops: the normalized error text matches the + # previous failure byte-for-byte. This is a much stronger signal than + # same-class stagnation — it proves the last edit had zero effect on + # what Lean actually saw. 
+ details_fp = _normalize_details_fp(details) + if details_fp and details_fp == self._last_details_fp: + self._same_details_streak += 1 + else: + self._same_details_streak = 1 + self._last_details_fp = details_fp + # Escalate on either: 2+ consecutive same-class failures, or 4+ total failures if same_class_count >= 2 or total_failures >= 4: if same_class_count >= 2: @@ -414,10 +776,153 @@ def _annotate_check_result(self, result: dict[str, Any]) -> dict[str, Any]: f"You have failed {total_failures} times across different error classes. " "Step back and reconsider your proof strategy from scratch." ) - escalation = self._build_escalation_hint(failure_class) + escalation = self._build_escalation_hint(failure_class, details) if escalation: hints.append(escalation) + # When the error text is byte-identical to the previous attempt, the + # model's latest edit had zero effect — hints must call this out + # explicitly, not just repeat class-level advice. Keep this BEFORE + # the dedup so the fingerprint-unique streak count is surfaced fresh + # each time. + if self._same_details_streak >= 2: + hints.insert(0, ( + f"NO-PROGRESS LOOP DETECTED: your last {self._same_details_streak} " + "submissions produced byte-identical Lean errors. The changes you are " + "making do not reach the failing goal. Stop editing around the symptom. " + "Instead: (1) `write_editable_proof` with the failing tactic replaced by " + "`?_`, (2) `inspect_lean_goals` to read the real goal at that hole, " + "(3) `try_tactic_at_hole` with tactics you have NOT tried yet " + "(e.g. `simp_all`, `aesop`, `decide`, `exact?`, `constructor; all_goals ...`)." + )) + + # Dedupe hints we've already shown this session. Repeated-verbatim hints + # are noise: corpus analysis of failing tasks showed the same 4-5 hints + # echoed across 5+ stagnation events, training the model to skip the + # repair_hints list entirely. Only surface *new* advice each time. 
+ hints = self._filter_seen_hints(hints) + + # Highest-leverage directive: corpus analysis of 83 runs shows 12/29 + # failed tasks (41%) ended with `?_` still in the submitted proof, and + # in every one of those runs the agent re-submitted a `?_`-containing + # proof 2–9 times after the first rejection. The hint BELOW already + # existed but was inserted BEFORE `_filter_seen_hints`, so dedup + # suppressed it on the 2nd–Nth resubmission and the agent got no + # feedback tying its specific, detectable mistake (still-unfilled hole) + # to the specific failure class. Insert AFTER the dedup filter so this + # safety-critical, state-conditional warning fires on EVERY submission + # that still contains `?_`. The hint is keyed to the literal proof + # text state, not to the abstract hint corpus, so it is not a "noise" + # dedup candidate — it tells the agent something about its concrete + # current submission. + if HOLE_PATTERN.search(self.current_proof_text): + hole_count = len(HOLE_PATTERN.findall(self.current_proof_text)) + hints.insert(0, ( + f"UNFILLED HOLE IN SUBMITTED PROOF: your proof still contains " + f"{hole_count} `?_` hole(s). `?_` is a PROBE for `inspect_lean_goals` " + "and `try_tactic_at_hole`, never a final proof — Lean will reject " + "every submission containing `?_`. Do not submit `?_` again. Next " + "move: call `try_tactic_at_hole` with one concrete tactic at a " + "time (`omega`, `simp_all`, `decide`, `rfl`, `assumption`, " + "`trivial`, `exact h`, `linarith`, `aesop`, `exact?`). If any " + "succeeds, the proof updates in place and the task closes. If " + "none do, use `inspect_lean_goals` to read each hole's goal, then " + "`write_editable_proof` with concrete tactics substituted for " + "every `?_`." + )) + + # Second safety-critical, state-conditional warning that must survive + # `_filter_seen_hints`: tactic-in-term-position. 
+ # Corpus analysis of 29 failed runs: 19 tasks (66%) emit at least one + # `unknown identifier '{tactic}'` diagnostic — 173 occurrences for + # 'simp', 100 for 'simpa', 52 for 'omega', 43 for 'native_decide', + # 24 for 'simp_all'. One task alone (safe/swap_owner_is_owner_correctness) + # emits 52 repeats of `unknown identifier 'simp'` in a single run. + # The existing tactic-in-term hint inside `_build_check_hints` + # (line ~1466) is suppressed by the dedup filter after its first + # emission, so the agent never gets feedback tying the specific + # mistake to each subsequent rejection. This is identical to the + # hole-warning failure mode: a state-conditional critical warning + # that must repeat as long as the state persists. Re-detect the + # tactic-in-term case against the current `details` and insert a + # persistent warning post-dedup. The hint is keyed to the concrete + # error-text state (which tactic is being misused), not the generic + # hint corpus, so it is not a "noise" dedup candidate. + _unknown_names = _UNKNOWN_IDENT_RE.findall(details) + _tactic_in_term = [n for n in _unknown_names if n in _LEAN_TACTIC_NAMES] + if _tactic_in_term: + _tactic_name = _tactic_in_term[0] + hints.insert(0, ( + f"TACTIC IN TERM POSITION: Lean reports `unknown identifier " + f"'{_tactic_name}'` because `{_tactic_name}` is a TACTIC, not " + f"a term. It appears in your proof after `exact` / `refine` / " + f"`apply` / `:=` or inside `⟨ ⟩` — all term positions. Fix: " + f"wrap the tactic in `by`, e.g. `exact by {_tactic_name} ...`, " + f"`refine ⟨by {_tactic_name}, ...⟩`, or drop the `exact` / " + f"`refine` prefix so `{_tactic_name}` runs as a tactic " + f"directly (`by {_tactic_name} ...` at the top of the proof " + f"body). Do NOT call search_public_defs for `{_tactic_name}` " + f"— it is not a definition, it is a tactic, and the only fix " + f"is the `by` wrapper." + )) + + # Third safety-critical, state-conditional warning: local-variable + # out-of-scope names. 
Corpus analysis of 29 failed runs: 6 tasks + # (21%) emit `unknown identifier '{name}'` for names that + # are clearly binder-shaped (no dots, lowercase first char, no + # underscores) — up to 110 occurrences in a single run + # (safe/swap_owner_is_owner_correctness: 91×prevOwner, 19×oldOwner). + # The existing local-variable hint in `_build_check_hints` + # (~line 1475) is actionable ("call inspect_lean_goals / re-check + # the signature") but is suppressed by dedup after first emission. + # Same failure mode as tactic-in-term and unfilled-hole: state + # persists across re-submissions, warning must repeat. The hint + # is keyed to the specific out-of-scope name from the error text, + # not the generic corpus, so it is not a "noise" dedup candidate. + # Only fire when no tactic-hit is present so we never spam both + # warnings for the same line range — Lean reports tactic names + # the same way as local vars, and if a tactic mistake is present + # that's almost always the upstream cause. + if not _tactic_in_term: + _var_hits = [ + n for n in _unknown_names + if n not in _LEAN_TACTIC_NAMES + and "." not in n + and n + and n[0].islower() + and "_" not in n + ] + if _var_hits: + _var_name = _var_hits[0] + hints.insert(0, ( + f"LOCAL VARIABLE OUT OF SCOPE: Lean reports `unknown " + f"identifier '{_var_name}'` for a name that looks like " + f"a local binder, not a definition. `{_var_name}` is " + f"not in scope at the point it is used — common causes: " + f"(a) it was introduced inside a different `by_cases` / " + f"`rcases` / `·` branch and is not visible in the " + f"current branch; (b) the theorem signature uses a " + f"different parameter name (check the editable file " + f"header via `read_public_file`); (c) it was shadowed " + f"by a later `intro` / `rintro` / `obtain`. Fix: call " + f"`inspect_lean_goals` on a `?_` hole at this exact " + f"location to see the binders ACTUALLY in scope, then " + f"reference those names. 
Do NOT call search_public_defs " + f"for `{_var_name}` — it is a binder, not a definition, " + f"and search_public_defs cannot find binders." + )) + if not hints and same_class_count >= 3: + # All the standing advice has already been seen and isn't working. + # Issue a one-shot pivot directive rather than sending an empty list, + # which the model interprets as "nothing new, carry on". + hints = [ + f"All prior repair hints for '{failure_class}' have now been repeated " + f"{same_class_count} times without progress. Stop retrying variations of " + f"the same proof. Next move: write a minimal skeleton with a `?_` hole at " + f"the first failing step, call `inspect_lean_goals` to read the actual " + f"goal state, then use `try_tactic_at_hole` to probe tactics one at a time." + ] + if hints: annotated["repair_hints"] = hints @@ -455,7 +960,23 @@ def _annotate_check_result(self, result: dict[str, Any]) -> dict[str, Any]: return annotated - def _build_escalation_hint(self, failure_class: str) -> str | None: + def _filter_seen_hints(self, hints: list[str]) -> list[str]: + """Drop hints whose fingerprint has already been surfaced this session. + + Fingerprint = lowercased first 80 non-whitespace chars. Short enough + that wording tweaks still dedupe, long enough to distinguish genuinely + different hints. + """ + fresh: list[str] = [] + for hint in hints: + key = "".join(hint.lower().split())[:80] + if key in self._emitted_hint_keys: + continue + self._emitted_hint_keys.add(key) + fresh.append(hint) + return fresh + + def _build_escalation_hint(self, failure_class: str, details: str = "") -> str | None: """Build an escalation hint when the model is stagnating on a failure class.""" terms = extract_contract_simp_terms(self._task) if terms: @@ -465,6 +986,32 @@ def _build_escalation_hint(self, failure_class: str) -> str | None: full_set = "" if failure_class in ("simp_no_progress", "unsolved_goals", "rfl_failed", "unfold_failed"): + # If the stuck goal carries a `case