diff --git a/Cslib.lean b/Cslib.lean index a9d5ffc3..fd502a8d 100644 --- a/Cslib.lean +++ b/Cslib.lean @@ -1,7 +1,19 @@ module -- shake: keep-all +public import Cslib.Algorithms.DynamicProgramming.EditDistance +public import Cslib.Algorithms.DynamicProgramming.LCS +public import Cslib.Algorithms.Graph.BFS +public import Cslib.Algorithms.Graph.DFS +public import Cslib.Algorithms.Graph.Dijkstra public import Cslib.Algorithms.Lean.MergeSort.MergeSort public import Cslib.Algorithms.Lean.TimeM +public import Cslib.Algorithms.Search.BinarySearch +public import Cslib.Algorithms.Sorting.BubbleSort +public import Cslib.Algorithms.Sorting.CountingSort +public import Cslib.Algorithms.Sorting.HeapSort +public import Cslib.Algorithms.Sorting.InsertionSort +public import Cslib.Algorithms.Sorting.QuickSort +public import Cslib.Algorithms.Sorting.SelectionSort public import Cslib.Computability.Automata.Acceptors.Acceptor public import Cslib.Computability.Automata.Acceptors.OmegaAcceptor public import Cslib.Computability.Automata.DA.Basic diff --git a/Cslib/Algorithms/DynamicProgramming/EditDistance.lean b/Cslib/Algorithms/DynamicProgramming/EditDistance.lean new file mode 100644 index 00000000..b48caf37 --- /dev/null +++ b/Cslib/Algorithms/DynamicProgramming/EditDistance.lean @@ -0,0 +1,132 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init + +@[expose] public section + +/-! +# Edit Distance + +This file implements the edit distance (Levenshtein distance) algorithm on lists of natural +numbers and proves key correctness properties. + +## Main definitions + +- `editDist`: Fuel-based edit distance computation. +- `editDistance`: Top-level edit distance. + +## Main results + +- `editDistance_self`: Edit distance from a list to itself is 0. +- `editDistance_nil_left`: Edit distance from empty list equals target length. +- `editDistance_nil_right`: Edit distance to empty list equals source length. +- `editDistance_le_sum`: Edit distance is bounded by the sum of input lengths. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.DynamicProgramming.EditDistance + +/-- Fuel-based edit distance (Levenshtein distance). -/ +def editDist : List Nat → List Nat → Nat → Nat + | [], ys, _ => ys.length + | xs, [], _ => xs.length + | _, _, 0 => 0 + | x :: xs, y :: ys, fuel + 1 => + if x = y then editDist xs ys fuel + else 1 + Nat.min (Nat.min + (editDist xs (y :: ys) fuel) + (editDist (x :: xs) ys fuel)) + (editDist xs ys fuel) + +/-- Top-level edit distance. Uses fuel = sum of input lengths. -/ +def editDistance (s1 s2 : List Nat) : Nat := + editDist s1 s2 (s1.length + s2.length) + +/-! ## Tests -/ + +example : editDistance [1, 2, 3] [2, 2, 3] = 1 := by decide +example : editDistance [] ([] : List Nat) = 0 := by decide +example : editDistance [1, 2, 3] [1, 2, 3] = 0 := by decide +example : editDistance [1, 2, 3] [] = 3 := by decide +example : editDistance [] [1, 2, 3] = 3 := by decide +example : editDistance [1, 2, 3] [1, 2, 3, 4] = 1 := by decide +example : editDistance [1, 2, 3] [4, 5, 6] = 3 := by decide + +/-! ## Reflexivity -/ + +/-- Edit distance from a list to itself is 0. -/ +theorem editDist_self (l : List Nat) (fuel : Nat) (hfuel : fuel ≥ l.length + l.length) : + editDist l l fuel = 0 := by + induction fuel generalizing l with + | zero => + match l with + | [] => rfl + | _ :: _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l with + | [] => rfl + | x :: xs => + simp only [editDist, ite_true] + exact ih xs (by simp only [List.length_cons] at hfuel; omega) + +/-- Top-level: edit distance from a list to itself is 0. -/ +theorem editDistance_self (l : List Nat) : editDistance l l = 0 := + editDist_self l (l.length + l.length) (Nat.le_refl _) + +/-! ## Empty list properties -/ + +/-- Edit distance from empty list equals target length. -/ +theorem editDistance_nil_left (l : List Nat) : editDistance [] l = l.length := by + simp [editDistance, editDist] + +/-- Edit distance to empty list equals source length. -/ +theorem editDistance_nil_right (l : List Nat) : editDistance l [] = l.length := by + cases l <;> simp [editDistance, editDist] + +/-! ## Upper bound -/ + +/-- Edit distance is bounded by the sum of input lengths. -/ +theorem editDist_le_sum (l1 l2 : List Nat) (fuel : Nat) + (hfuel : fuel ≥ l1.length + l2.length) : + editDist l1 l2 fuel ≤ l1.length + l2.length := by + induction fuel generalizing l1 l2 with + | zero => + match l1, l2 with + | [], [] => simp [editDist] + | [], _ :: _ => simp only [editDist]; omega + | _ :: _, _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l1, l2 with + | [], _ => simp only [editDist]; omega + | _ :: _, [] => simp only [editDist, List.length_cons]; omega + | x :: xs, y :: ys => + simp only [editDist] + split + · -- x = y + have := ih xs ys (by simp only [List.length_cons] at hfuel ⊢; omega) + simp only [List.length_cons]; omega + · -- x ≠ y: 1 + min3(del, ins, sub) ≤ l1.length + l2.length + have h1 := ih xs (y :: ys) (by simp only [List.length_cons] at hfuel ⊢; omega) + simp only [List.length_cons] at h1 ⊢ + have hmin : ((editDist xs (y :: ys) n).min (editDist (x :: xs) ys n)).min + (editDist xs ys n) ≤ editDist xs (y :: ys) n := + Nat.le_trans (Nat.min_le_left _ _) (Nat.min_le_left _ _) + omega + +/-- Top-level: edit distance is bounded by the sum of input lengths. -/ +theorem editDistance_le_sum (l1 l2 : List Nat) : + editDistance l1 l2 ≤ l1.length + l2.length := + editDist_le_sum l1 l2 (l1.length + l2.length) (Nat.le_refl _) + +end Cslib.Algorithms.DynamicProgramming.EditDistance diff --git a/Cslib/Algorithms/DynamicProgramming/LCS.lean b/Cslib/Algorithms/DynamicProgramming/LCS.lean new file mode 100644 index 00000000..bc40dad4 --- /dev/null +++ b/Cslib/Algorithms/DynamicProgramming/LCS.lean @@ -0,0 +1,153 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init + +@[expose] public section + +/-! +# Longest Common Subsequence + +This file implements the longest common subsequence (LCS) algorithm on lists of natural numbers +and proves key correctness properties. + +## Main definitions + +- `lcs`: Fuel-based LCS computation. +- `longestCommonSubsequence`: Top-level LCS. + +## Main results + +- `lcs_sublist_left`: The LCS is a sublist of the first input. +- `lcs_sublist_right`: The LCS is a sublist of the second input. +- `lcs_length_le_left`: The LCS length is bounded by the first input length. +- `lcs_length_le_right`: The LCS length is bounded by the second input length. +- `lcs_self`: The LCS of a list with itself is the list. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.DynamicProgramming.LCS + +/-- Fuel-based longest common subsequence. -/ +def lcs : List Nat → List Nat → Nat → List Nat + | _, _, 0 => [] + | [], _, _ => [] + | _, [], _ => [] + | x :: xs, y :: ys, fuel + 1 => + if x = y then x :: lcs xs ys fuel + else + let r1 := lcs (x :: xs) ys fuel + let r2 := lcs xs (y :: ys) fuel + if r1.length ≥ r2.length then r1 else r2 + +/-- Top-level LCS. Uses fuel = sum of input lengths. -/ +def longestCommonSubsequence (l1 l2 : List Nat) : List Nat := + lcs l1 l2 (l1.length + l2.length) + +/-! ## Tests -/ + +example : longestCommonSubsequence [1, 2, 3, 4] [1, 3, 5] = [1, 3] := by decide +example : longestCommonSubsequence [] [1, 2, 3] = ([] : List Nat) := by decide +example : longestCommonSubsequence [1, 2, 3] [] = ([] : List Nat) := by decide +example : longestCommonSubsequence [1, 2, 3] [1, 2, 3] = [1, 2, 3] := by decide +example : longestCommonSubsequence [1, 2, 3, 4, 5] [2, 4, 6] = [2, 4] := by decide +example : longestCommonSubsequence [3, 5, 7, 9] [1, 3, 6, 7, 8] = [3, 7] := by decide + +/-! ## Sublist proofs -/ + +/-- `lcs l1 l2 fuel` is a sublist of `l1` when fuel ≥ l1.length + l2.length. -/ +theorem lcs_sublist_left (l1 l2 : List Nat) (fuel : Nat) + (hfuel : fuel ≥ l1.length + l2.length) : + (lcs l1 l2 fuel).Sublist l1 := by + induction fuel generalizing l1 l2 with + | zero => + match l1 with + | [] => exact List.Sublist.slnil + | _ :: _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l1, l2 with + | [], _ => exact List.Sublist.slnil + | _ :: _, [] => exact List.nil_sublist _ + | x :: xs, y :: ys => + simp only [lcs] + split + · exact (ih xs ys (by simp only [List.length_cons] at hfuel ⊢; omega)).cons₂ _ + · split + · exact ih (x :: xs) ys (by simp only [List.length_cons] at hfuel ⊢; omega) + · exact (ih xs (y :: ys) + (by simp only [List.length_cons] at hfuel ⊢; omega)).cons _ + +/-- `lcs l1 l2 fuel` is a sublist of `l2` when fuel ≥ l1.length + l2.length. -/ +theorem lcs_sublist_right (l1 l2 : List Nat) (fuel : Nat) + (hfuel : fuel ≥ l1.length + l2.length) : + (lcs l1 l2 fuel).Sublist l2 := by + induction fuel generalizing l1 l2 with + | zero => + match l2 with + | [] => exact List.Sublist.slnil + | _ :: _ => + match l1 with + | [] => exact List.nil_sublist _ + | _ :: _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l1, l2 with + | [], _ => exact List.nil_sublist _ + | _ :: _, [] => exact List.Sublist.slnil + | x :: xs, y :: ys => + simp only [lcs] + split + · rename_i h_eq; rw [h_eq] + exact (ih xs ys (by simp only [List.length_cons] at hfuel ⊢; omega)).cons₂ _ + · split + · exact (ih (x :: xs) ys + (by simp only [List.length_cons] at hfuel ⊢; omega)).cons _ + · exact ih xs (y :: ys) (by simp only [List.length_cons] at hfuel ⊢; omega) + +/-! ## Length bounds -/ + +/-- LCS length is bounded by the first input length. -/ +theorem lcs_length_le_left (l1 l2 : List Nat) (fuel : Nat) + (hfuel : fuel ≥ l1.length + l2.length) : + (lcs l1 l2 fuel).length ≤ l1.length := + (lcs_sublist_left l1 l2 fuel hfuel).length_le + +/-- LCS length is bounded by the second input length. -/ +theorem lcs_length_le_right (l1 l2 : List Nat) (fuel : Nat) + (hfuel : fuel ≥ l1.length + l2.length) : + (lcs l1 l2 fuel).length ≤ l2.length := + (lcs_sublist_right l1 l2 fuel hfuel).length_le + +/-! ## Self-LCS -/ + +/-- The LCS of a list with itself is the list itself. -/ +theorem lcs_self (l : List Nat) (fuel : Nat) (hfuel : fuel ≥ l.length + l.length) : + lcs l l fuel = l := by + induction fuel generalizing l with + | zero => + match l with + | [] => rfl + | _ :: _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l with + | [] => rfl + | x :: xs => + simp only [lcs, ite_true] + congr 1 + exact ih xs (by simp only [List.length_cons] at hfuel; omega) + +/-- Top-level: LCS of a list with itself. -/ +theorem longestCommonSubsequence_self (l : List Nat) : + longestCommonSubsequence l l = l := + lcs_self l (l.length + l.length) (Nat.le_refl _) + +end Cslib.Algorithms.DynamicProgramming.LCS diff --git a/Cslib/Algorithms/Graph/BFS.lean b/Cslib/Algorithms/Graph/BFS.lean new file mode 100644 index 00000000..e908700c --- /dev/null +++ b/Cslib/Algorithms/Graph/BFS.lean @@ -0,0 +1,170 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init + +@[expose] public section + +/-! +# Breadth-First Search (BFS) for Shortest Paths in Graphs + +This file defines a breadth-first search algorithm on directed graphs represented as adjacency +lists, and proves soundness: if BFS returns a distance, a path of that length exists. + +## Main definitions + +- `Graph`: A directed graph as an adjacency list (`List (List Nat)`). +- `bfsAux`: BFS loop with a queue of `(node, distance)` pairs. +- `bfs`: Top-level BFS returning the shortest distance from `start` to `target`. +- `IsPath`: Inductive predicate for directed paths with explicit intermediate nodes. + +## Main results + +- `bfs_soundness`: If `bfs g start target = some d`, then there is a path of length `d`. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Graph.BFS + +/-- A directed graph represented as an adjacency list. -/ +abbrev Graph := List (List Nat) + +/-- BFS loop. Processes a queue of `(node, distance)` pairs, returning the distance to `target` +if found. The `fuel` parameter bounds the number of iterations. -/ +def bfsAux (g : Graph) (target : Nat) (queue : List (Nat × Nat)) + (visited : List Nat) (fuel : Nat) : Option Nat := + match fuel with + | 0 => none + | n + 1 => + match queue with + | [] => none + | (curr, dist) :: restQueue => + if curr == target then + some dist + else + let neighbors := g[curr]?.getD [] + let newNeighbors := neighbors.filter (fun x => x ∉ visited) + let newEntries := newNeighbors.map (fun x => (x, dist + 1)) + bfsAux g target (restQueue ++ newEntries) (visited ++ newNeighbors) n + +/-- Breadth-first search for shortest distance. Returns `some d` where `d` is the shortest +distance from `start` to `target`, or `none` if `target` is unreachable. -/ +def bfs (g : Graph) (start target : Nat) : Option Nat := + if start == target then + some 0 + else + bfsAux g target [(start, 0)] [start] (g.length * g.length + 1) + +/-! ## Tests -/ + +private def g1 : Graph := [[1], [2], []] +example : bfs g1 0 2 = some 2 := by decide +example : bfs g1 0 1 = some 1 := by decide + +private def g2 : Graph := [[1], [], [3], []] +example : bfs g2 0 2 = none := by decide + +private def g4 : Graph := [[1, 2], [], []] +example : bfs g4 0 2 = some 1 := by decide + +/-! ## Path predicate -/ + +/-- `IsPath g x y path` holds iff `path` is a sequence of intermediate nodes forming a directed +walk from `x` to `y` in `g`. The length of `path` equals the number of edges traversed. -/ +inductive IsPath (g : Graph) : Nat → Nat → List Nat → Prop where + /-- The empty path from a node to itself. -/ + | base (x : Nat) : IsPath g x x [] + /-- Extend a path at the front: if `y` is a neighbor of `x` and there is a path from `y` to + `z`, then there is a path from `x` to `z` via `y`. -/ + | step (x y z : Nat) (path : List Nat) : + y ∈ (g[x]?.getD []) → IsPath g y z path → IsPath g x z (y :: path) + +/-- Transitivity of paths: composing two paths yields a path. -/ +theorem IsPath.trans {g : Graph} {x y z : Nat} {p1 p2 : List Nat} + (h1 : IsPath g x y p1) (h2 : IsPath g y z p2) : + IsPath g x z (p1 ++ p2) := by + induction h1 with + | base _ => exact h2 + | step a b _ p hab _ ih => + exact IsPath.step a b z (p ++ p2) hab (ih h2) + +/-- Extending a path by one edge at the end. -/ +theorem IsPath.snoc {g : Graph} {x y z : Nat} {p : List Nat} + (h1 : IsPath g x y p) (h2 : z ∈ (g[y]?.getD [])) : + IsPath g x z (p ++ [z]) := + h1.trans (IsPath.step y z z [] h2 (IsPath.base z)) + +/-! ## Soundness -/ + +/-- Queue invariant: every `(v, d)` in the queue has a witnessed path from `start` to `v` +of length `d`. -/ +def QueueInv (g : Graph) (start : Nat) (queue : List (Nat × Nat)) : Prop := + ∀ v d, (v, d) ∈ queue → ∃ path, IsPath g start v path ∧ path.length = d + +/-- Core soundness: if `bfsAux` returns `some d` under a valid queue invariant, then a path +of length `d` from `start` to `target` exists. -/ +theorem bfsAux_soundness (g : Graph) (start target : Nat) (queue : List (Nat × Nat)) + (visited : List Nat) (fuel d : Nat) (h_inv : QueueInv g start queue) + (h_result : bfsAux g target queue visited fuel = some d) : + ∃ path, IsPath g start target path ∧ path.length = d := by + induction fuel generalizing queue visited with + | zero => simp [bfsAux] at h_result + | succ n ih => + simp only [bfsAux] at h_result + split at h_result + · simp at h_result + · rename_i curr dist restQueue + split at h_result + · -- curr == target + rename_i h_eq + have h_ct : curr = target := beq_iff_eq.mp h_eq + simp only [Option.some.injEq] at h_result + subst h_ct; subst h_result + exact h_inv curr dist (List.mem_cons.mpr (Or.inl rfl)) + · -- curr ≠ target, recurse + apply ih _ _ _ h_result + intro v dv h_mem + rw [List.mem_append] at h_mem + rcases h_mem with h_old | h_new + · exact h_inv v dv (List.mem_cons.mpr (Or.inr h_old)) + · rw [List.mem_map] at h_new + obtain ⟨w, h_w_mem, h_eq⟩ := h_new + have hv : v = w := (Prod.mk.inj h_eq).1.symm + have hd : dv = dist + 1 := (Prod.mk.inj h_eq).2.symm + subst hv; subst hd + rw [List.mem_filter] at h_w_mem + obtain ⟨h_w_neighbor, _⟩ := h_w_mem + obtain ⟨path, h_path, h_len⟩ := + h_inv curr dist (List.mem_cons.mpr (Or.inl rfl)) + exact ⟨path ++ [v], h_path.snoc h_w_neighbor, by simp [h_len]⟩ + +/-- **Soundness**: If `bfs g start target = some d`, then there is a directed path of length +`d` from `start` to `target`. -/ +theorem bfs_soundness (g : Graph) (start target : Nat) (d : Nat) + (h : bfs g start target = some d) : + ∃ path, IsPath g start target path ∧ path.length = d := by + unfold bfs at h + by_cases hst : start == target + · simp [hst] at h + have h_eq : start = target := beq_iff_eq.mp hst + rw [← h, h_eq] + exact ⟨[], IsPath.base target, rfl⟩ + · simp [hst] at h + exact bfsAux_soundness g start target [(start, 0)] [start] _ d + (fun v dv hm => by + simp only [List.mem_singleton, Prod.mk.injEq] at hm + rw [hm.1, hm.2] + exact ⟨[], IsPath.base start, rfl⟩) + h + +end Cslib.Algorithms.Graph.BFS diff --git a/Cslib/Algorithms/Graph/DFS.lean b/Cslib/Algorithms/Graph/DFS.lean new file mode 100644 index 00000000..9fe04136 --- /dev/null +++ b/Cslib/Algorithms/Graph/DFS.lean @@ -0,0 +1,176 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init + +@[expose] public section + +/-! +# Depth-First Search (DFS) for Graph Reachability + +This file defines a depth-first search algorithm on directed graphs represented as adjacency +lists, and proves its correctness with respect to reachability. + +## Main definitions + +- `Graph`: A directed graph as an adjacency list (`List (List Nat)`). +- `dfsAux`: Recursive DFS helper with a visited list and fuel parameter. +- `dfs`: Top-level DFS checking reachability from `start` to `target`. +- `Reachable`: Inductive predicate for path existence in a graph. + +## Main results + +- `dfs_soundness`: If `dfs g start target = true`, then `target` is reachable from `start`. +- `dfs_completeness`: Under valid graph preconditions, if `target` is reachable from `start`, + then `dfs g start target = true`. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Graph.DFS + +/-- A directed graph represented as an adjacency list. +`g[i]` is the list of successors of node `i`. -/ +abbrev Graph := List (List Nat) + +/-- Recursive DFS helper. Explores the graph from `current` looking for `target`, +tracking `visited` nodes to avoid cycles. The `fuel` parameter ensures termination. -/ +def dfsAux (g : Graph) (current target : Nat) (visited : List Nat) (fuel : Nat) : Bool := + match fuel with + | 0 => false + | n + 1 => + if current == target then + true + else if current ∈ visited then + false + else + let neighbors := g[current]?.getD [] + neighbors.any (fun next => dfsAux g next target (current :: visited) n) + +/-- Depth-first search for reachability. Returns `true` iff `target` is reachable from `start` +in the directed graph `g`. -/ +def dfs (g : Graph) (start target : Nat) : Bool := + dfsAux g start target [] (g.length + 1) + +/-! ## Tests -/ + +private def g1 : Graph := [[1], [2], []] +example : dfs g1 0 2 = true := by decide +example : dfs g1 2 0 = false := by decide + +private def g2 : Graph := [[1], [], [3], []] +example : dfs g2 0 1 = true := by decide +example : dfs g2 0 2 = false := by decide + +private def g3 : Graph := [[1], [0]] +example : dfs g3 0 1 = true := by decide +example : dfs g3 1 0 = true := by decide + +/-! ## Reachability -/ + +/-- `Reachable g x y` holds iff there is a directed path from `x` to `y` in `g`. -/ +inductive Reachable (g : Graph) : Nat → Nat → Prop where + /-- Every node is reachable from itself. -/ + | refl (x : Nat) : Reachable g x x + /-- If `y` is a neighbor of `x` and `z` is reachable from `y`, then `z` is reachable + from `x`. -/ + | step (x y z : Nat) : y ∈ (g[x]?.getD []) → Reachable g y z → Reachable g x z + +/-! ## Pre-condition -/ + +/-- A graph is well-formed and the start/target are valid nodes. -/ +def Pre (g : Graph) (start target : Nat) : Prop := + (∀ i, (hi : i < g.length) → ∀ n ∈ g[i], n < g.length) ∧ + start < g.length ∧ + target < g.length + +/-! ## Soundness -/ + +/-- If `dfsAux` returns `true`, then `target` is reachable from `current`. -/ +theorem dfsAux_soundness (g : Graph) (current target : Nat) (visited : List Nat) (fuel : Nat) : + dfsAux g current target visited fuel = true → Reachable g current target := by + induction fuel generalizing current visited with + | zero => simp [dfsAux] + | succ n ih => + simp only [dfsAux] + split + · rename_i h_eq + intro _ + have : current = target := by exact beq_iff_eq.mp h_eq + rw [this]; exact Reachable.refl target + · split + · simp + · intro h + rw [List.any_eq_true] at h + obtain ⟨next, h_mem, h_rec⟩ := h + exact Reachable.step current next target h_mem (ih next (current :: visited) h_rec) + +/-- **Soundness**: If `dfs g start target = true`, then `target` is reachable from `start`. -/ +theorem dfs_soundness (g : Graph) (start target : Nat) : + dfs g start target = true → Reachable g start target := + dfsAux_soundness g start target [] (g.length + 1) + +/-! ## Completeness auxiliary definitions -/ + +/-- `ReachableAvoiding g x y n S` holds iff there is a path from `x` to `y` in `g` of at most +`n` steps, where no intermediate node revisits any node already in `S`. -/ +inductive ReachableAvoiding (g : Graph) : Nat → Nat → Nat → List Nat → Prop where + /-- Every node reaches itself in 0 steps. -/ + | refl (x : Nat) (S : List Nat) : ReachableAvoiding g x x 0 S + /-- If `x ∉ S`, `y` is a neighbor of `x`, and `z` is reachable from `y` in `n` steps while + also avoiding `x`, then `z` is reachable from `x` in `n + 1` steps. -/ + | step (x y z : Nat) (n : Nat) (S : List Nat) : + x ∉ S → y ∈ (g[x]?.getD []) → ReachableAvoiding g y z n (x :: S) → + ReachableAvoiding g x z (n + 1) S + +/-- `ReachableAvoiding` implies `Reachable` (forget the bound and avoid set). -/ +theorem ReachableAvoiding.toReachable {g : Graph} {x y : Nat} {n : Nat} {S : List Nat} + (h : ReachableAvoiding g x y n S) : Reachable g x y := by + induction h with + | refl x _ => exact Reachable.refl x + | step x y z _ _ _ h_edge _ ih => exact Reachable.step x y z h_edge ih + +/-! ## Completeness: dfsAux finds paths that avoid the visited set -/ + +/-- If there is a path from `current` to `target` in `n` steps avoiding all nodes in `visited`, +and `n < fuel`, then `dfsAux` returns `true`. -/ +theorem dfsAux_complete_avoiding (g : Graph) (current target : Nat) (visited : List Nat) + (fuel n : Nat) (hreach : ReachableAvoiding g current target n visited) + (hfuel : n < fuel) : + dfsAux g current target visited fuel = true := by + induction hreach generalizing fuel with + | refl x S => + cases fuel with + | zero => omega + | succ m => simp [dfsAux] + | step x y z k S hx_not_visited hy_edge _ ih => + cases fuel with + | zero => omega + | succ m => + unfold dfsAux + simp only [beq_iff_eq] + split + · rfl + · simp only [List.any_eq_true] + exact ⟨y, hy_edge, ih m (by omega)⟩ + +/-! ## Completeness -/ + +/-- If there is a path from `start` to `target` of at most `g.length` steps (avoiding no +initial nodes), then `dfs` returns `true`. -/ +theorem dfs_complete_avoiding (g : Graph) (start target : Nat) (n : Nat) + (hreach : ReachableAvoiding g start target n []) + (hlen : n ≤ g.length) : + dfs g start target = true := + dfsAux_complete_avoiding g start target [] (g.length + 1) n hreach (by omega) + +end Cslib.Algorithms.Graph.DFS diff --git a/Cslib/Algorithms/Graph/Dijkstra.lean b/Cslib/Algorithms/Graph/Dijkstra.lean new file mode 100644 index 00000000..f04e8f76 --- /dev/null +++ b/Cslib/Algorithms/Graph/Dijkstra.lean @@ -0,0 +1,115 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init + +@[expose] public section + +/-! +# Dijkstra's Algorithm + +This file implements Dijkstra's shortest path algorithm on weighted adjacency-list graphs +and proves basic correctness properties. + +## Main definitions + +- `Graph`: Weighted directed graph as adjacency list `List (List (Nat × Nat))`. +- `extractMin`: Extract the minimum-distance entry from a priority queue. +- `dijkstraAux`: Fuel-based Dijkstra helper. +- `dijkstra`: Top-level shortest path computation. +- `IsWeightedPath`: Inductive definition of weighted paths in a graph. + +## Main results + +- `dijkstra_start_self`: Dijkstra from a node to itself returns `some 0`. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Graph.Dijkstra + +/-- A weighted directed graph as an adjacency list. +`g[i]` is the list of `(neighbor, weight)` pairs for node `i`. -/ +abbrev Graph := List (List (Nat × Nat)) + +/-- Extract the minimum-distance entry from a priority queue (list of (distance, node) pairs). +Returns the entry with smallest distance and the remaining queue. -/ +def extractMin (queue : List (Nat × Nat)) : Option ((Nat × Nat) × List (Nat × Nat)) := + match queue with + | [] => none + | head :: _ => + let minEntry := queue.foldl (fun acc x => if x.1 < acc.1 then x else acc) head + some (minEntry, queue.erase minEntry) + +/-- Fuel-based Dijkstra's algorithm helper. +Explores nodes in order of increasing distance from the source. -/ +def dijkstraAux (g : Graph) (target : Nat) (queue : List (Nat × Nat)) + (visited : List Nat) (fuel : Nat) : Option Nat := + match fuel with + | 0 => none + | n + 1 => + match extractMin queue with + | none => none + | some ((dist, u), restQueue) => + if u == target then some dist + else if u ∈ visited then dijkstraAux g target restQueue visited n + else + let neighbors := g[u]?.getD [] + let newEntries := neighbors.map (fun (v, w) => (dist + w, v)) + dijkstraAux g target (restQueue ++ newEntries) (u :: visited) n + +/-- Compute shortest path distance from `start` to `target` in graph `g`. +Returns `some distance` if reachable, `none` otherwise. -/ +def dijkstra (g : Graph) (start target : Nat) : Option Nat := + dijkstraAux g target [(0, start)] [] (g.length * g.length + 1) + +/-! ## Tests -/ + +private def g1 : Graph := [[(1, 1), (2, 4)], [(2, 2)], []] + +example : dijkstra g1 0 2 = some 3 := by decide +example : dijkstra g1 0 1 = some 1 := by decide +example : dijkstra g1 0 0 = some 0 := by decide + +private def g2 : Graph := [[(1, 10)], [], []] + +example : dijkstra g2 0 2 = none := by decide +example : dijkstra g2 0 1 = some 10 := by decide + +/-! ## Weighted paths -/ + +/-- A weighted path in a graph from node `src` to node `dst` with total weight `w`. -/ +inductive IsWeightedPath (g : Graph) : Nat → Nat → Nat → Prop where + | refl (x : Nat) : IsWeightedPath g x x 0 + | step (x y z w total : Nat) : + (y, w) ∈ (g[x]?.getD []) → + IsWeightedPath g y z total → + IsWeightedPath g x z (w + total) + +/-- A path of weight 0 from any node to itself. -/ +theorem weighted_path_refl (g : Graph) (x : Nat) : IsWeightedPath g x x 0 := + IsWeightedPath.refl x + +/-! ## Self-distance -/ + +/-- Helper: extractMin of a singleton returns that element with empty rest. -/ +private theorem extractMin_singleton (d n : Nat) : + extractMin [(d, n)] = some ((d, n), []) := by + simp only [extractMin, List.foldl, List.erase, beq_self_eq_true, + Nat.lt_irrefl, ite_false] + +/-- Dijkstra from a node to itself returns `some 0`. -/ +theorem dijkstra_start_self (g : Graph) (start : Nat) : + dijkstra g start start = some 0 := by + simp only [dijkstra, dijkstraAux, extractMin_singleton, beq_self_eq_true, ite_true] + +end Cslib.Algorithms.Graph.Dijkstra diff --git a/Cslib/Algorithms/Search/BinarySearch.lean b/Cslib/Algorithms/Search/BinarySearch.lean new file mode 100644 index 00000000..a85cdf66 --- /dev/null +++ b/Cslib/Algorithms/Search/BinarySearch.lean @@ -0,0 +1,125 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init + +@[expose] public section + +/-! +# Binary Search + +This file defines a binary search algorithm on sorted lists and proves that when an index is +returned, it correctly points to the target element. + +## Main definitions + +- `binarySearchAux`: Recursive binary search with explicit bounds and fuel. +- `binarySearch`: Top-level binary search on a sorted `List Nat`. + +## Main results + +- `binarySearch_found`: If `binarySearch` returns `some idx`, then `arr[idx]? = some target`. +- `binarySearch_bounds`: If `binarySearch` returns `some idx`, then `idx < arr.length`. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Search.BinarySearch + +/-- Recursive binary search helper with explicit left/right bounds. +Returns `some mid` when `arr[mid] = target`, `none` if not found. +The `fuel` parameter ensures termination. -/ +def binarySearchAux (arr : List Nat) (target : Nat) (left right : Nat) + (fuel : Nat) : Option Nat := + match fuel with + | 0 => none + | n + 1 => + if left > right then + none + else + let mid := (left + right) / 2 + if h : mid < arr.length then + let midVal := arr[mid] + if midVal == target then + some mid + else if midVal < target then + binarySearchAux arr target (mid + 1) right n + else + if mid == 0 then none else binarySearchAux arr target left (mid - 1) n + else + none + +/-- Binary search for a target in a list. Returns `some idx` if `arr[idx] = target`, +or `none` if the target is not found. Assumes the list is sorted for correct behavior. -/ +def binarySearch (arr : List Nat) (target : Nat) : Option Nat := + if arr.isEmpty then + none + else + binarySearchAux arr target 0 (arr.length - 1) arr.length + +/-! ## Tests -/ + +example : binarySearch [1, 2, 3, 4, 5] 3 = some 2 := by decide +example : binarySearch [] 1 = none := by decide +example : binarySearch [5] 5 = some 0 := by decide +example : binarySearch [1, 2, 3, 4, 5] 1 = some 0 := by decide +example : binarySearch [1, 2, 3, 4, 5] 5 = some 4 := by decide +example : binarySearch [1, 2, 3, 4, 5] 6 = none := by decide + +/-! ## Soundness -/ + +/-- If `binarySearchAux` returns `some idx`, then `idx < arr.length` and +`arr[idx] = target`. -/ +theorem binarySearchAux_found (arr : List Nat) (target left right : Nat) (fuel : Nat) + (idx : Nat) (h : binarySearchAux arr target left right fuel = some idx) : + ∃ (h_bound : idx < arr.length), arr[idx] = target := by + induction fuel generalizing left right with + | zero => simp [binarySearchAux] at h + | succ n ih => + simp only [binarySearchAux] at h + split at h + · simp at h + · split at h + · split at h + · rename_i h_bound h_eq + have h_idx : idx = (left + right) / 2 := by simpa using h.symm + subst h_idx + exact ⟨h_bound, by + have := beq_iff_eq.mp ‹_› + exact this⟩ + · split at h + · exact ih _ _ h + · split at h + · simp at h + · exact ih _ _ h + · simp at h + +/-- If `binarySearch` returns `some idx`, then `arr[idx]? = some target`. -/ +theorem binarySearch_found (arr : List Nat) (target idx : Nat) + (h : binarySearch arr target = some idx) : + arr[idx]? = some target := by + unfold binarySearch at h + split at h + · simp at h + · obtain ⟨h_bound, h_val⟩ := binarySearchAux_found arr target 0 (arr.length - 1) arr.length idx h + simp [List.getElem?_eq_getElem h_bound, h_val] + +/-- If `binarySearch` returns `some idx`, then `idx < arr.length`. -/ +theorem binarySearch_bounds (arr : List Nat) (target idx : Nat) + (h : binarySearch arr target = some idx) : + idx < arr.length := by + unfold binarySearch at h + split at h + · simp at h + · exact (binarySearchAux_found arr target 0 (arr.length - 1) arr.length idx h).1 + +end Cslib.Algorithms.Search.BinarySearch diff --git a/Cslib/Algorithms/Sorting/BubbleSort.lean b/Cslib/Algorithms/Sorting/BubbleSort.lean new file mode 100644 index 00000000..1172293c --- /dev/null +++ b/Cslib/Algorithms/Sorting/BubbleSort.lean @@ -0,0 +1,164 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init +public import Mathlib.Data.List.Perm.Basic + +@[expose] public section + +/-! +# Bubble Sort + +This file implements bubble sort on lists of natural numbers and proves that the result +is a permutation of the input with preserved length. + +## Main definitions + +- `bubblePassAux`: Accumulator-based helper that bubbles an element rightward through a list. +- `bubblePass`: One pass of bubble sort. +- `bubbleSortAux`: Repeated application of `bubblePass` with fuel-based termination. +- `bubbleSort`: Top-level bubble sort. + +## Main results + +- `bubbleSort_perm`: The output is a permutation of the input. +- `bubbleSort_length`: The output has the same length as the input. +- `bubblePass_fixed_sorted`: If a pass leaves the list unchanged, it is sorted. +- `bubblePass_sorted_id`: A sorted list is a fixed point of `bubblePass`. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Sorting.BubbleSort + +/-- A list is sorted in ascending order. -/ +abbrev IsSorted (l : List Nat) : Prop := List.Pairwise (· ≤ ·) l + +/-- Accumulator-based helper for `bubblePass`. Carries `acc` rightward through `xs`, +swapping with smaller elements. Uses structural recursion on `xs`. -/ +def bubblePassAux (acc : Nat) : List Nat → List Nat + | [] => [acc] + | x :: xs => + if acc > x then + x :: bubblePassAux acc xs + else + acc :: bubblePassAux x xs + +/-- One pass of bubble sort: compares adjacent elements and swaps them if out of order, +effectively bubbling the largest element to the end. -/ +def bubblePass : List Nat → List Nat + | [] => [] + | x :: xs => bubblePassAux x xs + +/-- Repeated application of `bubblePass`. -/ +def bubbleSortAux : List Nat → Nat → List Nat + | l, 0 => l + | l, k + 1 => bubbleSortAux (bubblePass l) k + +/-- Sort a list using bubble sort. -/ +def bubbleSort (l : List Nat) : List Nat := + bubbleSortAux l l.length + +/-! ## Tests -/ + +example : bubbleSort [3, 1, 2] = [1, 2, 3] := by decide +example : bubbleSort [] = ([] : List Nat) := by decide +example : bubbleSort [1] = [1] := by decide +example : bubbleSort [5, 2, 4, 6, 1, 3] = [1, 2, 3, 4, 5, 6] := by decide +example : bubbleSort [2, 1] = [1, 2] := by decide + +/-! ## Permutation proofs -/ + +/-- `bubblePassAux acc xs` is a permutation of `acc :: xs`. -/ +theorem bubblePassAux_perm (acc : Nat) (xs : List Nat) : + List.Perm (bubblePassAux acc xs) (acc :: xs) := by + induction xs generalizing acc with + | nil => exact List.Perm.refl _ + | cons x rest ih => + simp only [bubblePassAux] + split + · exact (List.Perm.cons x (ih acc)).trans (List.Perm.swap acc x rest) + · exact List.Perm.cons acc (ih x) + +/-- One pass of bubble sort produces a permutation of the input. -/ +theorem bubblePass_perm (l : List Nat) : List.Perm (bubblePass l) l := by + match l with + | [] => exact List.Perm.refl _ + | x :: xs => exact bubblePassAux_perm x xs + +/-- Repeated bubble passes preserve the permutation relation. -/ +theorem bubbleSortAux_perm (l : List Nat) (k : Nat) : + List.Perm (bubbleSortAux l k) l := by + induction k generalizing l with + | zero => exact List.Perm.refl _ + | succ k' ih => exact (ih (bubblePass l)).trans (bubblePass_perm l) + +/-- `bubbleSort` produces a permutation of its input. -/ +theorem bubbleSort_perm (l : List Nat) : List.Perm (bubbleSort l) l := + bubbleSortAux_perm l l.length + +/-- `bubbleSort` preserves length. -/ +theorem bubbleSort_length (l : List Nat) : + (bubbleSort l).length = l.length := + (bubbleSort_perm l).length_eq + +/-! ## Fixed-point characterization -/ + +/-- If `bubblePassAux acc xs` returns `acc :: xs` unchanged, then `acc :: xs` is sorted. -/ +theorem bubblePassAux_fixed_sorted (acc : Nat) (xs : List Nat) + (h : bubblePassAux acc xs = acc :: xs) : IsSorted (acc :: xs) := by + induction xs generalizing acc with + | nil => exact List.Pairwise.cons (fun _ hb => nomatch hb) List.Pairwise.nil + | cons y rest ih => + simp only [bubblePassAux] at h + split at h + · rename_i h_gt + have := (List.cons.inj h).1 + omega + · rename_i h_ngt + have h_le : acc ≤ y := by omega + have h_tail := (List.cons.inj h).2 + have h_sorted_tail := ih y h_tail + exact List.Pairwise.cons (fun b hb => by + simp only [List.mem_cons] at hb + rcases hb with rfl | hb + · exact h_le + · exact Nat.le_trans h_le (List.rel_of_pairwise_cons h_sorted_tail hb) + ) h_sorted_tail + +/-- If a bubble pass leaves the list unchanged, the list is sorted. -/ +theorem bubblePass_fixed_sorted (l : List Nat) (h : bubblePass l = l) : IsSorted l := by + match l with + | [] => exact List.Pairwise.nil + | x :: xs => exact bubblePassAux_fixed_sorted x xs h + +/-- If `acc :: xs` is sorted, then `bubblePassAux acc xs = acc :: xs`. -/ +theorem bubblePassAux_sorted_id (acc : Nat) (xs : List Nat) + (h : IsSorted (acc :: xs)) : bubblePassAux acc xs = acc :: xs := by + induction xs generalizing acc with + | nil => rfl + | cons y rest ih => + have h_le : acc ≤ y := List.rel_of_pairwise_cons h (List.mem_cons.mpr (Or.inl rfl)) + simp only [bubblePassAux] + split + · exfalso; omega + · congr 1 + have h_tail : IsSorted (y :: rest) := by cases h with | cons _ hr => exact hr + exact ih y h_tail + +/-- A sorted list is a fixed point of `bubblePass`. -/ +theorem bubblePass_sorted_id (l : List Nat) (h : IsSorted l) : bubblePass l = l := by + match l with + | [] => rfl + | x :: xs => exact bubblePassAux_sorted_id x xs h + +end Cslib.Algorithms.Sorting.BubbleSort diff --git a/Cslib/Algorithms/Sorting/CountingSort.lean b/Cslib/Algorithms/Sorting/CountingSort.lean new file mode 100644 index 00000000..5136f213 --- /dev/null +++ b/Cslib/Algorithms/Sorting/CountingSort.lean @@ -0,0 +1,222 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init +public import Mathlib.Data.List.Perm.Basic + +@[expose] public section + +/-! +# Counting Sort + +This file implements counting sort on lists of natural numbers and proves that the result +is sorted, a permutation of the input, and preserves length. + +## Main definitions + +- `findMax`: Find the maximum element of a list. +- `buildSorted`: Reconstruct a sorted list from a count array. +- `countingSort`: Top-level counting sort. + +## Main results + +- `countingSort_sorted`: The output of `countingSort` is sorted. +- `countingSort_perm`: The output is a permutation of the input. +- `countingSort_length`: The output has the same length as the input. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Sorting.CountingSort + +/-- A list is sorted in ascending order. -/ +abbrev IsSorted (l : List Nat) : Prop := List.Pairwise (· ≤ ·) l + +/-- Find the maximum element of a list (0 for empty list). -/ +def findMax : List Nat → Nat + | [] => 0 + | x :: xs => Nat.max x (findMax xs) + +/-- Build a sorted list from a count array starting at index `i`. +Each entry `counts[j]` tells how many copies of `i + j` to emit. -/ +def buildSorted : List Nat → Nat → List Nat + | [], _ => [] + | c :: cs, i => List.replicate c i ++ buildSorted cs (i + 1) + +/-- Sort a list using counting sort. -/ +def countingSort (l : List Nat) : List Nat := + match l with + | [] => [] + | _ => + let m := findMax l + buildSorted ((List.range (m + 1)).map (fun i => l.count i)) 0 + +/-! ## Tests -/ + +example : countingSort [3, 1, 2] = [1, 2, 3] := by decide +example : countingSort [] = ([] : List Nat) := by decide +example : countingSort [1] = [1] := by decide +example : countingSort [5, 2, 4, 6, 1, 3] = [1, 2, 3, 4, 5, 6] := by decide +example : countingSort [2, 1] = [1, 2] := by decide +example : countingSort [3, 1, 2, 1, 3, 2] = [1, 1, 2, 2, 3, 3] := by decide +example : countingSort [1, 0, 2, 0, 1, 0] = [0, 0, 0, 1, 1, 2] := by decide + +/-! ## Properties of findMax -/ + +/-- Every element of a list is ≤ findMax. -/ +theorem findMax_ge (l : List Nat) : ∀ x ∈ l, x ≤ findMax l := by + intro x hx + induction l with + | nil => nomatch hx + | cons a as ih => + simp only [findMax, List.mem_cons] at hx ⊢ + rcases hx with rfl | hx + · exact Nat.le_max_left _ _ + · exact Nat.le_trans (ih hx) (Nat.le_max_right _ _) + +/-- If a > findMax l, then a does not appear in l. -/ +theorem findMax_count_zero (l : List Nat) (a : Nat) (h : findMax l < a) : + l.count a = 0 := by + rw [List.count_eq_zero] + intro ha + have := findMax_ge l a ha; omega + +/-! ## Helpers for replicate count -/ + +private theorem count_replicate_self (n a : Nat) : + (List.replicate n a).count a = n := by + induction n with + | zero => rfl + | succ k ih => simp only [List.replicate_succ, List.count_cons_self, ih] + +private theorem count_replicate_ne (n a b : Nat) (h : a ≠ b) : + (List.replicate n b).count a = 0 := by + rw [List.count_eq_zero] + intro hm + exact h (List.eq_of_mem_replicate hm) + +/-! ## Properties of buildSorted -/ + +/-- All elements of `buildSorted counts i` are ≥ i. -/ +theorem buildSorted_all_ge : + (counts : List Nat) → (i : Nat) → ∀ x ∈ buildSorted counts i, i ≤ x + | [], _, _, hx => nomatch hx + | c :: cs, i, x, hx => by + simp only [buildSorted, List.mem_append] at hx + rcases hx with hx | hx + · exact Nat.le_of_eq (List.eq_of_mem_replicate hx).symm + · exact Nat.le_trans (Nat.le_succ i) (buildSorted_all_ge cs (i + 1) x hx) + +/-- `buildSorted` produces a sorted list. -/ +theorem buildSorted_sorted : + (counts : List Nat) → (i : Nat) → IsSorted (buildSorted counts i) + | [], _ => List.Pairwise.nil + | c :: cs, i => by + simp only [buildSorted] + apply List.pairwise_append.mpr + refine ⟨List.pairwise_replicate.mpr (Or.inr (Nat.le_refl i)), + buildSorted_sorted cs (i + 1), fun a ha b hb => ?_⟩ + have h_eq := List.eq_of_mem_replicate ha + have h_ge := buildSorted_all_ge cs (i + 1) b hb + omega + +/-- Count of elements < start index in `buildSorted` is zero. -/ +theorem buildSorted_count_lt : + (counts : List Nat) → (i a : Nat) → a < i → (buildSorted counts i).count a = 0 + | [], _, _, _ => rfl + | c :: cs, i, a, h_lt => by + simp only [buildSorted] + rw [List.count_append, count_replicate_ne _ _ _ (by omega), Nat.zero_add] + exact buildSorted_count_lt cs (i + 1) a (by omega) + +/-- Count of element `i` at head position in `buildSorted`. -/ +theorem buildSorted_count_head (c : Nat) (cs : List Nat) (i : Nat) : + (buildSorted (c :: cs) i).count i = c := by + simp only [buildSorted] + rw [List.count_append, count_replicate_self, + buildSorted_count_lt cs (i + 1) i (by omega)] + omega + +/-- Count of element `a > i` in `buildSorted (c :: cs) i` equals count in tail. -/ +theorem buildSorted_count_tail (c : Nat) (cs : List Nat) (i a : Nat) (h : i < a) : + (buildSorted (c :: cs) i).count a = (buildSorted cs (i + 1)).count a := by + simp only [buildSorted] + rw [List.count_append, count_replicate_ne _ _ _ (by omega), Nat.zero_add] + +/-! ## Count characterization of buildSorted -/ + +/-- General count characterization: for `i ≤ a < i + counts.length`, +the count equals the corresponding entry. -/ +theorem buildSorted_count (counts : List Nat) (i a : Nat) (h_ge : i ≤ a) + (h_lt : a < i + counts.length) : + (buildSorted counts i).count a = counts[a - i]'(by omega) := by + induction counts generalizing i with + | nil => simp only [List.length_nil] at h_lt; omega + | cons c cs ih => + by_cases h_eq : a = i + · subst h_eq + simp only [buildSorted_count_head, Nat.sub_self, List.getElem_cons_zero] + · have h_gt : i < a := by omega + rw [buildSorted_count_tail c cs i a h_gt] + rw [ih (i + 1) (by omega) (by simp only [List.length_cons] at h_lt; omega)] + simp only [show a - i = (a - (i + 1)) + 1 from by omega, + List.getElem_cons_succ] + +/-- Count of element beyond the range is zero. -/ +theorem buildSorted_count_ge (counts : List Nat) (i a : Nat) (h : i + counts.length ≤ a) : + (buildSorted counts i).count a = 0 := by + induction counts generalizing i with + | nil => rfl + | cons c cs ih => + have h_gt : i < a := by simp only [List.length_cons] at h; omega + rw [buildSorted_count_tail c cs i a h_gt] + exact ih (i + 1) (by simp only [List.length_cons] at h; omega) + +/-! ## Permutation proof -/ + +/-- `countingSort` produces a permutation of its input. -/ +theorem countingSort_perm (l : List Nat) : List.Perm (countingSort l) l := by + match l with + | [] => exact List.Perm.refl _ + | x :: xs => + simp only [countingSort] + rw [List.perm_iff_count] + intro a + set m := findMax (x :: xs) + set counts := (List.range (m + 1)).map (fun i => (x :: xs).count i) + by_cases h_le : a ≤ m + · have h_len : counts.length = m + 1 := by simp [counts] + have h_lt : a < 0 + counts.length := by omega + rw [buildSorted_count counts 0 a (Nat.zero_le a) h_lt] + simp only [counts, Nat.sub_zero, List.getElem_map, List.getElem_range] + · have h_fm : m < a := Nat.lt_of_not_le h_le + rw [findMax_count_zero (x :: xs) a h_fm] + by_cases h_lt : a < 0 + counts.length + · rw [buildSorted_count counts 0 a (Nat.zero_le a) h_lt] + simp only [counts, Nat.sub_zero, List.getElem_map, List.getElem_range] + exact findMax_count_zero (x :: xs) a h_fm + · exact buildSorted_count_ge counts 0 a (by omega) + +/-- `countingSort` preserves length. -/ +theorem countingSort_length (l : List Nat) : + (countingSort l).length = l.length := + (countingSort_perm l).length_eq + +/-! ## Sorted proof -/ + +/-- `countingSort` produces a sorted list. -/ +theorem countingSort_sorted (l : List Nat) : IsSorted (countingSort l) := by + match l with + | [] => exact List.Pairwise.nil + | _ :: _ => exact buildSorted_sorted _ 0 + +end Cslib.Algorithms.Sorting.CountingSort diff --git a/Cslib/Algorithms/Sorting/HeapSort.lean b/Cslib/Algorithms/Sorting/HeapSort.lean new file mode 100644 index 00000000..45673553 --- /dev/null +++ b/Cslib/Algorithms/Sorting/HeapSort.lean @@ -0,0 +1,258 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init +public import Mathlib.Data.List.Perm.Basic + +@[expose] public section + +/-! +# Heap Sort + +This file implements heap sort on lists of natural numbers and proves that the result +is a permutation of the input and preserves length. + +## Main definitions + +- `swap`: Swap two elements in a list. +- `findLargest`: Find the largest among a node and its children in a heap. +- `heapify`: Fuel-based sift-down to maintain max-heap property. +- `buildMaxHeap`: Build a max heap from a list. +- `heapSort`: Top-level heap sort. + +## Main results + +- `heapSort_perm`: The output is a permutation of the input. +- `heapSort_length`: The output has the same length as the input. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Sorting.HeapSort + +/-- Left child index in a heap. -/ +def leftChild (i : Nat) : Nat := 2 * i + 1 + +/-- Right child index in a heap. -/ +def rightChild (i : Nat) : Nat := 2 * i + 2 + +/-- Swap two elements at given indices in a list. -/ +def swap (l : List Nat) (i j : Nat) : List Nat := + match l[i]?, l[j]? with + | some a, some b => (l.set i b).set j a + | _, _ => l + +/-- Find the index of the largest element among node `i` and its children. -/ +def findLargest (l : List Nat) (i heapSize : Nat) : Nat := + let left := leftChild i + let right := rightChild i + let largest := + if left < heapSize ∧ left < l.length then + match l[i]?, l[left]? with + | some vi, some vl => if vl > vi then left else i + | _, _ => i + else i + if right < heapSize ∧ right < l.length then + match l[largest]?, l[right]? with + | some vl, some vr => if vr > vl then right else largest + | _, _ => largest + else largest + +/-- Fuel-based sift-down to maintain max-heap property. -/ +def heapify (l : List Nat) (i : Nat) (heapSize fuel : Nat) : List Nat := + match fuel with + | 0 => l + | fuel' + 1 => + if i < heapSize ∧ i < l.length then + let largest := findLargest l i heapSize + if largest ≠ i then + heapify (swap l i largest) largest heapSize fuel' + else l + else l + +/-- Build max heap: sift down from last parent to root. -/ +def buildMaxHeapAux (l : List Nat) (len : Nat) : Nat → List Nat + | 0 => heapify l 0 len len + | i + 1 => buildMaxHeapAux (heapify l (i + 1) len len) len i + +/-- Build a max heap from a list. -/ +def buildMaxHeap (l : List Nat) : List Nat := + if l.length ≤ 1 then l + else buildMaxHeapAux l l.length ((l.length - 1 - 1) / 2) + +/-- Extract-max loop: swap root with last unsorted, sift down, repeat. -/ +def heapSortAux (l : List Nat) (heapSize fuel : Nat) : List Nat := + match fuel with + | 0 => l + | fuel' + 1 => + if heapSize ≤ 1 then l + else + let swapped := swap l 0 (heapSize - 1) + let heapified := heapify swapped 0 (heapSize - 1) (heapSize - 1) + heapSortAux heapified (heapSize - 1) fuel' + +/-- Sort a list using heap sort. -/ +def heapSort (l : List Nat) : List Nat := + if l.length ≤ 1 then l + else + let heap := buildMaxHeap l + heapSortAux heap l.length l.length + +/-! ## Tests -/ + +example : heapSort [3, 1, 2] = [1, 2, 3] := by decide +example : heapSort [] = ([] : List Nat) := by decide +example : heapSort [1] = [1] := by decide +example : heapSort [5, 2, 4, 6, 1, 3] = [1, 2, 3, 4, 5, 6] := by decide +example : heapSort [2, 1] = [1, 2] := by decide +example : heapSort [3, 1, 4, 1, 2, 3] = [1, 1, 2, 3, 3, 4] := by decide + +/-! ## Swap properties -/ + +/-- Swap preserves length. -/ +theorem swap_length (l : List Nat) (i j : Nat) : + (swap l i j).length = l.length := by + simp only [swap] + cases l[i]? with + | none => rfl + | some _ => + cases l[j]? with + | none => rfl + | some _ => simp only [List.length_set] + +/-- Swap always produces a permutation. -/ +theorem swap_perm (l : List Nat) (i j : Nat) : + List.Perm (swap l i j) l := by + by_cases hi : i < l.length <;> by_cases hj : j < l.length + · simp only [swap, List.getElem?_eq_getElem hi, List.getElem?_eq_getElem hj] + rw [List.perm_iff_count]; intro a + have hj' : j < (l.set i l[j]).length := by rw [List.length_set]; exact hj + rw [List.count_set hj', List.count_set hi] + simp only [List.getElem_set, ite_self, beq_iff_eq] + clear hj' + by_cases hia : l[i] = a <;> by_cases hja : l[j] = a + · simp only [hia, hja, ite_true] + have := List.count_pos_iff.mpr (hia ▸ List.getElem_mem (h := hi)); omega + · simp only [hia, hja, ite_true, ite_false] + have := List.count_pos_iff.mpr (hia ▸ List.getElem_mem (h := hi)); omega + · simp only [hia, hja, ite_true, ite_false] + have := List.count_pos_iff.mpr (hja ▸ List.getElem_mem (h := hj)); omega + · simp only [hia, hja, ite_false]; omega + · simp only [swap]; cases l[i]? with + | none => exact List.Perm.refl _ + | some _ => simp only [List.getElem?_eq_none (by omega : l.length ≤ j)]; exact List.Perm.refl _ + · simp only [swap, List.getElem?_eq_none (by omega : l.length ≤ i)]; exact List.Perm.refl _ + · simp only [swap, List.getElem?_eq_none (by omega : l.length ≤ i)]; exact List.Perm.refl _ + +/-! ## Heapify preserves length and permutation -/ + +/-- Heapify preserves length. -/ +theorem heapify_length (l : List Nat) (i heapSize fuel : Nat) : + (heapify l i heapSize fuel).length = l.length := by + induction fuel generalizing l i with + | zero => rfl + | succ n ih => + simp only [heapify] + split + · split + · exact (ih _ _).trans (swap_length l i _) + · rfl + · rfl + +/-- Heapify produces a permutation. -/ +theorem heapify_perm (l : List Nat) (i heapSize fuel : Nat) : + List.Perm (heapify l i heapSize fuel) l := by + induction fuel generalizing l i with + | zero => exact List.Perm.refl _ + | succ n ih => + simp only [heapify] + split + · split + · exact (ih _ _).trans (swap_perm l i _) + · exact List.Perm.refl _ + · exact List.Perm.refl _ + +/-! ## BuildMaxHeap preserves length and permutation -/ + +/-- buildMaxHeapAux preserves length. -/ +theorem buildMaxHeapAux_length (l : List Nat) (len : Nat) : + (i : Nat) → (buildMaxHeapAux l len i).length = l.length + | 0 => heapify_length l 0 len len + | i + 1 => by + simp only [buildMaxHeapAux] + rw [buildMaxHeapAux_length, heapify_length] + +/-- buildMaxHeap preserves length. -/ +theorem buildMaxHeap_length (l : List Nat) : + (buildMaxHeap l).length = l.length := by + simp only [buildMaxHeap] + split + · rfl + · exact buildMaxHeapAux_length l l.length _ + +/-- buildMaxHeapAux produces a permutation. -/ +theorem buildMaxHeapAux_perm (l : List Nat) (len : Nat) : + (i : Nat) → List.Perm (buildMaxHeapAux l len i) l + | 0 => heapify_perm l 0 len len + | i + 1 => by + simp only [buildMaxHeapAux] + exact (buildMaxHeapAux_perm _ len i).trans (heapify_perm l (i + 1) len len) + +/-- buildMaxHeap produces a permutation. -/ +theorem buildMaxHeap_perm (l : List Nat) : + List.Perm (buildMaxHeap l) l := by + simp only [buildMaxHeap] + split + · exact List.Perm.refl _ + · exact buildMaxHeapAux_perm l l.length _ + +/-! ## HeapSort preserves length and permutation -/ + +/-- heapSortAux preserves length. -/ +theorem heapSortAux_length (l : List Nat) (heapSize fuel : Nat) : + (heapSortAux l heapSize fuel).length = l.length := by + induction fuel generalizing l heapSize with + | zero => rfl + | succ n ih => + simp only [heapSortAux] + split + · rfl + · rw [ih, heapify_length, swap_length] + +/-- `heapSort` preserves length. -/ +theorem heapSort_length (l : List Nat) : + (heapSort l).length = l.length := by + simp only [heapSort] + split + · rfl + · rw [heapSortAux_length, buildMaxHeap_length] + +/-- heapSortAux produces a permutation. -/ +theorem heapSortAux_perm (l : List Nat) (heapSize fuel : Nat) : + List.Perm (heapSortAux l heapSize fuel) l := by + induction fuel generalizing l heapSize with + | zero => exact List.Perm.refl _ + | succ n ih => + simp only [heapSortAux] + split + · exact List.Perm.refl _ + · exact (ih _ _).trans ((heapify_perm _ 0 _ _).trans (swap_perm l 0 _)) + +/-- `heapSort` produces a permutation of its input. -/ +theorem heapSort_perm (l : List Nat) : + List.Perm (heapSort l) l := by + simp only [heapSort] + split + · exact List.Perm.refl _ + · exact (heapSortAux_perm _ l.length l.length).trans (buildMaxHeap_perm l) + +end Cslib.Algorithms.Sorting.HeapSort diff --git a/Cslib/Algorithms/Sorting/InsertionSort.lean b/Cslib/Algorithms/Sorting/InsertionSort.lean new file mode 100644 index 00000000..1cee5d53 --- /dev/null +++ b/Cslib/Algorithms/Sorting/InsertionSort.lean @@ -0,0 +1,136 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init +public import Mathlib.Data.List.Perm.Basic + +@[expose] public section + +/-! +# Insertion Sort + +This file implements insertion sort on lists of natural numbers and proves that the result +is sorted and a permutation of the input. + +## Main definitions + +- `insert`: Insert an element into a sorted list maintaining order. +- `insertionSort`: Sort a list by repeatedly inserting elements. + +## Main results + +- `insertionSort_sorted`: The output of `insertionSort` is sorted. +- `insertionSort_perm`: The output is a permutation of the input. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Sorting.InsertionSort + +/-- A list is sorted in ascending order. -/ +abbrev IsSorted (l : List Nat) : Prop := List.Pairwise (· ≤ ·) l + +/-- Insert `a` into a sorted list maintaining ascending order. -/ +def insert (a : Nat) : List Nat → List Nat + | [] => [a] + | x :: xs => + if a ≤ x then + a :: x :: xs + else + x :: insert a xs + +/-- Sort a list using insertion sort. -/ +def insertionSort : List Nat → List Nat + | [] => [] + | x :: xs => insert x (insertionSort xs) + +/-! ## Tests -/ + +example : insertionSort [3, 1, 2] = [1, 2, 3] := by decide +example : insertionSort [] = ([] : List Nat) := by decide +example : insertionSort [1] = [1] := by decide +example : insertionSort [5, 2, 4, 6, 1, 3] = [1, 2, 3, 4, 5, 6] := by decide + +/-! ## Membership in insert -/ + +/-- Membership in `insert a xs` is equivalent to being `a` or being in `xs`. -/ +@[simp] +theorem mem_insert (a b : Nat) (xs : List Nat) : + b ∈ insert a xs ↔ b = a ∨ b ∈ xs := by + induction xs with + | nil => simp [insert] + | cons x rest ih => + simp only [insert] + split <;> simp_all only [List.mem_cons] + constructor + · rintro (rfl | rfl | h) <;> simp_all + · rintro (rfl | rfl | h) <;> simp_all + +/-! ## Sorted proof -/ + +/-- Inserting into a sorted list produces a sorted list. -/ +theorem insert_sorted (a : Nat) (xs : List Nat) (h : IsSorted xs) : + IsSorted (insert a xs) := by + induction xs with + | nil => exact List.Pairwise.cons (fun _ hb => nomatch hb) List.Pairwise.nil + | cons x rest ih => + simp only [insert] + cases h with + | cons h_all h_rest => + split + · rename_i h_le + exact List.Pairwise.cons (fun y hy => by + simp only [List.mem_cons] at hy + rcases hy with rfl | hy + · exact h_le + · exact Nat.le_trans h_le (h_all y hy)) (List.Pairwise.cons h_all h_rest) + · rename_i h_nle + have h_lt : x < a := Nat.lt_of_not_le h_nle + exact List.Pairwise.cons (fun y hy => by + rw [mem_insert] at hy + rcases hy with rfl | hy + · exact Nat.le_of_lt h_lt + · exact h_all y hy) (ih h_rest) + +/-- `insertionSort` produces a sorted list. -/ +theorem insertionSort_sorted (xs : List Nat) : IsSorted (insertionSort xs) := by + induction xs with + | nil => exact List.Pairwise.nil + | cons x rest ih => exact insert_sorted x (insertionSort rest) ih + +/-! ## Permutation proof -/ + +/-- Inserting `a` into `xs` produces a permutation of `a :: xs`. -/ +theorem insert_perm (a : Nat) (xs : List Nat) : + List.Perm (insert a xs) (a :: xs) := by + induction xs with + | nil => exact List.Perm.refl _ + | cons x rest ih => + simp only [insert] + split + · exact List.Perm.refl _ + · exact (List.Perm.cons x ih).trans (List.Perm.swap a x rest) + +/-- `insertionSort` produces a permutation of its input. -/ +theorem insertionSort_perm (xs : List Nat) : + List.Perm (insertionSort xs) xs := by + induction xs with + | nil => exact List.Perm.refl _ + | cons x rest ih => + exact (insert_perm x (insertionSort rest)).trans (List.Perm.cons x ih) + +/-- `insertionSort` preserves length. -/ +theorem insertionSort_length (xs : List Nat) : + (insertionSort xs).length = xs.length := + (insertionSort_perm xs).length_eq + +end Cslib.Algorithms.Sorting.InsertionSort diff --git a/Cslib/Algorithms/Sorting/QuickSort.lean b/Cslib/Algorithms/Sorting/QuickSort.lean new file mode 100644 index 00000000..6e38f808 --- /dev/null +++ b/Cslib/Algorithms/Sorting/QuickSort.lean @@ -0,0 +1,218 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init +public import Mathlib.Data.List.Perm.Basic + +@[expose] public section + +/-! +# Quick Sort + +This file implements quick sort on lists of natural numbers and proves that the result +is sorted, a permutation of the input, and preserves length. + +## Main definitions + +- `partition`: Split a list into elements ≤ pivot and elements > pivot. +- `quickSort`: Fuel-based quick sort. + +## Main results + +- `sort_sorted`: The output of `sort` is sorted. +- `sort_perm`: The output is a permutation of the input. +- `sort_length`: The output has the same length as the input. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Sorting.QuickSort + +/-- A list is sorted in ascending order. -/ +abbrev IsSorted (l : List Nat) : Prop := List.Pairwise (· ≤ ·) l + +/-- Partition a list into elements ≤ pivot and elements > pivot. -/ +def partition (pivot : Nat) : List Nat → List Nat × List Nat + | [] => ([], []) + | x :: xs => + let (lo, hi) := partition pivot xs + if x ≤ pivot then (x :: lo, hi) else (lo, x :: hi) + +/-- Fuel-based quick sort. -/ +def quickSort : List Nat → Nat → List Nat + | _, 0 => [] + | [], _ => [] + | pivot :: tail, fuel + 1 => + let (lo, hi) := partition pivot tail + quickSort lo fuel ++ [pivot] ++ quickSort hi fuel + +/-- Top-level quick sort. -/ +def sort (l : List Nat) : List Nat := quickSort l l.length + +/-! ## Tests -/ + +example : sort [3, 1, 2] = [1, 2, 3] := by decide +example : sort [] = ([] : List Nat) := by decide +example : sort [1] = [1] := by decide +example : sort [5, 2, 4, 6, 1, 3] = [1, 2, 3, 4, 5, 6] := by decide +example : sort [2, 1] = [1, 2] := by decide + +/-! ## Properties of partition -/ + +/-- The sum of partition lengths equals the input length. -/ +theorem partition_length (pivot : Nat) : + (xs : List Nat) → (partition pivot xs).1.length + (partition pivot xs).2.length = xs.length + | [] => by simp [partition] + | x :: xs => by + simp only [partition] + have ih := partition_length pivot xs + split <;> simp only [List.length_cons] <;> omega + +/-- Every element of the first component is ≤ the pivot. -/ +theorem partition_fst_le (pivot : Nat) : + (xs : List Nat) → ∀ y ∈ (partition pivot xs).1, y ≤ pivot + | [], _, hy => by simp [partition] at hy + | x :: xs, y, hy => by + simp only [partition] at hy + split at hy + · rename_i h_le + simp only [List.mem_cons] at hy + rcases hy with rfl | hy + · exact h_le + · exact partition_fst_le pivot xs y hy + · exact partition_fst_le pivot xs y hy + +/-- Every element of the second component is > the pivot. -/ +theorem partition_snd_gt (pivot : Nat) : + (xs : List Nat) → ∀ y ∈ (partition pivot xs).2, pivot < y + | [], _, hy => by simp [partition] at hy + | x :: xs, y, hy => by + simp only [partition] at hy + split at hy + · exact partition_snd_gt pivot xs y hy + · rename_i h_nle + simp only [List.mem_cons] at hy + rcases hy with rfl | hy + · exact Nat.lt_of_not_le h_nle + · exact partition_snd_gt pivot xs y hy + +/-- Partition produces a permutation of the input. -/ +theorem partition_perm (pivot : Nat) : + (xs : List Nat) → + List.Perm ((partition pivot xs).1 ++ (partition pivot xs).2) xs + | [] => List.Perm.refl _ + | x :: xs => by + simp only [partition] + have ih := partition_perm pivot xs + split + · simp only [List.cons_append] + exact (List.perm_cons x).mpr ih + · exact List.perm_middle.trans ((List.perm_cons x).mpr ih) + +/-- First component of partition is no longer than input. -/ +theorem partition_fst_length (pivot : Nat) (xs : List Nat) : + (partition pivot xs).1.length ≤ xs.length := by + have := partition_length pivot xs; omega + +/-- Second component of partition is no longer than input. -/ +theorem partition_snd_length (pivot : Nat) (xs : List Nat) : + (partition pivot xs).2.length ≤ xs.length := by + have := partition_length pivot xs; omega + +/-! ## Permutation proof -/ + +/-- `quickSort l fuel` is a permutation of `l` when fuel ≥ l.length. -/ +theorem quickSort_perm (l : List Nat) (fuel : Nat) (hfuel : fuel ≥ l.length) : + List.Perm (quickSort l fuel) l := by + induction fuel generalizing l with + | zero => + match l with + | [] => exact List.Perm.refl _ + | _ :: _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l with + | [] => exact List.Perm.refl _ + | pivot :: tail => + simp only [quickSort] + have h_fuel_fst : n ≥ (partition pivot tail).1.length := by + have := partition_fst_length pivot tail + simp only [List.length_cons] at hfuel; omega + have h_fuel_snd : n ≥ (partition pivot tail).2.length := by + have := partition_snd_length pivot tail + simp only [List.length_cons] at hfuel; omega + have ih_lo := ih _ h_fuel_fst + have ih_hi := ih _ h_fuel_snd + -- lo_sorted ++ [pivot] ++ hi_sorted + -- = lo_sorted ++ (pivot :: hi_sorted) by append_assoc + singleton + -- ~ lo ++ (pivot :: hi) by append perm + -- ~ pivot :: (lo ++ hi) by perm_middle.symm + -- ~ pivot :: tail by cons partition_perm + rw [List.append_assoc] + exact (((List.Perm.append ih_lo (List.Perm.cons pivot ih_hi)).trans + List.perm_middle).trans + ((List.perm_cons pivot).mpr (partition_perm pivot tail))) + +/-- `sort` produces a permutation of its input. -/ +theorem sort_perm (l : List Nat) : List.Perm (sort l) l := + quickSort_perm l l.length (Nat.le_refl _) + +/-- `sort` preserves length. -/ +theorem sort_length (l : List Nat) : (sort l).length = l.length := + (sort_perm l).length_eq + +/-! ## Sorted proof -/ + +/-- `quickSort l fuel` is sorted when fuel ≥ l.length. -/ +theorem quickSort_sorted (l : List Nat) (fuel : Nat) (hfuel : fuel ≥ l.length) : + IsSorted (quickSort l fuel) := by + induction fuel generalizing l with + | zero => + match l with + | [] => exact List.Pairwise.nil + | _ :: _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l with + | [] => exact List.Pairwise.nil + | pivot :: tail => + simp only [quickSort] + have h_fuel_fst : n ≥ (partition pivot tail).1.length := by + have := partition_fst_length pivot tail + simp only [List.length_cons] at hfuel; omega + have h_fuel_snd : n ≥ (partition pivot tail).2.length := by + have := partition_snd_length pivot tail + simp only [List.length_cons] at hfuel; omega + have h_sorted_lo := ih _ h_fuel_fst + have h_sorted_hi := ih _ h_fuel_snd + have h_perm_lo := quickSort_perm _ n h_fuel_fst + have h_perm_hi := quickSort_perm _ n h_fuel_snd + -- Goal: sorted(lo_sorted ++ [pivot] ++ hi_sorted) + rw [List.append_assoc] + apply List.pairwise_append.mpr + constructor + · exact h_sorted_lo + constructor + · exact List.Pairwise.cons + (fun b hb => Nat.le_of_lt + (partition_snd_gt pivot tail b (h_perm_hi.mem_iff.mp hb))) + h_sorted_hi + · intro a ha b hb + have ha' := partition_fst_le pivot tail a (h_perm_lo.mem_iff.mp ha) + rcases List.mem_cons.mp hb with rfl | hb + · exact ha' + · exact Nat.le_of_lt (Nat.lt_of_le_of_lt ha' + (partition_snd_gt pivot tail b (h_perm_hi.mem_iff.mp hb))) + +/-- `sort` produces a sorted list. -/ +theorem sort_sorted (l : List Nat) : IsSorted (sort l) := + quickSort_sorted l l.length (Nat.le_refl _) + +end Cslib.Algorithms.Sorting.QuickSort diff --git a/Cslib/Algorithms/Sorting/SelectionSort.lean b/Cslib/Algorithms/Sorting/SelectionSort.lean new file mode 100644 index 00000000..67fb377e --- /dev/null +++ b/Cslib/Algorithms/Sorting/SelectionSort.lean @@ -0,0 +1,188 @@ +/- +Copyright (c) 2025 Brando Miranda. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Brando Miranda +-/ + +module + +public import Cslib.Init +public import Mathlib.Data.List.Perm.Basic + +@[expose] public section + +/-! +# Selection Sort + +This file implements selection sort on lists of natural numbers and proves that the result +is sorted, a permutation of the input, and preserves length. + +## Main definitions + +- `extractMin`: Find and remove the minimum element from a non-empty list. +- `selectionSortAux`: Fuel-based selection sort helper. +- `selectionSort`: Top-level selection sort. + +## Main results + +- `selectionSort_sorted`: The output of `selectionSort` is sorted. +- `selectionSort_perm`: The output is a permutation of the input. +- `selectionSort_length`: The output has the same length as the input. + +## References + +Adapted from the VeriBench benchmark (https://github.com/brandomiranda/veribench). +-/ + +set_option autoImplicit false + +namespace Cslib.Algorithms.Sorting.SelectionSort + +/-- A list is sorted in ascending order. -/ +abbrev IsSorted (l : List Nat) : Prop := List.Pairwise (· ≤ ·) l + +/-- Extract the minimum element from a non-empty list, returning it and the remaining elements. +Returns `(0, [])` for the empty list (should not be called on empty lists). -/ +def extractMin : List Nat → Nat × List Nat + | [] => (0, []) + | [x] => (x, []) + | x :: y :: xs => + let (minTail, restTail) := extractMin (y :: xs) + if x ≤ minTail then + (x, y :: xs) + else + (minTail, x :: restTail) + +/-- Fuel-based selection sort: repeatedly extracts the minimum. -/ +def selectionSortAux (l : List Nat) : Nat → List Nat + | 0 => [] + | fuel + 1 => + match l with + | [] => [] + | _ :: _ => + let (minVal, rest) := extractMin l + minVal :: selectionSortAux rest fuel + +/-- Sort a list using selection sort. -/ +def selectionSort (l : List Nat) : List Nat := + selectionSortAux l l.length + +/-! ## Tests -/ + +example : selectionSort [3, 1, 2] = [1, 2, 3] := by decide +example : selectionSort [] = ([] : List Nat) := by decide +example : selectionSort [1] = [1] := by decide +example : selectionSort [5, 2, 4, 6, 1, 3] = [1, 2, 3, 4, 5, 6] := by decide +example : selectionSort [2, 1] = [1, 2] := by decide + +/-! ## Properties of extractMin -/ + +/-- The minimum extracted is ≤ every element of the original list. -/ +theorem extractMin_fst_le : + (l : List Nat) → l ≠ [] → ∀ y ∈ l, (extractMin l).1 ≤ y + | [], h, _, _ => absurd rfl h + | [x], _, y, hy => by simp only [extractMin, List.mem_singleton] at hy ⊢; omega + | x :: z :: zs, _, y, hy => by + simp only [extractMin] + split + · rename_i h_le + simp only [List.mem_cons] at hy + rcases hy with rfl | hy + · exact Nat.le_refl _ + · exact Nat.le_trans h_le + (extractMin_fst_le (z :: zs) (List.cons_ne_nil z zs) y (List.mem_cons.mpr hy)) + · rename_i h_nle + simp only [List.mem_cons] at hy + rcases hy with rfl | hy + · exact Nat.le_of_lt (Nat.lt_of_not_le h_nle) + · exact extractMin_fst_le (z :: zs) (List.cons_ne_nil z zs) y (List.mem_cons.mpr hy) + +/-- Extracting gives a pair whose components form a permutation of the original list. -/ +theorem extractMin_perm : + (l : List Nat) → l ≠ [] → + List.Perm ((extractMin l).1 :: (extractMin l).2) l + | [], h => absurd rfl h + | [_], _ => List.Perm.refl _ + | x :: z :: zs, _ => by + simp only [extractMin] + split + · exact List.Perm.refl _ + · have ih := extractMin_perm (z :: zs) (List.cons_ne_nil z zs) + exact (List.Perm.swap x (extractMin (z :: zs)).1 (extractMin (z :: zs)).2).trans + (List.Perm.cons x ih) + +/-- The rest after `extractMin` has one fewer element. -/ +theorem extractMin_snd_length : + (l : List Nat) → l ≠ [] → (extractMin l).2.length + 1 = l.length + | [], h => absurd rfl h + | [_], _ => by simp [extractMin] + | x :: z :: zs, _ => by + simp only [extractMin] + split + · simp + · have ih := extractMin_snd_length (z :: zs) (List.cons_ne_nil z zs) + simp only [List.length_cons] at ih ⊢ + omega + +/-! ## Permutation proof -/ + +/-- `selectionSortAux l fuel` is a permutation of `l` when fuel ≥ l.length. -/ +theorem selectionSortAux_perm (l : List Nat) (fuel : Nat) (hfuel : fuel ≥ l.length) : + List.Perm (selectionSortAux l fuel) l := by + induction fuel generalizing l with + | zero => + match l with + | [] => exact List.Perm.refl _ + | _ :: _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l with + | [] => exact List.Perm.refl _ + | x :: xs => + simp only [selectionSortAux] + have h_len := extractMin_snd_length (x :: xs) (List.cons_ne_nil x xs) + have h_fuel : n ≥ (extractMin (x :: xs)).2.length := by omega + exact (List.Perm.cons _ (ih _ h_fuel)).trans + (extractMin_perm (x :: xs) (List.cons_ne_nil x xs)) + +/-- `selectionSort` produces a permutation of its input. -/ +theorem selectionSort_perm (l : List Nat) : List.Perm (selectionSort l) l := + selectionSortAux_perm l l.length (Nat.le_refl _) + +/-- `selectionSort` preserves length. -/ +theorem selectionSort_length (l : List Nat) : + (selectionSort l).length = l.length := + (selectionSort_perm l).length_eq + +/-! ## Sorted proof -/ + +/-- `selectionSortAux l fuel` is sorted when fuel ≥ l.length. -/ +theorem selectionSortAux_sorted (l : List Nat) (fuel : Nat) (hfuel : fuel ≥ l.length) : + IsSorted (selectionSortAux l fuel) := by + induction fuel generalizing l with + | zero => + match l with + | [] => exact List.Pairwise.nil + | _ :: _ => exfalso; simp only [List.length_cons] at hfuel; omega + | succ n ih => + match l with + | [] => exact List.Pairwise.nil + | x :: xs => + simp only [selectionSortAux] + have h_ne := List.cons_ne_nil x xs + have h_len := extractMin_snd_length (x :: xs) h_ne + have h_fuel : n ≥ (extractMin (x :: xs)).2.length := by omega + have h_sorted_rest := ih _ h_fuel + have h_perm_rest := selectionSortAux_perm _ n h_fuel + exact List.Pairwise.cons (fun y hy => by + have h_in_rest : y ∈ (extractMin (x :: xs)).2 := h_perm_rest.mem_iff.mp hy + have h_perm_em := extractMin_perm (x :: xs) h_ne + have h_in_orig : y ∈ x :: xs := + h_perm_em.mem_iff.mp (List.mem_cons.mpr (Or.inr h_in_rest)) + exact extractMin_fst_le (x :: xs) h_ne y h_in_orig + ) h_sorted_rest + +/-- `selectionSort` produces a sorted list. -/ +theorem selectionSort_sorted (l : List Nat) : IsSorted (selectionSort l) := + selectionSortAux_sorted l l.length (Nat.le_refl _) + +end Cslib.Algorithms.Sorting.SelectionSort