From 5fe74105163b3e4ff85f4622651e20edf52656f7 Mon Sep 17 00:00:00 2001 From: Akhilesh Balaji Date: Fri, 3 Apr 2026 16:55:56 +0530 Subject: [PATCH 01/13] feat: prove myhill-nerode equivalence classes --- .../Automata/DA/MyhillNerode.lean | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 Cslib/Computability/Automata/DA/MyhillNerode.lean diff --git a/Cslib/Computability/Automata/DA/MyhillNerode.lean b/Cslib/Computability/Automata/DA/MyhillNerode.lean new file mode 100644 index 000000000..00643db26 --- /dev/null +++ b/Cslib/Computability/Automata/DA/MyhillNerode.lean @@ -0,0 +1,116 @@ +/- +Copyright (c) 2026 Akhilesh Balaji. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Akhilesh Balaji +-/ + +module + +public import Cslib.Computability.Automata.DA.Basic +public import Cslib.Computability.Automata.DA.Congr +public import Cslib.Computability.Languages.RegularLanguage +public import Cslib.Computability.Languages.Congruences.RightCongruence +public import Mathlib.Computability.Language + +@[expose] public section + +/-! # All three subparts of the Myhill-Nerode Theorem for DFAs + +(1) `L` regular iff. `∼_L` has a finite number of equivalence classes `N`. +(2) `N` is the number of states in the minimal DFA accepting `L`. +(3) The minimal DFA is unique up to unique isomorphism. That is, for any + minimal DFA acceptor, there exists exactly one isomorphism from it to the + following one: + + > Let each equivalence class `⟦ x ⟧` correspond to a state, and let state + transitions be `a : ⟦ x ⟧ → ⟦ x a ⟧` for each `a ∈ Σ`. + Let the starting state be `⟦ ϵ ⟧`, and the accepting states be `⟦ x ⟧` where + `x ∈ L`. + +## References + +* [J. E. Hopcroft, R. Motwani, J. D. Ullman, + *Introduction to Automata Theory, Languages, and Computation*][Hopcroft2006] +-/ + +namespace Language + +variable {α : Type} {l m : Language α} + +def RightQuotient (L : Language α) (y : List α) : Language α := { x | x ++ y ∈ L } + +end Language + +namespace Cslib + +open Cslib +open scoped RightCongruence + +open Language + +namespace Automata.DA +open Acceptor + +variable {α : Type} {l m : Language α} + +def NerodeCongruence (l : Language α) : RightCongruence α where + r x y := ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l + iseqv := ⟨fun _ _ => Iff.rfl, fun h z => (h z).symm, fun h_1 h_2 z => (h_1 z).trans (h_2 z)⟩ + right_cov := ⟨fun a {x y} (h : ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l) z => + List.append_assoc x a z ▸ List.append_assoc y a z ▸ h (a ++ z)⟩ + +def NerodeCongruence.toFinAcc (l : Language α) : + DA.FinAcc (Quotient (NerodeCongruence l).eq) α := + letI c := NerodeCongruence l + { c.toDA with + accept := {q | Quotient.lift (fun x => x ∈ l) (by + intro x y hxy + simpa using hxy []) q} } + +@[simp, scoped grind =] +theorem nerodecongruence_to_finacc_acc (l : Language α) : + language (NerodeCongruence.toFinAcc l) = l := by + ext x + simp [NerodeCongruence.toFinAcc, language, Acceptor.Accepts] + exact Iff.of_eq rfl + +theorem nerodeCongruence_accepts_apply + (M : DA.FinAcc State α) (x y : List α) : + (NerodeCongruence (language M)).r x y ↔ + ∀ z, + M.mtr (M.mtr M.start x) z ∈ M.accept ↔ + M.mtr (M.mtr M.start y) z ∈ M.accept := by + simp only [FLTS.mtr, ← List.foldl_append] + rfl + +theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : + Finite (Quotient (NerodeCongruence l).eq) := by + rcases IsRegular.iff_dfa.mp h with ⟨State, hFin, M, hM⟩ + rw [← hM] + apply Finite.of_surjective + (fun s : State => Quotient.mk (NerodeCongruence (language M)).eq + (Classical.epsilon (fun x => M.mtr M.start x = s))) + intro q + induction q using Quotient.inductionOn with + | h x => + exact ⟨M.mtr M.start x, Quotient.sound (by + change (NerodeCongruence (language M)).r _ x + rw [nerodeCongruence_accepts_apply] + intro z + have heps : M.mtr M.start (Classical.epsilon + (fun y => M.mtr M.start y = M.mtr M.start x)) = M.mtr M.start x := + @Classical.epsilon_spec _ (fun y => M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ + rw [heps])⟩ + +@[simp, scoped grind =] +theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : + l.IsRegular ↔ Finite (Quotient (NerodeCongruence l).eq) := by + constructor + · intro h + exact IsRegular.finite_range_nerode_quotient h + · intro h + refine IsRegular.iff_dfa.mpr ⟨Quotient (NerodeCongruence l).eq, h, + NerodeCongruence.toFinAcc l, nerodecongruence_to_finacc_acc l⟩ + +end Automata.DA +end Cslib From 7675df21f60151c90095967f0853094559da3e7a Mon Sep 17 00:00:00 2001 From: Akhilesh Balaji Date: Tue, 7 Apr 2026 16:43:28 +0530 Subject: [PATCH 02/13] feat: proof of number of states of minimal DFA for a language --- .../Automata/DA/MyhillNerode.lean | 67 ++++++++++++++++--- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/Cslib/Computability/Automata/DA/MyhillNerode.lean b/Cslib/Computability/Automata/DA/MyhillNerode.lean index 00643db26..0c30b9510 100644 --- a/Cslib/Computability/Automata/DA/MyhillNerode.lean +++ b/Cslib/Computability/Automata/DA/MyhillNerode.lean @@ -11,6 +11,8 @@ public import Cslib.Computability.Automata.DA.Congr public import Cslib.Computability.Languages.RegularLanguage public import Cslib.Computability.Languages.Congruences.RightCongruence public import Mathlib.Computability.Language +public import Mathlib.Data.Fintype.Card +public import Mathlib.CategoryTheory.Iso @[expose] public section @@ -31,16 +33,10 @@ public import Mathlib.Computability.Language * [J. E. Hopcroft, R. Motwani, J. D. Ullman, *Introduction to Automata Theory, Languages, and Computation*][Hopcroft2006] +* [T. Malkin, *COMS W3261: Computer Science Theory, Handout 3: The Myhill-Nerode Theorem + and Implications*][Malkin2024] -/ -namespace Language - -variable {α : Type} {l m : Language α} - -def RightQuotient (L : Language α) (y : List α) : Language α := { x | x ++ y ∈ L } - -end Language - namespace Cslib open Cslib @@ -74,7 +70,7 @@ theorem nerodecongruence_to_finacc_acc (l : Language α) : simp [NerodeCongruence.toFinAcc, language, Acceptor.Accepts] exact Iff.of_eq rfl -theorem nerodeCongruence_accepts_apply +theorem nerodecongruence_accepts_apply (M : DA.FinAcc State α) (x y : List α) : (NerodeCongruence (language M)).r x y ↔ ∀ z, @@ -95,13 +91,14 @@ theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : | h x => exact ⟨M.mtr M.start x, Quotient.sound (by change (NerodeCongruence (language M)).r _ x - rw [nerodeCongruence_accepts_apply] + rw [nerodecongruence_accepts_apply] intro z have heps : M.mtr M.start (Classical.epsilon (fun y => M.mtr M.start y = M.mtr M.start x)) = M.mtr M.start x := @Classical.epsilon_spec _ (fun y => M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ rw [heps])⟩ +-- Myhill-Nerode (1) @[simp, scoped grind =] theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : l.IsRegular ↔ Finite (Quotient (NerodeCongruence l).eq) := by @@ -111,6 +108,56 @@ theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : · intro h refine IsRegular.iff_dfa.mpr ⟨Quotient (NerodeCongruence l).eq, h, NerodeCongruence.toFinAcc l, nerodecongruence_to_finacc_acc l⟩ +-- + +@[simp] +theorem lower_bound_num_states_dfa_acc {l : Language α} {M : DA.FinAcc States α} + {ws : Finset (List α)} (hws : ∀ x ∈ ws, ∀ y ∈ ws, x ≠ y → ¬(NerodeCongruence l).r x y) + (hM : language M = l) [Fintype States] : + Fintype.card States ≥ ws.card := by + classical + by_contra h + simp only [ge_iff_le, not_le] at h + by_cases h_card : ws.card ≤ 1 + · exact (lt_of_lt_of_le h (le_trans h_card + (Nat.succ_le_of_lt (Fintype.card_pos_iff.mpr ⟨M.start⟩)))).false + · obtain ⟨x, hx, y, hy, hne, heq⟩ := Finset.exists_ne_map_eq_of_card_lt_of_maps_to h + (fun x _ => Finset.mem_univ (M.mtr M.start x)) + rw [← hM] at hws + exact hws x hx y hy hne + ((nerodecongruence_accepts_apply M x y).mpr (fun z => heq ▸ Iff.rfl)) + +-- Myhill-Nerode (2) +@[simp] +theorem minimum_dfa_states_eq_num_eqv_clss_nerode {M : DA.FinAcc States α} + [Fintype States] [Fintype (Quotient (NerodeCongruence (language M)).eq)] : + Fintype.card States ≥ Fintype.card (Quotient (NerodeCongruence (language M)).eq) := by + classical + let ws : Finset (List α) := Finset.univ.image + (Quotient.out : Quotient (NerodeCongruence (language M)).eq → List α) + have hws : ∀ x ∈ ws, ∀ y ∈ ws, x ≠ y → ¬(NerodeCongruence (language M)).r x y := by + simp only [ws, Finset.mem_image, Finset.mem_univ, true_and] + rintro _ ⟨qx, rfl⟩ _ ⟨qy, rfl⟩ hne h + exact hne (by simpa using Quotient.sound h) + have card_hws_eq_num_eqv_clss : ws.card = Fintype.card + (Quotient (NerodeCongruence (language M)).eq) := by + simp [ws, Finset.card_image_of_injective _ Quotient.out_injective] + exact card_hws_eq_num_eqv_clss ▸ lower_bound_num_states_dfa_acc hws rfl +-- + +def IsMinimalAutomaton (M : DA.FinAcc States α) + [Fintype States] [Fintype (Quotient (NerodeCongruence (language M)).eq)] := + Fintype.card States = Fintype.card (Quotient (NerodeCongruence (language M)).eq) + +-- Myhill-Nerode (3) +/- L and two minimal DFAs M and N accepting L ~> Iso M N -/ +@[simp] +theorem unique_minimal_dfa (M : DA.FinAcc States_M α) [Fintype States_M] + [Fintype (Quotient (NerodeCongruence (language M)).eq)] (hMin : IsMinimalAutomaton M) : + ∃! φ : States_M ≃ Quotient (NerodeCongruence (language M)).eq, + ∀ x, φ (M.mtr M.start x) = ⟦x⟧ := by + sorry +-- end Automata.DA end Cslib From c5079daf53cb4fbce26282d9be47b7149a09f74a Mon Sep 17 00:00:00 2001 From: Akhilesh Balaji Date: Wed, 8 Apr 2026 16:56:15 +0530 Subject: [PATCH 03/13] feat: proof of unique minimal DFA --- .../Automata/DA/MyhillNerode.lean | 101 ++++++++++++++++-- 1 file changed, 95 insertions(+), 6 deletions(-) diff --git a/Cslib/Computability/Automata/DA/MyhillNerode.lean b/Cslib/Computability/Automata/DA/MyhillNerode.lean index 0c30b9510..351054c81 100644 --- a/Cslib/Computability/Automata/DA/MyhillNerode.lean +++ b/Cslib/Computability/Automata/DA/MyhillNerode.lean @@ -49,12 +49,19 @@ open Acceptor variable {α : Type} {l m : Language α} +/-- The Nerode congruence of a language `l` is a right congruence on strings where two +strings are related iff. all their right extensions are either both in the language +or both not in it. -/ def NerodeCongruence (l : Language α) : RightCongruence α where r x y := ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l iseqv := ⟨fun _ _ => Iff.rfl, fun h z => (h z).symm, fun h_1 h_2 z => (h_1 z).trans (h_2 z)⟩ right_cov := ⟨fun a {x y} (h : ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l) z => List.append_assoc x a z ▸ List.append_assoc y a z ▸ h (a ++ z)⟩ +/-- The Nerode congruence of a language `l` gives rise to a DFA where each state corresponds to an +equivalence class of the language under the Nerode congruence. Note that this is simply the DFA +given rise to by the underlying right congruence with only the accept states specified here as +`{⟦ x ⟧ | x ∈ l}`. -/ def NerodeCongruence.toFinAcc (l : Language α) : DA.FinAcc (Quotient (NerodeCongruence l).eq) α := letI c := NerodeCongruence l @@ -63,6 +70,7 @@ def NerodeCongruence.toFinAcc (l : Language α) : intro x y hxy simpa using hxy []) q} } +/-- The DFA constructed from the Nerode congruence on `l` accepts `l`. -/ @[simp, scoped grind =] theorem nerodecongruence_to_finacc_acc (l : Language α) : language (NerodeCongruence.toFinAcc l) = l := by @@ -70,6 +78,9 @@ theorem nerodecongruence_to_finacc_acc (l : Language α) : simp [NerodeCongruence.toFinAcc, language, Acceptor.Accepts] exact Iff.of_eq rfl +/-- The statement that two strings are related by the Nerode congruence `c` iff. all their right +extensions are either both in the language or both not in it is equivalent to saying that all their +right extensions are either both accepted or rejected by the DFA given rise to by `c`. -/ theorem nerodecongruence_accepts_apply (M : DA.FinAcc State α) (x y : List α) : (NerodeCongruence (language M)).r x y ↔ @@ -79,6 +90,7 @@ theorem nerodecongruence_accepts_apply simp only [FLTS.mtr, ← List.foldl_append] rfl +/-- If `l` is regular, then `l/c` is finite. -/ theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : Finite (Quotient (NerodeCongruence l).eq) := by rcases IsRegular.iff_dfa.mp h with ⟨State, hFin, M, hM⟩ @@ -99,6 +111,8 @@ theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : rw [heps])⟩ -- Myhill-Nerode (1) + +/-- `l` is regular if and only if `l/c` is finite. -/ @[simp, scoped grind =] theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : l.IsRegular ↔ Finite (Quotient (NerodeCongruence l).eq) := by @@ -110,6 +124,8 @@ theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : NerodeCongruence.toFinAcc l, nerodecongruence_to_finacc_acc l⟩ -- +/-- Given a set of strings all distinguishable by `l` (i.e., not related to each other by `c`), +the number of states in the DFA accepting `l` is at least the number of strings in the set. -/ @[simp] theorem lower_bound_num_states_dfa_acc {l : Language α} {M : DA.FinAcc States α} {ws : Finset (List α)} (hws : ∀ x ∈ ws, ∀ y ∈ ws, x ≠ y → ¬(NerodeCongruence l).r x y) @@ -128,6 +144,9 @@ theorem lower_bound_num_states_dfa_acc {l : Language α} {M : DA.FinAcc States ((nerodecongruence_accepts_apply M x y).mpr (fun z => heq ▸ Iff.rfl)) -- Myhill-Nerode (2) + +/-- All DFAs accepting `l` must have at least as many states as the number of equivalence classes +of `l` under `c` (i.e., `|l/c|`). -/ @[simp] theorem minimum_dfa_states_eq_num_eqv_clss_nerode {M : DA.FinAcc States α} [Fintype States] [Fintype (Quotient (NerodeCongruence (language M)).eq)] : @@ -145,18 +164,88 @@ theorem minimum_dfa_states_eq_num_eqv_clss_nerode {M : DA.FinAcc States α} exact card_hws_eq_num_eqv_clss ▸ lower_bound_num_states_dfa_acc hws rfl -- +/-- The minimal DFA accepting `l` has `|l/c|` states. -/ def IsMinimalAutomaton (M : DA.FinAcc States α) [Fintype States] [Fintype (Quotient (NerodeCongruence (language M)).eq)] := Fintype.card States = Fintype.card (Quotient (NerodeCongruence (language M)).eq) +/-- Given a DFA `M`, two strings are related iff. they reach the same state under when run through +`M`. The Nerode congruence is the state congruence wrt. the minimal DFA accepting `l`. -/ +def StateCongruence (M : DA.FinAcc States α) : RightCongruence α where + r x y := ∀ z, M.mtr M.start (x ++ z) = M.mtr M.start (y ++ z) + iseqv := ⟨by intro x z; rfl, by intro x y h z; symm; exact h z, + by intro x y z h_1 h_2 w; exact (h_1 w).trans (h_2 w)⟩ + right_cov := ⟨by + intro a x y h z + simpa [List.append_assoc, FLTS.mtr_concat_eq] using h (a ++ z)⟩ + +/-- The Nerode congruence is the most coarse state congruence given a language. -/ +@[simp] +theorem statecongruence_refines_nerodecongruence {M : DA.FinAcc States α} : + ∀ x y, (StateCongruence M).r x y → (NerodeCongruence (language M)).r x y := by + intro x y h z + constructor + · intro hx + have := h z + simpa [language, Acceptor.Accepts, FLTS.mtr_concat_eq] using + congrArg (fun s => s ∈ M.accept) this ▸ hx + · intro hy + have := h z + simpa [language, Acceptor.Accepts, FLTS.mtr_concat_eq] using + congrArg (fun s => s ∈ M.accept) this ▸ hy + +/-- Every equivalence class of `l` under a Nerode congruence is a union of one or more equivalence +classes from the state congruence of a DFA accepting `l`. -/ +@[simp] +lemma nerodecongruence_eqv_cls_eq_union_statecongruence_eqv_clss + {M : DA.FinAcc States α} (Q : Quotient (NerodeCongruence (language M)).eq) : + {x : List α | Quotient.mk (NerodeCongruence (language M)).eq x = Q} = + ⋃ (R : Quotient (StateCongruence M).eq) + (_ : Quotient.mk (NerodeCongruence (language M)).eq (Quotient.out R) = Q), + {x | Quotient.mk (StateCongruence M).eq x = R} := by + let NC := NerodeCongruence (language M); let SC := StateCongruence M + ext x; simp only [Set.mem_setOf_eq, Set.mem_iUnion] + constructor + · intro hx + exact ⟨Quotient.mk SC.eq x, + (Quotient.sound (statecongruence_refines_nerodecongruence _ _ + (Quotient.eq.mp (Quotient.out_eq (Quotient.mk SC.eq x))))).trans hx, + rfl⟩ + · intro ⟨R, hRQ, hxR⟩ + exact (Quotient.out_eq Q) ▸ Quotient.sound (NC.iseqv.trans + (statecongruence_refines_nerodecongruence _ _ + (Quotient.eq.mp (hxR.trans (Quotient.out_eq R).symm))) + (Quotient.eq.mp (hRQ.trans (Quotient.out_eq Q).symm))) + -- Myhill-Nerode (3) -/- L and two minimal DFAs M and N accepting L ~> Iso M N -/ + +/-- The minimal DFA `M` accepting `l` is unique up to unique isomorphism. -/ @[simp] -theorem unique_minimal_dfa (M : DA.FinAcc States_M α) [Fintype States_M] - [Fintype (Quotient (NerodeCongruence (language M)).eq)] (hMin : IsMinimalAutomaton M) : - ∃! φ : States_M ≃ Quotient (NerodeCongruence (language M)).eq, - ∀ x, φ (M.mtr M.start x) = ⟦x⟧ := by - sorry +theorem unique_minimal_dfa (M : DA.FinAcc States α) [Fintype States] + [Fintype (Quotient (NerodeCongruence (language M)).eq)] (hMin : IsMinimalAutomaton M) : + ∃! φ : States ≃ Quotient (NerodeCongruence (language M)).eq, + ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by + haveI : Finite States := @Fintype.finite States ‹Fintype States› + let φ : States → Quotient (NerodeCongruence (language M)).eq := + fun s => ⟦Classical.epsilon (fun x : List α => M.mtr M.start x = s)⟧ + have hφ : ∀ x, φ (M.mtr M.start x) = ⟦x⟧ := fun x => by + apply Quotient.sound + apply statecongruence_refines_nerodecongruence + intro z + have := @Classical.epsilon_spec _ (fun y : List α => M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ + simp only [FLTS.mtr, List.foldl_append] at this ⊢; rw [this] + have hφ_surj : Function.Surjective φ := fun q => + q.inductionOn (fun x => ⟨M.mtr M.start x, hφ x⟩) + have hφ_inj : Function.Injective φ := + hφ_surj.injective_of_finite (Fintype.equivOfCardEq hMin) + let φ_equiv := Equiv.ofBijective φ ⟨hφ_inj, hφ_surj⟩ + refine ⟨φ_equiv, hφ, fun ψ hψ => ?_⟩ + ext s + obtain ⟨x, rfl⟩ : ∃ x, M.mtr M.start x = s := by + induction h : φ s using Quotient.inductionOn with + | h x => exact ⟨x, hφ_inj ((hφ x).trans h.symm)⟩ + simp [φ_equiv, Equiv.ofBijective, hφ, hψ] + -- end Automata.DA From 4b7178b95200c9831c7e17494ff31a4726557fce Mon Sep 17 00:00:00 2001 From: Akhilesh Balaji Date: Wed, 8 Apr 2026 18:54:48 +0530 Subject: [PATCH 04/13] =?UTF-8?q?refactor:=20use=20`=E2=9F=A6=E2=9F=A7`=20?= =?UTF-8?q?instead=20of=20`Quotient.mk`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Automata/DA/MyhillNerode.lean | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Cslib/Computability/Automata/DA/MyhillNerode.lean b/Cslib/Computability/Automata/DA/MyhillNerode.lean index 351054c81..96fe3670b 100644 --- a/Cslib/Computability/Automata/DA/MyhillNerode.lean +++ b/Cslib/Computability/Automata/DA/MyhillNerode.lean @@ -96,8 +96,8 @@ theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : rcases IsRegular.iff_dfa.mp h with ⟨State, hFin, M, hM⟩ rw [← hM] apply Finite.of_surjective - (fun s : State => Quotient.mk (NerodeCongruence (language M)).eq - (Classical.epsilon (fun x => M.mtr M.start x = s))) + (fun s : State => + ⟦ Classical.epsilon (fun x => M.mtr M.start x = s) ⟧) intro q induction q using Quotient.inductionOn with | h x => @@ -197,19 +197,19 @@ theorem statecongruence_refines_nerodecongruence {M : DA.FinAcc States α} : /-- Every equivalence class of `l` under a Nerode congruence is a union of one or more equivalence classes from the state congruence of a DFA accepting `l`. -/ @[simp] -lemma nerodecongruence_eqv_cls_eq_union_statecongruence_eqv_clss +theorem nerodecongruence_eqv_cls_eq_union_statecongruence_eqv_clss {M : DA.FinAcc States α} (Q : Quotient (NerodeCongruence (language M)).eq) : - {x : List α | Quotient.mk (NerodeCongruence (language M)).eq x = Q} = + {x : List α | (⟦ x ⟧ : Quotient (NerodeCongruence (language M)).eq) = Q} = ⋃ (R : Quotient (StateCongruence M).eq) - (_ : Quotient.mk (NerodeCongruence (language M)).eq (Quotient.out R) = Q), - {x | Quotient.mk (StateCongruence M).eq x = R} := by + (_ : (⟦ Quotient.out R ⟧ : Quotient (NerodeCongruence (language M)).eq) = Q), + {x | (⟦ x ⟧ : Quotient (StateCongruence M).eq) = R} := by let NC := NerodeCongruence (language M); let SC := StateCongruence M ext x; simp only [Set.mem_setOf_eq, Set.mem_iUnion] constructor · intro hx - exact ⟨Quotient.mk SC.eq x, + exact ⟨(⟦ x ⟧ : Quotient SC.eq), (Quotient.sound (statecongruence_refines_nerodecongruence _ _ - (Quotient.eq.mp (Quotient.out_eq (Quotient.mk SC.eq x))))).trans hx, + (Quotient.eq.mp (Quotient.out_eq (⟦ x ⟧ : Quotient SC.eq))))).trans hx, rfl⟩ · intro ⟨R, hRQ, hxR⟩ exact (Quotient.out_eq Q) ▸ Quotient.sound (NC.iseqv.trans @@ -227,8 +227,8 @@ theorem unique_minimal_dfa (M : DA.FinAcc States α) [Fintype States] ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by haveI : Finite States := @Fintype.finite States ‹Fintype States› let φ : States → Quotient (NerodeCongruence (language M)).eq := - fun s => ⟦Classical.epsilon (fun x : List α => M.mtr M.start x = s)⟧ - have hφ : ∀ x, φ (M.mtr M.start x) = ⟦x⟧ := fun x => by + fun s => ⟦ Classical.epsilon (fun x : List α => M.mtr M.start x = s) ⟧ + have hφ : ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := fun x => by apply Quotient.sound apply statecongruence_refines_nerodecongruence intro z From 59e6aaae0246dd9c0a5e91e8b86f7f1f7b776bcd Mon Sep 17 00:00:00 2001 From: Akhilesh Balaji Date: Mon, 13 Apr 2026 16:29:48 +0530 Subject: [PATCH 05/13] refactor: changed the namespace of MyhillNerode and made stylistic changes --- Cslib.lean | 1 + .../Automata/DA/MyhillNerode.lean | 252 ----------------- .../Computability/Languages/MyhillNerode.lean | 255 ++++++++++++++++++ references.bib | 18 ++ 4 files changed, 274 insertions(+), 252 deletions(-) delete mode 100644 Cslib/Computability/Automata/DA/MyhillNerode.lean create mode 100644 Cslib/Computability/Languages/MyhillNerode.lean diff --git a/Cslib.lean b/Cslib.lean index 3e5977af2..397c16ccd 100644 --- a/Cslib.lean +++ b/Cslib.lean @@ -26,6 +26,7 @@ public import Cslib.Computability.Languages.Congruences.BuchiCongruence public import Cslib.Computability.Languages.Congruences.RightCongruence public import Cslib.Computability.Languages.ExampleEventuallyZero public import Cslib.Computability.Languages.Language +public import Cslib.Computability.Languages.MyhillNerode public import Cslib.Computability.Languages.OmegaLanguage public import Cslib.Computability.Languages.OmegaRegularLanguage public import Cslib.Computability.Languages.RegularLanguage diff --git a/Cslib/Computability/Automata/DA/MyhillNerode.lean b/Cslib/Computability/Automata/DA/MyhillNerode.lean deleted file mode 100644 index 96fe3670b..000000000 --- a/Cslib/Computability/Automata/DA/MyhillNerode.lean +++ /dev/null @@ -1,252 +0,0 @@ -/- -Copyright (c) 2026 Akhilesh Balaji. All rights reserved. -Released under Apache 2.0 license as described in the file LICENSE. -Authors: Akhilesh Balaji --/ - -module - -public import Cslib.Computability.Automata.DA.Basic -public import Cslib.Computability.Automata.DA.Congr -public import Cslib.Computability.Languages.RegularLanguage -public import Cslib.Computability.Languages.Congruences.RightCongruence -public import Mathlib.Computability.Language -public import Mathlib.Data.Fintype.Card -public import Mathlib.CategoryTheory.Iso - -@[expose] public section - -/-! # All three subparts of the Myhill-Nerode Theorem for DFAs - -(1) `L` regular iff. `∼_L` has a finite number of equivalence classes `N`. -(2) `N` is the number of states in the minimal DFA accepting `L`. -(3) The minimal DFA is unique up to unique isomorphism. That is, for any - minimal DFA acceptor, there exists exactly one isomorphism from it to the - following one: - - > Let each equivalence class `⟦ x ⟧` correspond to a state, and let state - transitions be `a : ⟦ x ⟧ → ⟦ x a ⟧` for each `a ∈ Σ`. - Let the starting state be `⟦ ϵ ⟧`, and the accepting states be `⟦ x ⟧` where - `x ∈ L`. - -## References - -* [J. E. Hopcroft, R. Motwani, J. D. Ullman, - *Introduction to Automata Theory, Languages, and Computation*][Hopcroft2006] -* [T. Malkin, *COMS W3261: Computer Science Theory, Handout 3: The Myhill-Nerode Theorem - and Implications*][Malkin2024] --/ - -namespace Cslib - -open Cslib -open scoped RightCongruence - -open Language - -namespace Automata.DA -open Acceptor - -variable {α : Type} {l m : Language α} - -/-- The Nerode congruence of a language `l` is a right congruence on strings where two -strings are related iff. all their right extensions are either both in the language -or both not in it. -/ -def NerodeCongruence (l : Language α) : RightCongruence α where - r x y := ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l - iseqv := ⟨fun _ _ => Iff.rfl, fun h z => (h z).symm, fun h_1 h_2 z => (h_1 z).trans (h_2 z)⟩ - right_cov := ⟨fun a {x y} (h : ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l) z => - List.append_assoc x a z ▸ List.append_assoc y a z ▸ h (a ++ z)⟩ - -/-- The Nerode congruence of a language `l` gives rise to a DFA where each state corresponds to an -equivalence class of the language under the Nerode congruence. Note that this is simply the DFA -given rise to by the underlying right congruence with only the accept states specified here as -`{⟦ x ⟧ | x ∈ l}`. -/ -def NerodeCongruence.toFinAcc (l : Language α) : - DA.FinAcc (Quotient (NerodeCongruence l).eq) α := - letI c := NerodeCongruence l - { c.toDA with - accept := {q | Quotient.lift (fun x => x ∈ l) (by - intro x y hxy - simpa using hxy []) q} } - -/-- The DFA constructed from the Nerode congruence on `l` accepts `l`. -/ -@[simp, scoped grind =] -theorem nerodecongruence_to_finacc_acc (l : Language α) : - language (NerodeCongruence.toFinAcc l) = l := by - ext x - simp [NerodeCongruence.toFinAcc, language, Acceptor.Accepts] - exact Iff.of_eq rfl - -/-- The statement that two strings are related by the Nerode congruence `c` iff. all their right -extensions are either both in the language or both not in it is equivalent to saying that all their -right extensions are either both accepted or rejected by the DFA given rise to by `c`. -/ -theorem nerodecongruence_accepts_apply - (M : DA.FinAcc State α) (x y : List α) : - (NerodeCongruence (language M)).r x y ↔ - ∀ z, - M.mtr (M.mtr M.start x) z ∈ M.accept ↔ - M.mtr (M.mtr M.start y) z ∈ M.accept := by - simp only [FLTS.mtr, ← List.foldl_append] - rfl - -/-- If `l` is regular, then `l/c` is finite. -/ -theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : - Finite (Quotient (NerodeCongruence l).eq) := by - rcases IsRegular.iff_dfa.mp h with ⟨State, hFin, M, hM⟩ - rw [← hM] - apply Finite.of_surjective - (fun s : State => - ⟦ Classical.epsilon (fun x => M.mtr M.start x = s) ⟧) - intro q - induction q using Quotient.inductionOn with - | h x => - exact ⟨M.mtr M.start x, Quotient.sound (by - change (NerodeCongruence (language M)).r _ x - rw [nerodecongruence_accepts_apply] - intro z - have heps : M.mtr M.start (Classical.epsilon - (fun y => M.mtr M.start y = M.mtr M.start x)) = M.mtr M.start x := - @Classical.epsilon_spec _ (fun y => M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ - rw [heps])⟩ - --- Myhill-Nerode (1) - -/-- `l` is regular if and only if `l/c` is finite. -/ -@[simp, scoped grind =] -theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : - l.IsRegular ↔ Finite (Quotient (NerodeCongruence l).eq) := by - constructor - · intro h - exact IsRegular.finite_range_nerode_quotient h - · intro h - refine IsRegular.iff_dfa.mpr ⟨Quotient (NerodeCongruence l).eq, h, - NerodeCongruence.toFinAcc l, nerodecongruence_to_finacc_acc l⟩ --- - -/-- Given a set of strings all distinguishable by `l` (i.e., not related to each other by `c`), -the number of states in the DFA accepting `l` is at least the number of strings in the set. -/ -@[simp] -theorem lower_bound_num_states_dfa_acc {l : Language α} {M : DA.FinAcc States α} - {ws : Finset (List α)} (hws : ∀ x ∈ ws, ∀ y ∈ ws, x ≠ y → ¬(NerodeCongruence l).r x y) - (hM : language M = l) [Fintype States] : - Fintype.card States ≥ ws.card := by - classical - by_contra h - simp only [ge_iff_le, not_le] at h - by_cases h_card : ws.card ≤ 1 - · exact (lt_of_lt_of_le h (le_trans h_card - (Nat.succ_le_of_lt (Fintype.card_pos_iff.mpr ⟨M.start⟩)))).false - · obtain ⟨x, hx, y, hy, hne, heq⟩ := Finset.exists_ne_map_eq_of_card_lt_of_maps_to h - (fun x _ => Finset.mem_univ (M.mtr M.start x)) - rw [← hM] at hws - exact hws x hx y hy hne - ((nerodecongruence_accepts_apply M x y).mpr (fun z => heq ▸ Iff.rfl)) - --- Myhill-Nerode (2) - -/-- All DFAs accepting `l` must have at least as many states as the number of equivalence classes -of `l` under `c` (i.e., `|l/c|`). -/ -@[simp] -theorem minimum_dfa_states_eq_num_eqv_clss_nerode {M : DA.FinAcc States α} - [Fintype States] [Fintype (Quotient (NerodeCongruence (language M)).eq)] : - Fintype.card States ≥ Fintype.card (Quotient (NerodeCongruence (language M)).eq) := by - classical - let ws : Finset (List α) := Finset.univ.image - (Quotient.out : Quotient (NerodeCongruence (language M)).eq → List α) - have hws : ∀ x ∈ ws, ∀ y ∈ ws, x ≠ y → ¬(NerodeCongruence (language M)).r x y := by - simp only [ws, Finset.mem_image, Finset.mem_univ, true_and] - rintro _ ⟨qx, rfl⟩ _ ⟨qy, rfl⟩ hne h - exact hne (by simpa using Quotient.sound h) - have card_hws_eq_num_eqv_clss : ws.card = Fintype.card - (Quotient (NerodeCongruence (language M)).eq) := by - simp [ws, Finset.card_image_of_injective _ Quotient.out_injective] - exact card_hws_eq_num_eqv_clss ▸ lower_bound_num_states_dfa_acc hws rfl --- - -/-- The minimal DFA accepting `l` has `|l/c|` states. -/ -def IsMinimalAutomaton (M : DA.FinAcc States α) - [Fintype States] [Fintype (Quotient (NerodeCongruence (language M)).eq)] := - Fintype.card States = Fintype.card (Quotient (NerodeCongruence (language M)).eq) - -/-- Given a DFA `M`, two strings are related iff. they reach the same state under when run through -`M`. The Nerode congruence is the state congruence wrt. the minimal DFA accepting `l`. -/ -def StateCongruence (M : DA.FinAcc States α) : RightCongruence α where - r x y := ∀ z, M.mtr M.start (x ++ z) = M.mtr M.start (y ++ z) - iseqv := ⟨by intro x z; rfl, by intro x y h z; symm; exact h z, - by intro x y z h_1 h_2 w; exact (h_1 w).trans (h_2 w)⟩ - right_cov := ⟨by - intro a x y h z - simpa [List.append_assoc, FLTS.mtr_concat_eq] using h (a ++ z)⟩ - -/-- The Nerode congruence is the most coarse state congruence given a language. -/ -@[simp] -theorem statecongruence_refines_nerodecongruence {M : DA.FinAcc States α} : - ∀ x y, (StateCongruence M).r x y → (NerodeCongruence (language M)).r x y := by - intro x y h z - constructor - · intro hx - have := h z - simpa [language, Acceptor.Accepts, FLTS.mtr_concat_eq] using - congrArg (fun s => s ∈ M.accept) this ▸ hx - · intro hy - have := h z - simpa [language, Acceptor.Accepts, FLTS.mtr_concat_eq] using - congrArg (fun s => s ∈ M.accept) this ▸ hy - -/-- Every equivalence class of `l` under a Nerode congruence is a union of one or more equivalence -classes from the state congruence of a DFA accepting `l`. -/ -@[simp] -theorem nerodecongruence_eqv_cls_eq_union_statecongruence_eqv_clss - {M : DA.FinAcc States α} (Q : Quotient (NerodeCongruence (language M)).eq) : - {x : List α | (⟦ x ⟧ : Quotient (NerodeCongruence (language M)).eq) = Q} = - ⋃ (R : Quotient (StateCongruence M).eq) - (_ : (⟦ Quotient.out R ⟧ : Quotient (NerodeCongruence (language M)).eq) = Q), - {x | (⟦ x ⟧ : Quotient (StateCongruence M).eq) = R} := by - let NC := NerodeCongruence (language M); let SC := StateCongruence M - ext x; simp only [Set.mem_setOf_eq, Set.mem_iUnion] - constructor - · intro hx - exact ⟨(⟦ x ⟧ : Quotient SC.eq), - (Quotient.sound (statecongruence_refines_nerodecongruence _ _ - (Quotient.eq.mp (Quotient.out_eq (⟦ x ⟧ : Quotient SC.eq))))).trans hx, - rfl⟩ - · intro ⟨R, hRQ, hxR⟩ - exact (Quotient.out_eq Q) ▸ Quotient.sound (NC.iseqv.trans - (statecongruence_refines_nerodecongruence _ _ - (Quotient.eq.mp (hxR.trans (Quotient.out_eq R).symm))) - (Quotient.eq.mp (hRQ.trans (Quotient.out_eq Q).symm))) - --- Myhill-Nerode (3) - -/-- The minimal DFA `M` accepting `l` is unique up to unique isomorphism. -/ -@[simp] -theorem unique_minimal_dfa (M : DA.FinAcc States α) [Fintype States] - [Fintype (Quotient (NerodeCongruence (language M)).eq)] (hMin : IsMinimalAutomaton M) : - ∃! φ : States ≃ Quotient (NerodeCongruence (language M)).eq, - ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by - haveI : Finite States := @Fintype.finite States ‹Fintype States› - let φ : States → Quotient (NerodeCongruence (language M)).eq := - fun s => ⟦ Classical.epsilon (fun x : List α => M.mtr M.start x = s) ⟧ - have hφ : ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := fun x => by - apply Quotient.sound - apply statecongruence_refines_nerodecongruence - intro z - have := @Classical.epsilon_spec _ (fun y : List α => M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ - simp only [FLTS.mtr, List.foldl_append] at this ⊢; rw [this] - have hφ_surj : Function.Surjective φ := fun q => - q.inductionOn (fun x => ⟨M.mtr M.start x, hφ x⟩) - have hφ_inj : Function.Injective φ := - hφ_surj.injective_of_finite (Fintype.equivOfCardEq hMin) - let φ_equiv := Equiv.ofBijective φ ⟨hφ_inj, hφ_surj⟩ - refine ⟨φ_equiv, hφ, fun ψ hψ => ?_⟩ - ext s - obtain ⟨x, rfl⟩ : ∃ x, M.mtr M.start x = s := by - induction h : φ s using Quotient.inductionOn with - | h x => exact ⟨x, hφ_inj ((hφ x).trans h.symm)⟩ - simp [φ_equiv, Equiv.ofBijective, hφ, hψ] - --- - -end Automata.DA -end Cslib diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean new file mode 100644 index 000000000..49e23d141 --- /dev/null +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -0,0 +1,255 @@ +/- +Copyright (c) 2026 Akhilesh Balaji. All rights reserved. +Released under Apache 2.0 license as described in the file LICENSE. +Authors: Akhilesh Balaji +-/ + +module + +public import Cslib.Computability.Languages.RegularLanguage + +@[expose] public section + +/-! # All three subparts of the Myhill-Nerode Theorem for DFAs + +(1) `L` regular iff. `∼_L` has a finite number of equivalence classes `N`. +(2) `N` is the number of states in the minimal DFA accepting `L`. +(3) The minimal DFA is unique up to unique isomorphism. That is, for any + minimal DFA acceptor, there exists exactly one isomorphism from it to the + following one: + + > Let each equivalence class `⟦ x ⟧` correspond to a state, and let state + transitions be `a : ⟦ x ⟧ → ⟦ x a ⟧` for each `a ∈ Σ`. + Let the starting state be `⟦ ϵ ⟧`, and the accepting states be `⟦ x ⟧` where + `x ∈ L`. + +## References + +* [J. E. Hopcroft, R. Motwani, J. D. Ullman, + *Introduction to Automata Theory, Languages, and Computation*][Hopcroft2006] +* [T. Malkin, *COMS W3261: Computer Science Theory, Handout 3: The Myhill-Nerode Theorem + and Implications*][Malkin2024] +* [Wikipedia contributors, Myhill–Nerode theorem][WikipediaMyhillNerode2026] +-/ + +namespace Language +open Cslib +open scoped RightCongruence +open Language +open Automata Automata.DA Automata.DA.FinAcc Acceptor + +variable {α : Type*} {l : Language α} + +/-- The Nerode congruence of a language `l` is a right congruence on strings where two +strings are related iff. all their right extensions are either both in the language +or both not in it. -/ +instance NerodeCongruence (l : Language α) : RightCongruence α where + r x y := ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l + iseqv.refl := fun _ _ => Iff.rfl + iseqv.symm := fun h z => (h z).symm + iseqv.trans := fun h_1 h_2 z => (h_1 z).trans (h_2 z) + right_cov := ⟨fun a {x y} (h : ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l) z => + List.append_assoc x a z ▸ List.append_assoc y a z ▸ h (a ++ z)⟩ + +/-- The Nerode congruence of a language `l` gives rise to a DFA where each state corresponds to an +equivalence class of the language under the Nerode congruence. Note that this is simply the DFA +given rise to by the underlying right congruence with only the accept states specified here as +`{⟦ x ⟧ | x ∈ l}`. -/ +def NerodeCongruence.toFinAcc (l : Language α) : + DA.FinAcc (Quotient (l.NerodeCongruence).eq) α := + { (l.NerodeCongruence).toDA with accept := (⟦·⟧) '' l } + +/-- The DFA constructed from the Nerode congruence on `l` accepts `l`. -/ +@[simp, scoped grind =] +theorem nerodeCongruence_to_finacc_acc (l : Language α) : + language (NerodeCongruence.toFinAcc l) = l := by + ext x + simp only [NerodeCongruence.toFinAcc, language, Acceptor.Accepts, congr_mtr_eq, Set.mem_image] + constructor + · rintro ⟨y, hy, heq⟩ + simpa using (Quotient.eq.mp heq []).mp (by simpa using hy) + · exact fun hx => ⟨x, hx, rfl⟩ + +/-- The statement that (two strings are related by the Nerode congruence `c_l` iff. all their right +extensions are either both in the language or both not in it) is equivalent to stating that (all +their right extensions are either both accepted or rejected by the DFA given rise to by `c_l`. -/ +theorem nerodeCongruence_accepts_apply + (M : DA.FinAcc State α) (x y : List α) : + ((language M).NerodeCongruence).r x y ↔ + ∀ z, + M.mtr (M.mtr M.start x) z ∈ M.accept ↔ + M.mtr (M.mtr M.start y) z ∈ M.accept := by + simp only [FLTS.mtr, ← List.foldl_append] + rfl + +/-- If `l` is regular, then `α*/c_l` is finite. -/ +theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : + Finite (Quotient (l.NerodeCongruence).eq) := by + rcases IsRegular.iff_dfa.mp h with ⟨State, hFin, M, hM⟩ + rw [← hM] + apply Finite.of_surjective + (fun s : State => + ⟦ Classical.epsilon (fun x => M.mtr M.start x = s) ⟧) + intro q + induction q using Quotient.inductionOn with + | h x => + exact ⟨M.mtr M.start x, Quotient.sound (by + change ((language M).NerodeCongruence).r _ x + rw [nerodeCongruence_accepts_apply] + intro z + have heps : M.mtr M.start (Classical.epsilon + (fun y => M.mtr M.start y = M.mtr M.start x)) = M.mtr M.start x := + @Classical.epsilon_spec _ (fun y => M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ + rw [heps])⟩ + +-- Myhill-Nerode (1) + +/-- `l` is regular if and only if `α*/c_l` is finite. -/ +@[simp, scoped grind =] +theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : + l.IsRegular ↔ Finite (Quotient (l.NerodeCongruence).eq) := by + constructor + · intro h + exact IsRegular.finite_range_nerode_quotient h + · intro h + letI : Fintype (Quotient (l.NerodeCongruence).eq) := Fintype.ofFinite _ + set e := Fintype.equivFin (Quotient (l.NerodeCongruence).eq) + set M := NerodeCongruence.toFinAcc l + refine IsRegular.iff_dfa.mpr ⟨Fin _, inferInstance, + { tr := fun s x => e (M.tr (e.symm s) x) + start := e M.start + accept := e '' M.accept }, ?_⟩ + have hfoldl : ∀ s x, List.foldl (fun s x => e (M.tr (e.symm s) x)) (e s) x = + e (List.foldl M.tr s x) := by + intro s x + induction x generalizing s with + | nil => simp + | cons a as ih => simp only [List.foldl, e.symm_apply_apply, ih] + ext x + change List.foldl (fun s x => e (M.tr (e.symm s) x)) (e M.start) x ∈ e '' M.accept ↔ x ∈ l + simp only [hfoldl, Set.mem_image_equiv, Equiv.symm_apply_apply, + ← nerodeCongruence_to_finacc_acc l, language, Acceptor.Accepts, FLTS.mtr] + exact Iff.of_eq rfl +-- + +/-- Given a set of strings all distinguishable by `l` (i.e., not related to each other by `c_l`), +the number of states in the DFA accepting `l` is at least the number of strings in the set. -/ +@[simp] +theorem lower_bound_num_states_dfa_acc [Finite States] {l : Language α} + {M : DA.FinAcc States α} {ws : Set (List α)} [Finite ws] + (hws : ws.Pairwise (fun x y => ¬(l.NerodeCongruence).r x y)) + (hM : language M = l) : + Nat.card States ≥ Nat.card ws := by + classical + letI : Fintype States := Fintype.ofFinite _ + letI : Fintype ws := Fintype.ofFinite _ + rw [Nat.card_eq_fintype_card, Nat.card_eq_fintype_card] + by_contra h + simp only [ge_iff_le, not_le] at h + by_cases h_card : Fintype.card ws ≤ 1 + · exact (lt_of_lt_of_le h (le_trans h_card + (Nat.succ_le_of_lt (Fintype.card_pos_iff.mpr ⟨M.start⟩)))).false + · obtain ⟨⟨x, hx⟩, ⟨y, hy⟩, hne, heq⟩ := + Fintype.exists_ne_map_eq_of_card_lt (f := fun w : ws => M.mtr M.start w) (by omega) + rw [← hM] at hws + exact hws hx hy (fun h => hne (Subtype.ext h)) + ((nerodeCongruence_accepts_apply M x y).mpr (fun z => heq ▸ Iff.rfl)) + +-- Myhill-Nerode (2) + +/-- All DFAs accepting `l` must have at least as many states as the number of equivalence classes +of `α*` under the Nerode congruence `c_l` induced by `l` (i.e., `|α*/c_l|`). -/ +@[simp] +theorem minimum_dfa_states_eq_num_eqv_clss_nerode {M : DA.FinAcc States α} + [Finite States] [Finite (Quotient ((language M).NerodeCongruence).eq)] : + Nat.card States ≥ Nat.card (Quotient ((language M).NerodeCongruence).eq) := by + classical + let ws : Set (List α) := Set.range + (Quotient.out : Quotient ((language M).NerodeCongruence).eq → List α) + haveI : Finite ws := Set.finite_range _ |>.to_subtype + have hws : ws.Pairwise (fun x y => ¬((language M).NerodeCongruence).r x y) := by + rintro _ ⟨qx, rfl⟩ _ ⟨qy, rfl⟩ hne h + exact hne (by simpa using Quotient.sound h) + have card_hws_eq : Nat.card ws = Nat.card (Quotient ((language M).NerodeCongruence).eq) := + Nat.card_congr (Equiv.ofInjective _ Quotient.out_injective).symm + exact card_hws_eq ▸ lower_bound_num_states_dfa_acc hws rfl +-- + +end Language + +namespace Cslib +namespace Automata.DA + +open Cslib +open scoped RightCongruence +open Language +open Automata Automata.DA Automata.DA.FinAcc Acceptor + +/-- The minimal DFA accepting `l` has `|α*/c_l|` states. -/ +def FinAcc.IsMinimalAutomaton [Finite States] (M : DA.FinAcc States α) (l : Language α) := + language M = l ∧ Nat.card States = Nat.card (Quotient (l.NerodeCongruence).eq) + +/-- Given a DFA `M`, two strings are related iff. they reach the same state under when run through +`M`. The Nerode congruence is the state congruence wrt. the minimal DFA accepting `l`. -/ +instance StateCongruence (M : DA.FinAcc States α) : RightCongruence α where + r x y := ∀ z, M.mtr M.start (x ++ z) = M.mtr M.start (y ++ z) + iseqv := ⟨by intro x z; rfl, by intro x y h z; symm; exact h z, + by intro x y z h_1 h_2 w; exact (h_1 w).trans (h_2 w)⟩ + right_cov := ⟨by + intro a x y h z + simpa [List.append_assoc, FLTS.mtr_concat_eq] using h (a ++ z)⟩ + +/-- The Nerode congruence is the most coarse state congruence given a language. -/ +@[simp] +theorem stateCongruence_refines_nerodeCongruence {M : DA.FinAcc States α} : + ∀ x y, (StateCongruence M).r x y → ((language M).NerodeCongruence ).r x y := by + intro x y h z + constructor + · intro hx + have := h z + simpa [language, Acceptor.Accepts, FLTS.mtr_concat_eq] using + congrArg (fun s => s ∈ M.accept) this ▸ hx + · intro hy + have := h z + simpa [language, Acceptor.Accepts, FLTS.mtr_concat_eq] using + congrArg (fun s => s ∈ M.accept) this ▸ hy + +-- Myhill-Nerode (3) + +/-- The minimal DFA `M` accepting `l` is unique up to unique isomorphism. -/ +@[simp] +theorem unique_minimal_dfa (M : DA.FinAcc States α) [Finite States] + (l : Language α) (hReg : l.IsRegular) (hMin : M.IsMinimalAutomaton l) : + ∃! φ : States ≃ Quotient (l.NerodeCongruence).eq, + ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by + obtain ⟨hML, hCard⟩ := hMin + haveI : Finite (Quotient (l.NerodeCongruence).eq) := + Language.IsRegular_iff_finite_eqv_cls_wrt_nerode.mp hReg + letI : Fintype States := Fintype.ofFinite _ + letI : Fintype (Quotient (l.NerodeCongruence).eq) := Fintype.ofFinite _ + subst hML + let φ : States → Quotient ((language M).NerodeCongruence).eq := + fun s => ⟦ Classical.epsilon (fun x : List α => M.mtr M.start x = s) ⟧ + have hφ : ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := fun x => by + apply Quotient.sound + apply stateCongruence_refines_nerodeCongruence + intro z + have := @Classical.epsilon_spec _ (fun y : List α => M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ + simp only [FLTS.mtr, List.foldl_append] at this ⊢; rw [this] + have hφ_surj : Function.Surjective φ := fun q => + q.inductionOn (fun x => ⟨M.mtr M.start x, hφ x⟩) + have hφ_inj : Function.Injective φ := + hφ_surj.injective_of_finite (Fintype.equivOfCardEq (by + rwa [← Nat.card_eq_fintype_card, ← Nat.card_eq_fintype_card])) + let φ_equiv := Equiv.ofBijective φ ⟨hφ_inj, hφ_surj⟩ + refine ⟨φ_equiv, hφ, fun ψ hψ => ?_⟩ + ext s + obtain ⟨x, rfl⟩ : ∃ x, M.mtr M.start x = s := by + induction h : φ s using Quotient.inductionOn with + | h x => exact ⟨x, hφ_inj ((hφ x).trans h.symm)⟩ + simp [φ_equiv, Equiv.ofBijective, hφ, hψ] +-- + +end Automata.DA +end Cslib + diff --git a/references.bib b/references.bib index 2cccb928f..2677d4e7d 100644 --- a/references.bib +++ b/references.bib @@ -61,6 +61,24 @@ @book{ Hopcroft2006 address = {USA} } +@misc{ Malkin2024, + author = {Tal Malkin}, + title = {{COMS W3261: Computer Science Theory, Handout 3: The Myhill-Nerode Theorem and Implications}}, + howpublished = {Columbia University Course Handout}, + year = {2024}, + month = {September}, + url = {https://www.cs.columbia.edu/~tal/3261/fall24/Handouts/3_Myhill_Nerode.pdf} +} + +@misc{ WikipediaMyhillNerode2026, + author = {{Wikipedia contributors}}, + title = {{Myhill--Nerode theorem}}, + howpublished = {Wikipedia, The Free Encyclopedia}, + year = {2026}, + url = {https://en.wikipedia.org/wiki/Myhill%E2%80%93Nerode_theorem}, + note = {[Online; accessed 9-April-2026]} +} + @article{ Chargueraud2012, title = {The {Locally} {Nameless} {Representation}}, volume = {49}, From 44c5bb9458f696bae82c41bbce4e03cf3f8892f8 Mon Sep 17 00:00:00 2001 From: Ching-Tsun Chou Date: Mon, 13 Apr 2026 12:52:50 -0700 Subject: [PATCH 06/13] Some golfing by ctchou --- .../Computability/Languages/MyhillNerode.lean | 114 +++++++++--------- 1 file changed, 54 insertions(+), 60 deletions(-) diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean index 49e23d141..509e8115a 100644 --- a/Cslib/Computability/Languages/MyhillNerode.lean +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -32,16 +32,15 @@ public import Cslib.Computability.Languages.RegularLanguage * [Wikipedia contributors, Myhill–Nerode theorem][WikipediaMyhillNerode2026] -/ +variable {α State : Type*} + namespace Language -open Cslib -open scoped RightCongruence -open Language -open Automata Automata.DA Automata.DA.FinAcc Acceptor -variable {α : Type*} {l : Language α} +open Cslib Language Automata DA FinAcc Acceptor +open scoped RightCongruence /-- The Nerode congruence of a language `l` is a right congruence on strings where two -strings are related iff. all their right extensions are either both in the language +strings are related iff all their right extensions are either both in the language or both not in it. -/ instance NerodeCongruence (l : Language α) : RightCongruence α where r x y := ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l @@ -51,20 +50,24 @@ instance NerodeCongruence (l : Language α) : RightCongruence α where right_cov := ⟨fun a {x y} (h : ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l) z => List.append_assoc x a z ▸ List.append_assoc y a z ▸ h (a ++ z)⟩ +/-- The quotient type of a Nerode congruence. -/ +abbrev NerodeQuotient (l : Language α) := Quotient (l.NerodeCongruence).eq + /-- The Nerode congruence of a language `l` gives rise to a DFA where each state corresponds to an equivalence class of the language under the Nerode congruence. Note that this is simply the DFA given rise to by the underlying right congruence with only the accept states specified here as `{⟦ x ⟧ | x ∈ l}`. -/ -def NerodeCongruence.toFinAcc (l : Language α) : - DA.FinAcc (Quotient (l.NerodeCongruence).eq) α := +def NerodeCongruenceDA (l : Language α) : DA.FinAcc (l.NerodeQuotient) α := { (l.NerodeCongruence).toDA with accept := (⟦·⟧) '' l } +variable {l : Language α} + /-- The DFA constructed from the Nerode congruence on `l` accepts `l`. -/ @[simp, scoped grind =] -theorem nerodeCongruence_to_finacc_acc (l : Language α) : - language (NerodeCongruence.toFinAcc l) = l := by +theorem nerodeCongruenceDA_language_eq (l : Language α) : + language (l.NerodeCongruenceDA) = l := by ext x - simp only [NerodeCongruence.toFinAcc, language, Acceptor.Accepts, congr_mtr_eq, Set.mem_image] + simp only [NerodeCongruenceDA, language, Acceptor.Accepts, congr_mtr_eq, Set.mem_image] constructor · rintro ⟨y, hy, heq⟩ simpa using (Quotient.eq.mp heq []).mp (by simpa using hy) @@ -73,18 +76,15 @@ theorem nerodeCongruence_to_finacc_acc (l : Language α) : /-- The statement that (two strings are related by the Nerode congruence `c_l` iff. all their right extensions are either both in the language or both not in it) is equivalent to stating that (all their right extensions are either both accepted or rejected by the DFA given rise to by `c_l`. -/ -theorem nerodeCongruence_accepts_apply - (M : DA.FinAcc State α) (x y : List α) : +theorem da_nerodeCongruence_iff {State : Type*} (M : DA.FinAcc State α) (x y : List α) : ((language M).NerodeCongruence).r x y ↔ - ∀ z, - M.mtr (M.mtr M.start x) z ∈ M.accept ↔ - M.mtr (M.mtr M.start y) z ∈ M.accept := by + ∀ z, M.mtr (M.mtr M.start x) z ∈ M.accept ↔ M.mtr (M.mtr M.start y) z ∈ M.accept := by simp only [FLTS.mtr, ← List.foldl_append] rfl /-- If `l` is regular, then `α*/c_l` is finite. -/ -theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : - Finite (Quotient (l.NerodeCongruence).eq) := by +theorem IsRegular.finite_nerodeQuotient (h : l.IsRegular) : + Finite (l.NerodeQuotient) := by rcases IsRegular.iff_dfa.mp h with ⟨State, hFin, M, hM⟩ rw [← hM] apply Finite.of_surjective @@ -95,7 +95,7 @@ theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : | h x => exact ⟨M.mtr M.start x, Quotient.sound (by change ((language M).NerodeCongruence).r _ x - rw [nerodeCongruence_accepts_apply] + rw [da_nerodeCongruence_iff] intro z have heps : M.mtr M.start (Classical.epsilon (fun y => M.mtr M.start y = M.mtr M.start x)) = M.mtr M.start x := @@ -106,15 +106,15 @@ theorem IsRegular.finite_range_nerode_quotient (h : l.IsRegular) : /-- `l` is regular if and only if `α*/c_l` is finite. -/ @[simp, scoped grind =] -theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : - l.IsRegular ↔ Finite (Quotient (l.NerodeCongruence).eq) := by +theorem IsRegular.iff_finite_nerodeQuotient {l : Language α} : + l.IsRegular ↔ Finite (l.NerodeQuotient) := by constructor · intro h - exact IsRegular.finite_range_nerode_quotient h + exact IsRegular.finite_nerodeQuotient h · intro h - letI : Fintype (Quotient (l.NerodeCongruence).eq) := Fintype.ofFinite _ - set e := Fintype.equivFin (Quotient (l.NerodeCongruence).eq) - set M := NerodeCongruence.toFinAcc l + letI : Fintype (l.NerodeQuotient) := Fintype.ofFinite _ + set e := Fintype.equivFin (l.NerodeQuotient) + set M := l.NerodeCongruenceDA refine IsRegular.iff_dfa.mpr ⟨Fin _, inferInstance, { tr := fun s x => e (M.tr (e.symm s) x) start := e M.start @@ -128,20 +128,18 @@ theorem IsRegular_iff_finite_eqv_cls_wrt_nerode {l : Language α} : ext x change List.foldl (fun s x => e (M.tr (e.symm s) x)) (e M.start) x ∈ e '' M.accept ↔ x ∈ l simp only [hfoldl, Set.mem_image_equiv, Equiv.symm_apply_apply, - ← nerodeCongruence_to_finacc_acc l, language, Acceptor.Accepts, FLTS.mtr] + ← nerodeCongruenceDA_language_eq l, language, Acceptor.Accepts, FLTS.mtr] exact Iff.of_eq rfl --- /-- Given a set of strings all distinguishable by `l` (i.e., not related to each other by `c_l`), the number of states in the DFA accepting `l` is at least the number of strings in the set. -/ @[simp] -theorem lower_bound_num_states_dfa_acc [Finite States] {l : Language α} - {M : DA.FinAcc States α} {ws : Set (List α)} [Finite ws] - (hws : ws.Pairwise (fun x y => ¬(l.NerodeCongruence).r x y)) - (hM : language M = l) : - Nat.card States ≥ Nat.card ws := by +theorem dfa_num_state_ge {State : Type*} [Finite State] {l : Language α} + {M : DA.FinAcc State α} {ws : Set (List α)} [Finite ws] + (hws : ws.Pairwise (¬ (l.NerodeCongruence).r · ·)) (hM : language M = l) : + Nat.card State ≥ Nat.card ws := by classical - letI : Fintype States := Fintype.ofFinite _ + letI : Fintype State := Fintype.ofFinite _ letI : Fintype ws := Fintype.ofFinite _ rw [Nat.card_eq_fintype_card, Nat.card_eq_fintype_card] by_contra h @@ -153,16 +151,16 @@ theorem lower_bound_num_states_dfa_acc [Finite States] {l : Language α} Fintype.exists_ne_map_eq_of_card_lt (f := fun w : ws => M.mtr M.start w) (by omega) rw [← hM] at hws exact hws hx hy (fun h => hne (Subtype.ext h)) - ((nerodeCongruence_accepts_apply M x y).mpr (fun z => heq ▸ Iff.rfl)) + ((da_nerodeCongruence_iff M x y).mpr (fun z => heq ▸ Iff.rfl)) -- Myhill-Nerode (2) /-- All DFAs accepting `l` must have at least as many states as the number of equivalence classes of `α*` under the Nerode congruence `c_l` induced by `l` (i.e., `|α*/c_l|`). -/ @[simp] -theorem minimum_dfa_states_eq_num_eqv_clss_nerode {M : DA.FinAcc States α} - [Finite States] [Finite (Quotient ((language M).NerodeCongruence).eq)] : - Nat.card States ≥ Nat.card (Quotient ((language M).NerodeCongruence).eq) := by +theorem dfa_num_state_min {State : Type*} {M : DA.FinAcc State α} + [Finite State] [Finite (language M).NerodeQuotient] : + Nat.card State ≥ Nat.card (language M).NerodeQuotient := by classical let ws : Set (List α) := Set.range (Quotient.out : Quotient ((language M).NerodeCongruence).eq → List α) @@ -172,26 +170,23 @@ theorem minimum_dfa_states_eq_num_eqv_clss_nerode {M : DA.FinAcc States α} exact hne (by simpa using Quotient.sound h) have card_hws_eq : Nat.card ws = Nat.card (Quotient ((language M).NerodeCongruence).eq) := Nat.card_congr (Equiv.ofInjective _ Quotient.out_injective).symm - exact card_hws_eq ▸ lower_bound_num_states_dfa_acc hws rfl + exact card_hws_eq ▸ dfa_num_state_ge hws rfl -- end Language -namespace Cslib -namespace Automata.DA +namespace Cslib.Automata.DA.FinAcc -open Cslib +open Cslib Language Automata DA FinAcc Acceptor open scoped RightCongruence -open Language -open Automata Automata.DA Automata.DA.FinAcc Acceptor /-- The minimal DFA accepting `l` has `|α*/c_l|` states. -/ -def FinAcc.IsMinimalAutomaton [Finite States] (M : DA.FinAcc States α) (l : Language α) := - language M = l ∧ Nat.card States = Nat.card (Quotient (l.NerodeCongruence).eq) +def IsMinimalAutomaton [Finite State] (M : FinAcc State α) (l : Language α) := + language M = l ∧ Nat.card State = Nat.card (l.NerodeQuotient) /-- Given a DFA `M`, two strings are related iff. they reach the same state under when run through `M`. The Nerode congruence is the state congruence wrt. the minimal DFA accepting `l`. -/ -instance StateCongruence (M : DA.FinAcc States α) : RightCongruence α where +instance StateCongruence (M : FinAcc State α) : RightCongruence α where r x y := ∀ z, M.mtr M.start (x ++ z) = M.mtr M.start (y ++ z) iseqv := ⟨by intro x z; rfl, by intro x y h z; symm; exact h z, by intro x y z h_1 h_2 w; exact (h_1 w).trans (h_2 w)⟩ @@ -199,10 +194,12 @@ instance StateCongruence (M : DA.FinAcc States α) : RightCongruence α where intro a x y h z simpa [List.append_assoc, FLTS.mtr_concat_eq] using h (a ++ z)⟩ +variable {M : FinAcc State α} + /-- The Nerode congruence is the most coarse state congruence given a language. -/ @[simp] -theorem stateCongruence_refines_nerodeCongruence {M : DA.FinAcc States α} : - ∀ x y, (StateCongruence M).r x y → ((language M).NerodeCongruence ).r x y := by +theorem stateCongruence_le_nerodeCongruence : + ∀ x y, (StateCongruence M).r x y → ((language M).NerodeCongruence).r x y := by intro x y h z constructor · intro hx @@ -218,21 +215,21 @@ theorem stateCongruence_refines_nerodeCongruence {M : DA.FinAcc States α} : /-- The minimal DFA `M` accepting `l` is unique up to unique isomorphism. -/ @[simp] -theorem unique_minimal_dfa (M : DA.FinAcc States α) [Finite States] +theorem unique_minimal [Finite State] (l : Language α) (hReg : l.IsRegular) (hMin : M.IsMinimalAutomaton l) : - ∃! φ : States ≃ Quotient (l.NerodeCongruence).eq, + ∃! φ : State ≃ l.NerodeQuotient, ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by obtain ⟨hML, hCard⟩ := hMin - haveI : Finite (Quotient (l.NerodeCongruence).eq) := - Language.IsRegular_iff_finite_eqv_cls_wrt_nerode.mp hReg - letI : Fintype States := Fintype.ofFinite _ - letI : Fintype (Quotient (l.NerodeCongruence).eq) := Fintype.ofFinite _ + haveI : Finite (l.NerodeQuotient) := + Language.IsRegular.iff_finite_nerodeQuotient.mp hReg + letI : Fintype State := Fintype.ofFinite _ + letI : Fintype (l.NerodeQuotient) := Fintype.ofFinite _ subst hML - let φ : States → Quotient ((language M).NerodeCongruence).eq := + let φ : State → Quotient ((language M).NerodeCongruence).eq := fun s => ⟦ Classical.epsilon (fun x : List α => M.mtr M.start x = s) ⟧ have hφ : ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := fun x => by apply Quotient.sound - apply stateCongruence_refines_nerodeCongruence + apply stateCongruence_le_nerodeCongruence intro z have := @Classical.epsilon_spec _ (fun y : List α => M.mtr M.start y = M.mtr M.start x) ⟨x, rfl⟩ simp only [FLTS.mtr, List.foldl_append] at this ⊢; rw [this] @@ -248,8 +245,5 @@ theorem unique_minimal_dfa (M : DA.FinAcc States α) [Finite States] induction h : φ s using Quotient.inductionOn with | h x => exact ⟨x, hφ_inj ((hφ x).trans h.symm)⟩ simp [φ_equiv, Equiv.ofBijective, hφ, hψ] --- - -end Automata.DA -end Cslib +end Cslib.Automata.DA.FinAcc From 7aff3ea0d1b35377c79559a6fd7e75c395c91d8c Mon Sep 17 00:00:00 2001 From: Akhilesh Balaji Date: Wed, 15 Apr 2026 11:35:50 +0530 Subject: [PATCH 07/13] chore: made minor stylistic changes in arguments and added definitions to docstrings --- .../Computability/Languages/MyhillNerode.lean | 44 ++++++++++--------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean index 509e8115a..59da6924c 100644 --- a/Cslib/Computability/Languages/MyhillNerode.lean +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -10,9 +10,11 @@ public import Cslib.Computability.Languages.RegularLanguage @[expose] public section -/-! # All three subparts of the Myhill-Nerode Theorem for DFAs +/-! # All three parts of the Myhill-Nerode Theorem for DFAs listed at [WikipediaMyhillNerode2026]. -(1) `L` regular iff. `∼_L` has a finite number of equivalence classes `N`. +`~_L` is the Nerode congruence on a language `L`. + +(1) `L` regular iff `∼_L` has a finite number of equivalence classes `N`. (2) `N` is the number of states in the minimal DFA accepting `L`. (3) The minimal DFA is unique up to unique isomorphism. That is, for any minimal DFA acceptor, there exists exactly one isomorphism from it to the @@ -62,7 +64,7 @@ def NerodeCongruenceDA (l : Language α) : DA.FinAcc (l.NerodeQuotient) α := variable {l : Language α} -/-- The DFA constructed from the Nerode congruence on `l` accepts `l`. -/ +/-- The DFA constructed from the Nerode congruence `c_l` on `l` accepts `l`. -/ @[simp, scoped grind =] theorem nerodeCongruenceDA_language_eq (l : Language α) : language (l.NerodeCongruenceDA) = l := by @@ -73,16 +75,16 @@ theorem nerodeCongruenceDA_language_eq (l : Language α) : simpa using (Quotient.eq.mp heq []).mp (by simpa using hy) · exact fun hx => ⟨x, hx, rfl⟩ -/-- The statement that (two strings are related by the Nerode congruence `c_l` iff. all their right +/-- The statement (two strings are related by the Nerode congruence `c_l` on `l` iff all their right extensions are either both in the language or both not in it) is equivalent to stating that (all -their right extensions are either both accepted or rejected by the DFA given rise to by `c_l`. -/ +their right extensions are either both accepted or rejected by the DFA given rise to by `c_l`.) -/ theorem da_nerodeCongruence_iff {State : Type*} (M : DA.FinAcc State α) (x y : List α) : ((language M).NerodeCongruence).r x y ↔ ∀ z, M.mtr (M.mtr M.start x) z ∈ M.accept ↔ M.mtr (M.mtr M.start y) z ∈ M.accept := by simp only [FLTS.mtr, ← List.foldl_append] rfl -/-- If `l` is regular, then `α*/c_l` is finite. -/ +/-- If `l` is regular then the Nerode congruence on `l` has finitely many equivalence classes. -/ theorem IsRegular.finite_nerodeQuotient (h : l.IsRegular) : Finite (l.NerodeQuotient) := by rcases IsRegular.iff_dfa.mp h with ⟨State, hFin, M, hM⟩ @@ -104,7 +106,7 @@ theorem IsRegular.finite_nerodeQuotient (h : l.IsRegular) : -- Myhill-Nerode (1) -/-- `l` is regular if and only if `α*/c_l` is finite. -/ +/-- `l` is regular iff the Nerode congruence on `l` has finitely many equivalence classes. -/ @[simp, scoped grind =] theorem IsRegular.iff_finite_nerodeQuotient {l : Language α} : l.IsRegular ↔ Finite (l.NerodeQuotient) := by @@ -131,14 +133,14 @@ theorem IsRegular.iff_finite_nerodeQuotient {l : Language α} : ← nerodeCongruenceDA_language_eq l, language, Acceptor.Accepts, FLTS.mtr] exact Iff.of_eq rfl -/-- Given a set of strings all distinguishable by `l` (i.e., not related to each other by `c_l`), -the number of states in the DFA accepting `l` is at least the number of strings in the set. -/ +/-- Given a set of strings all distinguishable by `l` (i.e., not related to each other by the Nerode +congruence on `l`), the number of states in the DFA accepting `l` is at least the number of strings +in the set. -/ @[simp] theorem dfa_num_state_ge {State : Type*} [Finite State] {l : Language α} {M : DA.FinAcc State α} {ws : Set (List α)} [Finite ws] (hws : ws.Pairwise (¬ (l.NerodeCongruence).r · ·)) (hM : language M = l) : Nat.card State ≥ Nat.card ws := by - classical letI : Fintype State := Fintype.ofFinite _ letI : Fintype ws := Fintype.ofFinite _ rw [Nat.card_eq_fintype_card, Nat.card_eq_fintype_card] @@ -156,14 +158,14 @@ theorem dfa_num_state_ge {State : Type*} [Finite State] {l : Language α} -- Myhill-Nerode (2) /-- All DFAs accepting `l` must have at least as many states as the number of equivalence classes -of `α*` under the Nerode congruence `c_l` induced by `l` (i.e., `|α*/c_l|`). -/ +of the Nerode congruence on `l`. -/ @[simp] -theorem dfa_num_state_min {State : Type*} {M : DA.FinAcc State α} - [Finite State] [Finite (language M).NerodeQuotient] : +theorem dfa_num_state_min {State : Type} {M : DA.FinAcc State α} [Finite State] : Nat.card State ≥ Nat.card (language M).NerodeQuotient := by - classical let ws : Set (List α) := Set.range (Quotient.out : Quotient ((language M).NerodeCongruence).eq → List α) + haveI : Finite (language M).NerodeQuotient := + IsRegular.iff_finite_nerodeQuotient.mp (IsRegular.iff_dfa.mpr ⟨State, inferInstance, M, rfl⟩) haveI : Finite ws := Set.finite_range _ |>.to_subtype have hws : ws.Pairwise (fun x y => ¬((language M).NerodeCongruence).r x y) := by rintro _ ⟨qx, rfl⟩ _ ⟨qy, rfl⟩ hne h @@ -180,12 +182,13 @@ namespace Cslib.Automata.DA.FinAcc open Cslib Language Automata DA FinAcc Acceptor open scoped RightCongruence -/-- The minimal DFA accepting `l` has `|α*/c_l|` states. -/ -def IsMinimalAutomaton [Finite State] (M : FinAcc State α) (l : Language α) := +/-- The minimal DFA accepting `l` has the same number of states as the number of equivalence classes +of the Nerode congruence on `l`. -/ +def IsMinimalAutomaton (M : FinAcc State α) (l : Language α) := language M = l ∧ Nat.card State = Nat.card (l.NerodeQuotient) -/-- Given a DFA `M`, two strings are related iff. they reach the same state under when run through -`M`. The Nerode congruence is the state congruence wrt. the minimal DFA accepting `l`. -/ +/-- Given a DFA `M`, two strings are related iff they reach the same state under when run through +`M`. The Nerode congruence is the state congruence with respect to the minimal DFA accepting `l`. -/ instance StateCongruence (M : FinAcc State α) : RightCongruence α where r x y := ∀ z, M.mtr M.start (x ++ z) = M.mtr M.start (y ++ z) iseqv := ⟨by intro x z; rfl, by intro x y h z; symm; exact h z, @@ -213,12 +216,11 @@ theorem stateCongruence_le_nerodeCongruence : -- Myhill-Nerode (3) -/-- The minimal DFA `M` accepting `l` is unique up to unique isomorphism. -/ +/-- The minimal DFA `M` accepting the language `l` is unique up to unique isomorphism. -/ @[simp] theorem unique_minimal [Finite State] (l : Language α) (hReg : l.IsRegular) (hMin : M.IsMinimalAutomaton l) : - ∃! φ : State ≃ l.NerodeQuotient, - ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by + ∃! φ : State ≃ l.NerodeQuotient, ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by obtain ⟨hML, hCard⟩ := hMin haveI : Finite (l.NerodeQuotient) := Language.IsRegular.iff_finite_nerodeQuotient.mp hReg From 4aa324b9141a6ebeb96a9900d537c2c90939ccce Mon Sep 17 00:00:00 2001 From: Akhilesh Balaji Date: Thu, 16 Apr 2026 11:14:11 +0530 Subject: [PATCH 08/13] chore: remove redundant lines and remove ambiguity in congruence definitions and docstrings --- .../Computability/Languages/MyhillNerode.lean | 40 ++++++++++--------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean index 59da6924c..477dc6304 100644 --- a/Cslib/Computability/Languages/MyhillNerode.lean +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -12,18 +12,22 @@ public import Cslib.Computability.Languages.RegularLanguage /-! # All three parts of the Myhill-Nerode Theorem for DFAs listed at [WikipediaMyhillNerode2026]. -`~_L` is the Nerode congruence on a language `L`. +Let `l`` be a language on an alphabet `α`. `c_l` is the Nerode congruence on a language `L`. -(1) `L` regular iff `∼_L` has a finite number of equivalence classes `N`. -(2) `N` is the number of states in the minimal DFA accepting `L`. +The Nerode congruence (henceforth called `c_l`) of a language `l` is a right congruence on +strings where two strings are related iff all their right extensions are either both in the language +or both not in it. + +(1) `l` is regular iff `c_l` has a finite number of equivalence classes `N`. +(2) `N` is the number of states in the minimal DFA accepting `l`. (3) The minimal DFA is unique up to unique isomorphism. That is, for any minimal DFA acceptor, there exists exactly one isomorphism from it to the following one: > Let each equivalence class `⟦ x ⟧` correspond to a state, and let state - transitions be `a : ⟦ x ⟧ → ⟦ x a ⟧` for each `a ∈ Σ`. + transitions be `a : ⟦ x ⟧ → ⟦ x a ⟧` for each `a ∈ α`. Let the starting state be `⟦ ϵ ⟧`, and the accepting states be `⟦ x ⟧` where - `x ∈ L`. + `x ∈ l`. ## References @@ -41,16 +45,16 @@ namespace Language open Cslib Language Automata DA FinAcc Acceptor open scoped RightCongruence -/-- The Nerode congruence of a language `l` is a right congruence on strings where two -strings are related iff all their right extensions are either both in the language +/-- The Nerode congruence (henceforth called `c_l`) of a language `l` is a right congruence on +strings where two strings are related iff all their right extensions are either both in the language or both not in it. -/ instance NerodeCongruence (l : Language α) : RightCongruence α where r x y := ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l iseqv.refl := fun _ _ => Iff.rfl iseqv.symm := fun h z => (h z).symm iseqv.trans := fun h_1 h_2 z => (h_1 z).trans (h_2 z) - right_cov := ⟨fun a {x y} (h : ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l) z => - List.append_assoc x a z ▸ List.append_assoc y a z ▸ h (a ++ z)⟩ + right_cov.elim := fun a {x y} (h : ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l) z => + List.append_assoc x a z ▸ List.append_assoc y a z ▸ h (a ++ z) /-- The quotient type of a Nerode congruence. -/ abbrev NerodeQuotient (l : Language α) := Quotient (l.NerodeCongruence).eq @@ -80,7 +84,7 @@ extensions are either both in the language or both not in it) is equivalent to s their right extensions are either both accepted or rejected by the DFA given rise to by `c_l`.) -/ theorem da_nerodeCongruence_iff {State : Type*} (M : DA.FinAcc State α) (x y : List α) : ((language M).NerodeCongruence).r x y ↔ - ∀ z, M.mtr (M.mtr M.start x) z ∈ M.accept ↔ M.mtr (M.mtr M.start y) z ∈ M.accept := by + ∀ z, M.mtr (M.mtr M.start x) z ∈ M.accept ↔ M.mtr (M.mtr M.start y) z ∈ M.accept := by simp only [FLTS.mtr, ← List.foldl_append] rfl @@ -170,9 +174,8 @@ theorem dfa_num_state_min {State : Type} {M : DA.FinAcc State α} [Finite State] have hws : ws.Pairwise (fun x y => ¬((language M).NerodeCongruence).r x y) := by rintro _ ⟨qx, rfl⟩ _ ⟨qy, rfl⟩ hne h exact hne (by simpa using Quotient.sound h) - have card_hws_eq : Nat.card ws = Nat.card (Quotient ((language M).NerodeCongruence).eq) := - Nat.card_congr (Equiv.ofInjective _ Quotient.out_injective).symm - exact card_hws_eq ▸ dfa_num_state_ge hws rfl + exact (Nat.card_congr (Equiv.ofInjective _ Quotient.out_injective).symm) + ▸ dfa_num_state_ge hws rfl -- end Language @@ -191,11 +194,12 @@ def IsMinimalAutomaton (M : FinAcc State α) (l : Language α) := `M`. The Nerode congruence is the state congruence with respect to the minimal DFA accepting `l`. -/ instance StateCongruence (M : FinAcc State α) : RightCongruence α where r x y := ∀ z, M.mtr M.start (x ++ z) = M.mtr M.start (y ++ z) - iseqv := ⟨by intro x z; rfl, by intro x y h z; symm; exact h z, - by intro x y z h_1 h_2 w; exact (h_1 w).trans (h_2 w)⟩ - right_cov := ⟨by - intro a x y h z - simpa [List.append_assoc, FLTS.mtr_concat_eq] using h (a ++ z)⟩ + iseqv.refl := by intro x z; rfl + iseqv.symm := by intro x y h z; symm; exact h z + iseqv.trans := by intro x y z h_1 h_2 w; exact (h_1 w).trans (h_2 w) + right_cov.elim := by + intro a x y h z + simpa [List.append_assoc, FLTS.mtr_concat_eq] using h (a ++ z) variable {M : FinAcc State α} From 1a85fa5db0da719a43412dc03d01978edc370ce6 Mon Sep 17 00:00:00 2001 From: Akhilesh Balaji Date: Thu, 16 Apr 2026 11:32:46 +0530 Subject: [PATCH 09/13] chore: minor typographical changes --- Cslib/Computability/Languages/MyhillNerode.lean | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean index 477dc6304..501fdcc6e 100644 --- a/Cslib/Computability/Languages/MyhillNerode.lean +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -12,7 +12,7 @@ public import Cslib.Computability.Languages.RegularLanguage /-! # All three parts of the Myhill-Nerode Theorem for DFAs listed at [WikipediaMyhillNerode2026]. -Let `l`` be a language on an alphabet `α`. `c_l` is the Nerode congruence on a language `L`. +Let `l` be a language on an alphabet `α`. `c_l` is the Nerode congruence on a language `L`. The Nerode congruence (henceforth called `c_l`) of a language `l` is a right congruence on strings where two strings are related iff all their right extensions are either both in the language @@ -251,5 +251,6 @@ theorem unique_minimal [Finite State] induction h : φ s using Quotient.inductionOn with | h x => exact ⟨x, hφ_inj ((hφ x).trans h.symm)⟩ simp [φ_equiv, Equiv.ofBijective, hφ, hψ] +-- end Cslib.Automata.DA.FinAcc From 95e82f41dc9f1036b380dd39e49a74e725f24786 Mon Sep 17 00:00:00 2001 From: Ching-Tsun Chou Date: Thu, 16 Apr 2026 11:18:01 -0700 Subject: [PATCH 10/13] re-word the main comment --- .../Computability/Languages/MyhillNerode.lean | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean index 501fdcc6e..9f42be770 100644 --- a/Cslib/Computability/Languages/MyhillNerode.lean +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -10,24 +10,24 @@ public import Cslib.Computability.Languages.RegularLanguage @[expose] public section -/-! # All three parts of the Myhill-Nerode Theorem for DFAs listed at [WikipediaMyhillNerode2026]. +/-! # Myhill-Nerode Theorem -Let `l` be a language on an alphabet `α`. `c_l` is the Nerode congruence on a language `L`. +Let `l` be a language on an alphabet `α`. The Nerode congruence (referred to as `c_l` +in comments below) of a language `l` is a right congruence on strings where two strings are +related iff all their right extensions are either both in the language or both not in it. -The Nerode congruence (henceforth called `c_l`) of a language `l` is a right congruence on -strings where two strings are related iff all their right extensions are either both in the language -or both not in it. +The Myhill-Nerode theorem has three parts [WikipediaMyhillNerode2026]: -(1) `l` is regular iff `c_l` has a finite number of equivalence classes `N`. -(2) `N` is the number of states in the minimal DFA accepting `l`. -(3) The minimal DFA is unique up to unique isomorphism. That is, for any - minimal DFA acceptor, there exists exactly one isomorphism from it to the - following one: +(1) `l` is regular iff `c_l` has a finite number `N` of equivalence classes. - > Let each equivalence class `⟦ x ⟧` correspond to a state, and let state - transitions be `a : ⟦ x ⟧ → ⟦ x a ⟧` for each `a ∈ α`. - Let the starting state be `⟦ ϵ ⟧`, and the accepting states be `⟦ x ⟧` where - `x ∈ l`. +(2) `N` is the number of states of the minimal DFA accepting `l`. + +(3) The minimal DFA is unique up to unique isomorphism. That is, for any + minimal DFA accepting `l`, there exists exactly an isomorphism from it to the + canonical DFA whose states are the equivalence classses of `c_l`, whose + state transitions are of the form `⟦ x ⟧ → ⟦ x ++ [a] ⟧` (where `a : α` + and `x : List α`), whose initial state is `⟦ [] ⟧`, and whose accepting states + are `{ ⟦ x ⟧ | x ∈ l }`. ## References From 856a75c78b3d00cc9a8e54653e5020094bc64790 Mon Sep 17 00:00:00 2001 From: Ching-Tsun Chou Date: Thu, 16 Apr 2026 11:23:27 -0700 Subject: [PATCH 11/13] remove redundant lines --- Cslib/Computability/Languages/MyhillNerode.lean | 2 -- 1 file changed, 2 deletions(-) diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean index 9f42be770..c4029b6b5 100644 --- a/Cslib/Computability/Languages/MyhillNerode.lean +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -176,7 +176,6 @@ theorem dfa_num_state_min {State : Type} {M : DA.FinAcc State α} [Finite State] exact hne (by simpa using Quotient.sound h) exact (Nat.card_congr (Equiv.ofInjective _ Quotient.out_injective).symm) ▸ dfa_num_state_ge hws rfl --- end Language @@ -251,6 +250,5 @@ theorem unique_minimal [Finite State] induction h : φ s using Quotient.inductionOn with | h x => exact ⟨x, hφ_inj ((hφ x).trans h.symm)⟩ simp [φ_equiv, Equiv.ofBijective, hφ, hψ] --- end Cslib.Automata.DA.FinAcc From e9f96eaa22a3c3ab0fe64b9dc394f43c12fa2bdf Mon Sep 17 00:00:00 2001 From: Ching-Tsun Chou Date: Mon, 20 Apr 2026 13:38:06 -0700 Subject: [PATCH 12/13] fix linter errors and remove some unnecessary @[simp] --- Cslib/Computability/Languages/MyhillNerode.lean | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean index c4029b6b5..c5b92ed84 100644 --- a/Cslib/Computability/Languages/MyhillNerode.lean +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -48,7 +48,8 @@ open scoped RightCongruence /-- The Nerode congruence (henceforth called `c_l`) of a language `l` is a right congruence on strings where two strings are related iff all their right extensions are either both in the language or both not in it. -/ -instance NerodeCongruence (l : Language α) : RightCongruence α where +@[implicit_reducible] +def NerodeCongruence (l : Language α) : RightCongruence α where r x y := ∀ z, x ++ z ∈ l ↔ y ++ z ∈ l iseqv.refl := fun _ _ => Iff.rfl iseqv.symm := fun h z => (h z).symm @@ -111,7 +112,7 @@ theorem IsRegular.finite_nerodeQuotient (h : l.IsRegular) : -- Myhill-Nerode (1) /-- `l` is regular iff the Nerode congruence on `l` has finitely many equivalence classes. -/ -@[simp, scoped grind =] +@[scoped grind =] theorem IsRegular.iff_finite_nerodeQuotient {l : Language α} : l.IsRegular ↔ Finite (l.NerodeQuotient) := by constructor @@ -140,7 +141,6 @@ theorem IsRegular.iff_finite_nerodeQuotient {l : Language α} : /-- Given a set of strings all distinguishable by `l` (i.e., not related to each other by the Nerode congruence on `l`), the number of states in the DFA accepting `l` is at least the number of strings in the set. -/ -@[simp] theorem dfa_num_state_ge {State : Type*} [Finite State] {l : Language α} {M : DA.FinAcc State α} {ws : Set (List α)} [Finite ws] (hws : ws.Pairwise (¬ (l.NerodeCongruence).r · ·)) (hM : language M = l) : @@ -163,7 +163,6 @@ theorem dfa_num_state_ge {State : Type*} [Finite State] {l : Language α} /-- All DFAs accepting `l` must have at least as many states as the number of equivalence classes of the Nerode congruence on `l`. -/ -@[simp] theorem dfa_num_state_min {State : Type} {M : DA.FinAcc State α} [Finite State] : Nat.card State ≥ Nat.card (language M).NerodeQuotient := by let ws : Set (List α) := Set.range @@ -191,7 +190,8 @@ def IsMinimalAutomaton (M : FinAcc State α) (l : Language α) := /-- Given a DFA `M`, two strings are related iff they reach the same state under when run through `M`. The Nerode congruence is the state congruence with respect to the minimal DFA accepting `l`. -/ -instance StateCongruence (M : FinAcc State α) : RightCongruence α where +@[implicit_reducible] +def StateCongruence (M : FinAcc State α) : RightCongruence α where r x y := ∀ z, M.mtr M.start (x ++ z) = M.mtr M.start (y ++ z) iseqv.refl := by intro x z; rfl iseqv.symm := by intro x y h z; symm; exact h z @@ -203,7 +203,6 @@ instance StateCongruence (M : FinAcc State α) : RightCongruence α where variable {M : FinAcc State α} /-- The Nerode congruence is the most coarse state congruence given a language. -/ -@[simp] theorem stateCongruence_le_nerodeCongruence : ∀ x y, (StateCongruence M).r x y → ((language M).NerodeCongruence).r x y := by intro x y h z @@ -220,7 +219,6 @@ theorem stateCongruence_le_nerodeCongruence : -- Myhill-Nerode (3) /-- The minimal DFA `M` accepting the language `l` is unique up to unique isomorphism. -/ -@[simp] theorem unique_minimal [Finite State] (l : Language α) (hReg : l.IsRegular) (hMin : M.IsMinimalAutomaton l) : ∃! φ : State ≃ l.NerodeQuotient, ∀ x, φ (M.mtr M.start x) = ⟦ x ⟧ := by From 6810b72d4f8b520bfc4d6dffe25cfe06caae823e Mon Sep 17 00:00:00 2001 From: Ching-Tsun Chou Date: Sat, 25 Apr 2026 13:31:07 -0700 Subject: [PATCH 13/13] chore: conform to header linter (#517) --- Cslib/Computability/Languages/MyhillNerode.lean | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cslib/Computability/Languages/MyhillNerode.lean b/Cslib/Computability/Languages/MyhillNerode.lean index c5b92ed84..cac0a62ea 100644 --- a/Cslib/Computability/Languages/MyhillNerode.lean +++ b/Cslib/Computability/Languages/MyhillNerode.lean @@ -8,8 +8,6 @@ module public import Cslib.Computability.Languages.RegularLanguage -@[expose] public section - /-! # Myhill-Nerode Theorem Let `l` be a language on an alphabet `α`. The Nerode congruence (referred to as `c_l` @@ -38,6 +36,8 @@ The Myhill-Nerode theorem has three parts [WikipediaMyhillNerode2026]: * [Wikipedia contributors, Myhill–Nerode theorem][WikipediaMyhillNerode2026] -/ +@[expose] public section + variable {α State : Type*} namespace Language