From 43ae00249b600e752dec691db3701a36c8b4bd72 Mon Sep 17 00:00:00 2001 From: Robert Hawkins Date: Fri, 19 Jun 2026 23:10:35 -0700 Subject: [PATCH 1/6] refactor(Phonology): tidy Tone/Constraints.lean docstring + variables --- Linglib/Phonology/Tone/Constraints.lean | 111 ++++++++++-------------- 1 file changed, 45 insertions(+), 66 deletions(-) diff --git a/Linglib/Phonology/Tone/Constraints.lean b/Linglib/Phonology/Tone/Constraints.lean index bc0c9cd80..b29b85873 100644 --- a/Linglib/Phonology/Tone/Constraints.lean +++ b/Linglib/Phonology/Tone/Constraints.lean @@ -2,37 +2,32 @@ import Linglib.Phonology.Autosegmental.Floating import Linglib.Phonology.OptimalityTheory.DirectionalTableau /-! -# Tonal Constraints — Generic Constructors over `FloatingForm` -[wolf-2007] [cahill-2004] [mcpherson-lamont-2026] +# Tonal constraints OT/HS constraint constructors over the `FloatingForm S TRN` autosegmental -representation (`Phonology/Autosegmental/Floating.lean`). -Generic over the segment type `S`. - -All constraints are `DirectionalConstraint`s: -- Genuinely directional (e.g., `*FLOAT^→`) emit indicator vectors with - positions in tier order. -- Parallel-by-nature emit singletons `[count]` (per `EvalMode.le_singleton`'s - singleton-degeneracy guarantee). - -## Constraint inventory (paper, eqs. 5, 7, 15-17, 23) - -- `starFloat` ← paper eq. 16 (`*FLOAT`, directional) -- `starTautDock` ← paper eq. 15 (`*TAUTDOCK`, after [wolf-2007]) -- `starCrowd threshold` ← paper eq. 5 (`*CROWD`, after [cahill-2004]) -- `starFall` ← paper eq. 23 (`*FALL`, falling-contour ban) -- `maxTone t` ← paper eq. 7c (`MAX(T)`) -- `depLinkTone t` ← paper eq. 7a (`DEP(link)/T`) -- `maxLinkTone t` ← paper eq. 7b (`MAX(link)/T`) -- `haveTone` ← paper eq. 17 (`HAVETONE`) - -## Faithfulness against the underlying form - -Faithfulness constraints (`maxTone`, `maxLinkTone`, `depLinkTone`) -compare surface state to the immutable underlying state stored in -`FloatingForm`. This is what makes the [mcpherson-lamont-2026] -fig. 3 multi-step asymmetry visible: without underlying-form tracking, -faithfulness can't fire and the LR-vs-RL distinction collapses. +representation (`Phonology/Autosegmental/Floating.lean`), generic over the segment +type `S`. Each is a `DirectionalConstraint`: genuinely directional ones (e.g. +`*FLOAT^→`) emit indicator vectors with positions in tier order, while +parallel-by-nature ones emit singletons `[count]` (per `EvalMode.le_singleton`'s +singleton-degeneracy guarantee). Equation numbers below are [mcpherson-lamont-2026]'s. + +## Main definitions + +* `starFloat` / `starFloatCount` — `*FLOAT` (eq. 16): position-aware indicator vs. count variant +* `starTautDock` — `*TAUTDOCK` (eq. 15, after [wolf-2007]) +* `starCrowd` — `*CROWD` (eq. 5): per-morpheme tone-count ceiling +* `starFall` — `*FALL` (eq. 23): falling-contour ban +* `starMlessL` — `*M [f.floatIndicator.sum] --- ============================================================================ --- § 3: *TAUTDOCK --- ============================================================================ +/-! ### *TAUTDOCK -/ /-- `*TAUTDOCK` (paper, eq. 15, after [wolf-2007]): one violation per GEN-inserted tautomorphic surface link. -/ @@ -118,36 +107,34 @@ def starTautDock : DirectionalConstraint (FloatingForm S TRN) where eval := fun f => [(f.surfaceLinks.filter (fun l => IsInsertedLink f l ∧ f.IsTautomorphic l)).card] --- ============================================================================ --- § 4: *CROWD (per-morpheme tone count) --- ============================================================================ +/-! ### *CROWD (per-morpheme tone count) -/ /-- The tone at index `k` belongs to morpheme `m`. -/ -def ToneInMorpheme (f : FloatingForm S TRN) (k : TierIdx) (m : Morpheme) : Prop := +def ToneInMorpheme (k : TierIdx) (m : Morpheme) : Prop := (f.upper[k]?).map TierSpec.morpheme = some m -instance (f : FloatingForm S TRN) (k : TierIdx) (m : Morpheme) : +instance (k : TierIdx) (m : Morpheme) : Decidable (ToneInMorpheme f k m) := inferInstanceAs (Decidable (_ = _)) /-- The TBU at index `i` belongs to morpheme `m`. -/ -def SegInMorpheme (f : FloatingForm S TRN) (i : SegIdx) (m : Morpheme) : Prop := +def SegInMorpheme (i : SegIdx) (m : Morpheme) : Prop := (f.lower[i]?).map SegSpec.morpheme = some m -instance (f : FloatingForm S TRN) (i : SegIdx) (m : Morpheme) : +instance (i : SegIdx) (m : Morpheme) : Decidable (SegInMorpheme f i m) := inferInstanceAs (Decidable (_ = _)) /-- The set of tone indices counting toward morpheme `m`'s tonal mass: surviving underlying tones of `m`, plus tones surface-linked to TBUs of `m`. -/ -def tonesForMorpheme (f : FloatingForm S TRN) (m : Morpheme) : Finset TierIdx := +def tonesForMorpheme (m : Morpheme) : Finset TierIdx := let ownAlive := (Finset.range f.upper.length).filter fun k => f.IsAlive k ∧ ToneInMorpheme f k m let docked := (f.surfaceLinks.filter fun l => SegInMorpheme f l.snd m).image Prod.fst ownAlive ∪ docked -/-- `*CROWD` (paper, eq. 5, after [cahill-2004]): one violation per +/-- `*CROWD` (paper, eq. 5): one violation per morpheme associated with more than `threshold` tones. Default `threshold = 2` matches the paper. @@ -165,9 +152,7 @@ def starCrowd (threshold : Nat := 2) : DirectionalConstraint (FloatingForm S TRN (f.upper.map TierSpec.morpheme).toFinset [(morphIds.filter (fun m => (tonesForMorpheme f m).card > threshold)).card] --- ============================================================================ --- § 5: *FALL (falling contours on multi-linked TBUs) --- ============================================================================ +/-! ### *FALL (falling contours on multi-linked TBUs) -/ /-- A tone pair `(t1, t2)` (in tier order) is **falling** iff it is HM, HL, or ML (paper, eq. 23). TRN's `H`, `M`, `L` are not @@ -207,9 +192,7 @@ def starFall : DirectionalConstraint (FloatingForm S TRN) where eval := fun f => [(List.range f.lower.length).countP (fun i => decide (HasFall (f.tierValues i)))] --- ============================================================================ --- § 5.5: *M.countP fun p => decide (p.1 = TRN.M ∧ p.2 = TRN.L)] --- ============================================================================ --- § 6: HAVETONE --- ============================================================================ +/-! ### HAVETONE -/ /-- `HAVETONE` (paper, eq. 17): one violation per syllable not associated to any tone. -/ @@ -244,9 +225,7 @@ def haveTone : DirectionalConstraint (FloatingForm S TRN) where eval := fun f => [(List.range f.lower.length).countP (fun i => (f.linksTo i).isEmpty)] --- ============================================================================ --- § 7: Faithfulness — Generic over Tone Value --- ============================================================================ +/-! ### Faithfulness — Generic over Tone Value -/ /-- `MAX(T)` (paper, eq. 7c): one violation per underlying tone of value `t` deleted by GEN. -/ From c4e24335fa48ea0efa960289e7f0217ff22679eb Mon Sep 17 00:00:00 2001 From: Robert Hawkins Date: Fri, 19 Jun 2026 23:15:00 -0700 Subject: [PATCH 2/6] docs(Phonology): trim verbose def docstrings in Tone/Constraints.lean --- Linglib/Phonology/Tone/Constraints.lean | 42 ++++++++----------------- 1 file changed, 13 insertions(+), 29 deletions(-) diff --git a/Linglib/Phonology/Tone/Constraints.lean b/Linglib/Phonology/Tone/Constraints.lean index b29b85873..815a4881d 100644 --- a/Linglib/Phonology/Tone/Constraints.lean +++ b/Linglib/Phonology/Tone/Constraints.lean @@ -28,6 +28,14 @@ Faithfulness (`maxTone`, `maxLinkTone`, `depLinkTone`) compares surface state to immutable underlying state stored in `FloatingForm`. Without that underlying-form tracking, faithfulness can't fire and the [mcpherson-lamont-2026] LR-vs-RL multi-step asymmetry collapses. + +`starFloat` (a position-aware indicator vector) and `starFloatCount` (the total count) +agree with at most one floating tone but diverge as the set grows: count comparison +cannot tell "delete leftmost" from "delete rightmost" — [mcpherson-lamont-2026]'s +eq. (62) divergent tie. So the parallel-vs-directional split lives in the *constraint* +(count vs. indicator), not the EVAL mode: `EvalMode.le .parallel` and +`EvalMode.le (.directional .leftToRight)` both apply the same `LexLE`; the flag documents +intent and selects vector reversal for the right-to-left case. -/ namespace Phonology.Tone @@ -72,26 +80,8 @@ def starFloat : DirectionalConstraint (FloatingForm S TRN) where family := .markedness eval := FloatingForm.floatIndicator -/-- `*FLOAT (count)`: count-based variant of `*FLOAT` that emits the - *total* floating-tone count as a singleton vector. Used in regular - (non-directional) HS where positions don't matter — only the - cardinality of the floating set. - - Distinct from `starFloat` (which emits the position-aware indicator - vector). The two coincide when there is at most one floating tone - but diverge as the floating set grows: count-based comparison - cannot distinguish "delete leftmost" from "delete rightmost" since - both reduce the count by 1. This is [mcpherson-lamont-2026]'s - eq. (62) "divergent ties" claim — regular HS with `starFloatCount` - cannot disambiguate `/kāk^H + rī^H + dō^H/` step 1. - - Architectural note: the substrate's `EvalMode.le .parallel` and - `EvalMode.le (.directional .leftToRight)` both use the same `LexLE` - on the violation vector. The parallel-vs-directional distinction - therefore lives in the *constraint* (count vs indicator), not the - EVAL mode flag. The flag remains useful for documenting intent and - for the right-to-left case (which uses `LexLE` on the *reversed* - vector). -/ +/-- `*FLOAT (count)`: count-based variant of `starFloat`, emitting the *total* + floating-tone count as a singleton `[count]` rather than a position-aware vector. -/ def starFloatCount : DirectionalConstraint (FloatingForm S TRN) where name := "*FLOAT (count)" family := .markedness @@ -134,15 +124,9 @@ def tonesForMorpheme (m : Morpheme) : Finset TierIdx := let docked := (f.surfaceLinks.filter fun l => SegInMorpheme f l.snd m).image Prod.fst ownAlive ∪ docked -/-- `*CROWD` (paper, eq. 5): one violation per - morpheme associated with more than `threshold` tones. Default - `threshold = 2` matches the paper. - - Counting includes surviving underlying tones of the morpheme PLUS - surface tones from other morphemes docked onto this morpheme's - TBUs. This is how the paper (p. 32) blocks rightward docking onto - `/rī^H/`: rī already has 2 tones (M + own H), so an external H - docking would make 3. -/ +/-- `*CROWD` (paper eq. 5): one violation per morpheme with more than `threshold` + tones (default 2), counting its surviving underlying tones plus tones docked onto + its TBUs from other morphemes. -/ def starCrowd (threshold : Nat := 2) : DirectionalConstraint (FloatingForm S TRN) where name := s!"*CROWD({threshold})" family := .markedness From f9f9bed28518e621dc1b669704be93d3830d7f17 Mon Sep 17 00:00:00 2001 From: Robert Hawkins Date: Fri, 19 Jun 2026 23:16:48 -0700 Subject: [PATCH 3/6] docs(Phonology): trim IsFallingPair docstring --- Linglib/Phonology/Tone/Constraints.lean | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Linglib/Phonology/Tone/Constraints.lean b/Linglib/Phonology/Tone/Constraints.lean index 815a4881d..5e6c3455d 100644 --- a/Linglib/Phonology/Tone/Constraints.lean +++ b/Linglib/Phonology/Tone/Constraints.lean @@ -138,10 +138,8 @@ def starCrowd (threshold : Nat := 2) : DirectionalConstraint (FloatingForm S TRN /-! ### *FALL (falling contours on multi-linked TBUs) -/ -/-- A tone pair `(t1, t2)` (in tier order) is **falling** iff it is - HM, HL, or ML (paper, eq. 23). TRN's `H`, `M`, `L` are not - constructors (they're `def`s), so we use equality rather than - pattern matching. -/ +/-- A tone pair `(t1, t2)` (in tier order) is **falling** iff it is HM, HL, or ML + (paper eq. 23). -/ def IsFallingPair (t1 t2 : TRN) : Prop := (t1 = .H ∧ t2 = .M) ∨ (t1 = .H ∧ t2 = .L) ∨ (t1 = .M ∧ t2 = .L) From c91f1323477fa0ab651abe33e656365638ff56b6 Mon Sep 17 00:00:00 2001 From: Robert Hawkins Date: Fri, 19 Jun 2026 23:19:34 -0700 Subject: [PATCH 4/6] docs(Phonology): trim starFall/starMlessL/integrityTone docstrings --- Linglib/Phonology/Tone/Constraints.lean | 43 ++++++------------------- 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/Linglib/Phonology/Tone/Constraints.lean b/Linglib/Phonology/Tone/Constraints.lean index 5e6c3455d..658613030 100644 --- a/Linglib/Phonology/Tone/Constraints.lean +++ b/Linglib/Phonology/Tone/Constraints.lean @@ -146,9 +146,7 @@ def IsFallingPair (t1 t2 : TRN) : Prop := instance (t1 t2 : TRN) : Decidable (IsFallingPair t1 t2) := inferInstanceAs (Decidable (_ ∨ _)) -/-- A tone sequence contains a falling adjacent pair. Recursive - Prop predicate; the explicit decidability instance below carries - the recursion through. -/ +/-- A tone sequence contains a falling adjacent pair. -/ def HasFall : List TRN → Prop | [] => False | [_] => False @@ -161,13 +159,8 @@ instance decidableHasFall : (ts : List TRN) → Decidable (HasFall ts) have : Decidable (HasFall (t2 :: rest)) := decidableHasFall (t2 :: rest) inferInstanceAs (Decidable (IsFallingPair t1 t2 ∨ HasFall (t2 :: rest))) -/-- `*FALL` (paper, eq. 23): one violation per syllable associated with - a falling contour (HM, HL, ML). - - A TBU is checked by extracting its surface-linked tones in tier - order (via `tierValues`) and scanning for falling adjacent pairs. - Uses `List.range` + `countP` rather than `Finset.range` + `filter` - + `card` so kernel `decide` reduces. -/ +/-- `*FALL` (paper eq. 23): one violation per syllable with a falling contour + (HM, HL, ML). -/ def starFall : DirectionalConstraint (FloatingForm S TRN) where name := "*FALL" family := .markedness @@ -176,18 +169,9 @@ def starFall : DirectionalConstraint (FloatingForm S TRN) where /-! ### *M [(f.links.filter (fun l => IsDeletedLink f l ∧ ToneHasValue f l.fst t)).card] -/-- `INTEGRITY` (paper, [mccarthy-prince-1995]; [akinbo-fwangwar-2026] - eq. 22c): no input tone has multiple output correspondents. In our - autosegmental encoding, an "output correspondent of an input tone" - is an alive ulTier entry sharing the input tone's value AND - morpheme. SPREADING (one alive ulTier entry, multi-linked) → 0 - violations. COPYING (multiple alive ulTier entries with same - value+morpheme) → `(count - 1)` violations. - - Parameterised by the morpheme `m` and tone value `t` whose copies - are being counted (typically the verbaliser's M or H). The paper's - INTEGRITY-Mᵥ is `integrityTone vbzMorph .M`. -/ +/-- `INTEGRITY` ([mccarthy-prince-1995]; [akinbo-fwangwar-2026]): no input tone has + multiple output correspondents — here, alive `ulTier` entries sharing tone value `t` + and morpheme `m`. Spreading (one multi-linked entry) → 0; copying (`n` such entries) + → `n - 1` violations. -/ def integrityTone (m : Morpheme) (t : TRN) : DirectionalConstraint (FloatingForm S TRN) where name := s!"INTEGRITY-{reprStr t}({m.form})" From 0e7f363e48c7f6ef15e14295aa017fee59603ffc Mon Sep 17 00:00:00 2001 From: Robert Hawkins Date: Fri, 19 Jun 2026 23:25:59 -0700 Subject: [PATCH 5/6] refactor(Phonology): tighten Tone/Constraints eval bodies (drop lets) --- Linglib/Phonology/Tone/Constraints.lean | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Linglib/Phonology/Tone/Constraints.lean b/Linglib/Phonology/Tone/Constraints.lean index 658613030..457ae49f7 100644 --- a/Linglib/Phonology/Tone/Constraints.lean +++ b/Linglib/Phonology/Tone/Constraints.lean @@ -124,6 +124,10 @@ def tonesForMorpheme (m : Morpheme) : Finset TierIdx := let docked := (f.surfaceLinks.filter fun l => SegInMorpheme f l.snd m).image Prod.fst ownAlive ∪ docked +/-- All morphemes occurring in `f`, on either tier. -/ +def morphemes : Finset Morpheme := + (f.lower.map SegSpec.morpheme).toFinset ∪ (f.upper.map TierSpec.morpheme).toFinset + /-- `*CROWD` (paper eq. 5): one violation per morpheme with more than `threshold` tones (default 2), counting its surviving underlying tones plus tones docked onto its TBUs from other morphemes. -/ @@ -131,10 +135,7 @@ def starCrowd (threshold : Nat := 2) : DirectionalConstraint (FloatingForm S TRN name := s!"*CROWD({threshold})" family := .markedness eval := fun f => - let morphIds : Finset Morpheme := - (f.lower.map SegSpec.morpheme).toFinset ∪ - (f.upper.map TierSpec.morpheme).toFinset - [(morphIds.filter (fun m => (tonesForMorpheme f m).card > threshold)).card] + [((morphemes f).filter (fun m => threshold < (tonesForMorpheme f m).card)).card] /-! ### *FALL (falling contours on multi-linked TBUs) -/ @@ -177,9 +178,8 @@ def starMlessL : DirectionalConstraint (FloatingForm S TRN) where family := .markedness eval := fun f => let aliveValues : List TRN := - f.aliveTierIdxs.filterMap fun k => (f.upper[k]?).map TierSpec.value - [aliveValues.zip aliveValues.tail - |>.countP fun p => decide (p.1 = TRN.M ∧ p.2 = TRN.L)] + f.aliveTierIdxs.filterMap (f.upper[·]?.map TierSpec.value) + [(aliveValues.zip aliveValues.tail).countP (fun p => decide (p = (TRN.M, TRN.L)))] /-! ### HAVETONE -/ @@ -227,8 +227,7 @@ def integrityTone (m : Morpheme) (t : TRN) : name := s!"INTEGRITY-{reprStr t}({m.form})" family := .faithfulness eval := fun f => - let aliveCopies := (List.range f.upper.length).countP fun k => - decide (f.IsAlive k ∧ ToneInMorpheme f k m ∧ ToneHasValue f k t) - [if aliveCopies = 0 then 0 else aliveCopies - 1] + [(List.range f.upper.length).countP + (fun k => decide (f.IsAlive k ∧ ToneInMorpheme f k m ∧ ToneHasValue f k t)) - 1] end Phonology.Tone From e3238782d1927365b335219c4920dc7e5608789d Mon Sep 17 00:00:00 2001 From: Robert Hawkins Date: Fri, 19 Jun 2026 23:33:09 -0700 Subject: [PATCH 6/6] refactor(Phonology): mathlib-quality nits in Tone/Constraints.lean --- Linglib/Phonology/Tone/Constraints.lean | 34 +++++-------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/Linglib/Phonology/Tone/Constraints.lean b/Linglib/Phonology/Tone/Constraints.lean index 457ae49f7..bcc43b80e 100644 --- a/Linglib/Phonology/Tone/Constraints.lean +++ b/Linglib/Phonology/Tone/Constraints.lean @@ -20,7 +20,7 @@ singleton-degeneracy guarantee). Equation numbers below are [mcpherson-lamont-20 * `starMlessL` — `*M False