diff --git a/Linglib/Phonology/Tone/Constraints.lean b/Linglib/Phonology/Tone/Constraints.lean index bc0c9cd80..bcc43b80e 100644 --- a/Linglib/Phonology/Tone/Constraints.lean +++ b/Linglib/Phonology/Tone/Constraints.lean @@ -2,37 +2,40 @@ import Linglib.Phonology.Autosegmental.Floating import Linglib.Phonology.OptimalityTheory.DirectionalTableau /-! -# Tonal Constraints — Generic Constructors over `FloatingForm` -[wolf-2007] [cahill-2004] [mcpherson-lamont-2026] +# Tonal constraints OT/HS constraint constructors over the `FloatingForm S TRN` autosegmental -representation (`Phonology/Autosegmental/Floating.lean`). -Generic over the segment type `S`. - -All constraints are `DirectionalConstraint`s: -- Genuinely directional (e.g., `*FLOAT^→`) emit indicator vectors with - positions in tier order. -- Parallel-by-nature emit singletons `[count]` (per `EvalMode.le_singleton`'s - singleton-degeneracy guarantee). - -## Constraint inventory (paper, eqs. 5, 7, 15-17, 23) - -- `starFloat` ← paper eq. 16 (`*FLOAT`, directional) -- `starTautDock` ← paper eq. 15 (`*TAUTDOCK`, after [wolf-2007]) -- `starCrowd threshold` ← paper eq. 5 (`*CROWD`, after [cahill-2004]) -- `starFall` ← paper eq. 23 (`*FALL`, falling-contour ban) -- `maxTone t` ← paper eq. 7c (`MAX(T)`) -- `depLinkTone t` ← paper eq. 7a (`DEP(link)/T`) -- `maxLinkTone t` ← paper eq. 7b (`MAX(link)/T`) -- `haveTone` ← paper eq. 17 (`HAVETONE`) - -## Faithfulness against the underlying form - -Faithfulness constraints (`maxTone`, `maxLinkTone`, `depLinkTone`) -compare surface state to the immutable underlying state stored in -`FloatingForm`. This is what makes the [mcpherson-lamont-2026] -fig. 3 multi-step asymmetry visible: without underlying-form tracking, -faithfulness can't fire and the LR-vs-RL distinction collapses. +representation (`Phonology/Autosegmental/Floating.lean`), generic over the segment +type `S`. Each is a `DirectionalConstraint`: genuinely directional ones (e.g. +`*FLOAT^→`) emit indicator vectors with positions in tier order, while +parallel-by-nature ones emit singletons `[count]` (per `EvalMode.le_singleton`'s +singleton-degeneracy guarantee). Equation numbers below are [mcpherson-lamont-2026]'s. + +## Main definitions + +* `starFloat` / `starFloatCount` — `*FLOAT` (eq. 16): position-aware indicator vs. count variant +* `starTautDock` — `*TAUTDOCK` (eq. 15, after [wolf-2007]) +* `starCrowd` — `*CROWD` (eq. 5): per-morpheme tone-count ceiling +* `starFall` — `*FALL` (eq. 23): falling-contour ban +* `starMlessL` — `*M [f.floatIndicator.sum] --- ============================================================================ --- § 3: *TAUTDOCK --- ============================================================================ +/-! ### *TAUTDOCK -/ /-- `*TAUTDOCK` (paper, eq. 15, after [wolf-2007]): one violation per GEN-inserted tautomorphic surface link. -/ @@ -118,70 +88,46 @@ def starTautDock : DirectionalConstraint (FloatingForm S TRN) where eval := fun f => [(f.surfaceLinks.filter (fun l => IsInsertedLink f l ∧ f.IsTautomorphic l)).card] --- ============================================================================ --- § 4: *CROWD (per-morpheme tone count) --- ============================================================================ +/-! ### *CROWD (per-morpheme tone count) -/ /-- The tone at index `k` belongs to morpheme `m`. -/ -def ToneInMorpheme (f : FloatingForm S TRN) (k : TierIdx) (m : Morpheme) : Prop := +abbrev ToneInMorpheme (k : TierIdx) (m : Morpheme) : Prop := (f.upper[k]?).map TierSpec.morpheme = some m -instance (f : FloatingForm S TRN) (k : TierIdx) (m : Morpheme) : - Decidable (ToneInMorpheme f k m) := - inferInstanceAs (Decidable (_ = _)) - /-- The TBU at index `i` belongs to morpheme `m`. -/ -def SegInMorpheme (f : FloatingForm S TRN) (i : SegIdx) (m : Morpheme) : Prop := +abbrev SegInMorpheme (i : SegIdx) (m : Morpheme) : Prop := (f.lower[i]?).map SegSpec.morpheme = some m -instance (f : FloatingForm S TRN) (i : SegIdx) (m : Morpheme) : - Decidable (SegInMorpheme f i m) := - inferInstanceAs (Decidable (_ = _)) - /-- The set of tone indices counting toward morpheme `m`'s tonal mass: surviving underlying tones of `m`, plus tones surface-linked to TBUs of `m`. -/ -def tonesForMorpheme (f : FloatingForm S TRN) (m : Morpheme) : Finset TierIdx := +def tonesForMorpheme (m : Morpheme) : Finset TierIdx := let ownAlive := (Finset.range f.upper.length).filter fun k => f.IsAlive k ∧ ToneInMorpheme f k m let docked := (f.surfaceLinks.filter fun l => SegInMorpheme f l.snd m).image Prod.fst ownAlive ∪ docked -/-- `*CROWD` (paper, eq. 5, after [cahill-2004]): one violation per - morpheme associated with more than `threshold` tones. Default - `threshold = 2` matches the paper. +/-- All morphemes occurring in `f`, on either tier. -/ +def morphemes : Finset Morpheme := + (f.lower.map SegSpec.morpheme).toFinset ∪ (f.upper.map TierSpec.morpheme).toFinset - Counting includes surviving underlying tones of the morpheme PLUS - surface tones from other morphemes docked onto this morpheme's - TBUs. This is how the paper (p. 32) blocks rightward docking onto - `/rī^H/`: rī already has 2 tones (M + own H), so an external H - docking would make 3. -/ +/-- `*CROWD` (paper eq. 5): one violation per morpheme with more than `threshold` + tones (default 2), counting its surviving underlying tones plus tones docked onto + its TBUs from other morphemes. -/ def starCrowd (threshold : Nat := 2) : DirectionalConstraint (FloatingForm S TRN) where name := s!"*CROWD({threshold})" family := .markedness eval := fun f => - let morphIds : Finset Morpheme := - (f.lower.map SegSpec.morpheme).toFinset ∪ - (f.upper.map TierSpec.morpheme).toFinset - [(morphIds.filter (fun m => (tonesForMorpheme f m).card > threshold)).card] - --- ============================================================================ --- § 5: *FALL (falling contours on multi-linked TBUs) --- ============================================================================ - -/-- A tone pair `(t1, t2)` (in tier order) is **falling** iff it is - HM, HL, or ML (paper, eq. 23). TRN's `H`, `M`, `L` are not - constructors (they're `def`s), so we use equality rather than - pattern matching. -/ -def IsFallingPair (t1 t2 : TRN) : Prop := - (t1 = .H ∧ t2 = .M) ∨ (t1 = .H ∧ t2 = .L) ∨ (t1 = .M ∧ t2 = .L) + [((morphemes f).filter (fun m => threshold < (tonesForMorpheme f m).card)).card] -instance (t1 t2 : TRN) : Decidable (IsFallingPair t1 t2) := - inferInstanceAs (Decidable (_ ∨ _)) +/-! ### *FALL (falling contours on multi-linked TBUs) -/ -/-- A tone sequence contains a falling adjacent pair. Recursive - Prop predicate; the explicit decidability instance below carries - the recursion through. -/ +/-- A tone pair `(t1, t2)` (in tier order) is **falling** iff it is HM, HL, or ML + (paper eq. 23). -/ +abbrev IsFallingPair (t1 t2 : TRN) : Prop := + (t1 = .H ∧ t2 = .M) ∨ (t1 = .H ∧ t2 = .L) ∨ (t1 = .M ∧ t2 = .L) + +/-- A tone sequence contains a falling adjacent pair. -/ def HasFall : List TRN → Prop | [] => False | [_] => False @@ -194,47 +140,28 @@ instance decidableHasFall : (ts : List TRN) → Decidable (HasFall ts) have : Decidable (HasFall (t2 :: rest)) := decidableHasFall (t2 :: rest) inferInstanceAs (Decidable (IsFallingPair t1 t2 ∨ HasFall (t2 :: rest))) -/-- `*FALL` (paper, eq. 23): one violation per syllable associated with - a falling contour (HM, HL, ML). - - A TBU is checked by extracting its surface-linked tones in tier - order (via `tierValues`) and scanning for falling adjacent pairs. - Uses `List.range` + `countP` rather than `Finset.range` + `filter` - + `card` so kernel `decide` reduces. -/ +/-- `*FALL` (paper eq. 23): one violation per syllable with a falling contour + (HM, HL, ML). -/ def starFall : DirectionalConstraint (FloatingForm S TRN) where name := "*FALL" family := .markedness eval := fun f => [(List.range f.lower.length).countP (fun i => decide (HasFall (f.tierValues i)))] --- ============================================================================ --- § 5.5: *M let aliveValues : List TRN := - f.aliveTierIdxs.filterMap fun k => (f.upper[k]?).map TierSpec.value - [aliveValues.zip aliveValues.tail - |>.countP fun p => decide (p.1 = TRN.M ∧ p.2 = TRN.L)] + f.aliveTierIdxs.filterMap (f.upper[·]?.map TierSpec.value) + [(aliveValues.zip aliveValues.tail).countP (fun p => decide (p = (TRN.M, TRN.L)))] --- ============================================================================ --- § 6: HAVETONE --- ============================================================================ +/-! ### HAVETONE -/ /-- `HAVETONE` (paper, eq. 17): one violation per syllable not associated to any tone. -/ @@ -244,9 +171,7 @@ def haveTone : DirectionalConstraint (FloatingForm S TRN) where eval := fun f => [(List.range f.lower.length).countP (fun i => (f.linksTo i).isEmpty)] --- ============================================================================ --- § 7: Faithfulness — Generic over Tone Value --- ============================================================================ +/-! ### Faithfulness — Generic over Tone Value -/ /-- `MAX(T)` (paper, eq. 7c): one violation per underlying tone of value `t` deleted by GEN. -/ @@ -273,24 +198,16 @@ def maxLinkTone (t : TRN) : DirectionalConstraint (FloatingForm S TRN) where eval := fun f => [(f.links.filter (fun l => IsDeletedLink f l ∧ ToneHasValue f l.fst t)).card] -/-- `INTEGRITY` (paper, [mccarthy-prince-1995]; [akinbo-fwangwar-2026] - eq. 22c): no input tone has multiple output correspondents. In our - autosegmental encoding, an "output correspondent of an input tone" - is an alive ulTier entry sharing the input tone's value AND - morpheme. SPREADING (one alive ulTier entry, multi-linked) → 0 - violations. COPYING (multiple alive ulTier entries with same - value+morpheme) → `(count - 1)` violations. - - Parameterised by the morpheme `m` and tone value `t` whose copies - are being counted (typically the verbaliser's M or H). The paper's - INTEGRITY-Mᵥ is `integrityTone vbzMorph .M`. -/ +/-- `INTEGRITY` ([mccarthy-prince-1995]; [akinbo-fwangwar-2026]): no input tone has + multiple output correspondents — here, alive `ulTier` entries sharing tone value `t` + and morpheme `m`. Spreading (one multi-linked entry) → 0; copying (`n` such entries) + → `n - 1` violations. -/ def integrityTone (m : Morpheme) (t : TRN) : DirectionalConstraint (FloatingForm S TRN) where name := s!"INTEGRITY-{reprStr t}({m.form})" family := .faithfulness eval := fun f => - let aliveCopies := (List.range f.upper.length).countP fun k => - decide (f.IsAlive k ∧ ToneInMorpheme f k m ∧ ToneHasValue f k t) - [if aliveCopies = 0 then 0 else aliveCopies - 1] + [(List.range f.upper.length).countP + (fun k => decide (f.IsAlive k ∧ ToneInMorpheme f k m ∧ ToneHasValue f k t)) - 1] end Phonology.Tone