diff --git a/Linglib.lean b/Linglib.lean index 3362c617b..10bdd76b6 100644 --- a/Linglib.lean +++ b/Linglib.lean @@ -167,6 +167,8 @@ import Linglib.Discourse.Strategy import Linglib.Discourse.Scoreboard import Linglib.Discourse.AtIssueness import Linglib.Features.CoreferenceStatus +import Linglib.Syntax.Anaphora.Basic +import Linglib.Syntax.Anaphora.Diagnostic import Linglib.Syntax.Binding.Basic import Linglib.Syntax.Binding.Semantics import Linglib.Semantics.Composition.Combinator @@ -2902,3 +2904,4 @@ import Linglib.Pragmatics.Emotion import Linglib.Data.Examples.Mizuno2024 import Linglib.Data.Examples.OgiharaSteinertThrelkeld2024 import Linglib.Data.Examples.Rubinstein2014 +import Linglib.Data.Examples.Landau2026 diff --git a/Linglib/Data/Examples/Landau2026.json b/Linglib/Data/Examples/Landau2026.json new file mode 100644 index 000000000..71a6e9dc3 --- /dev/null +++ b/Linglib/Data/Examples/Landau2026.json @@ -0,0 +1,201 @@ +[ + { + "id": "landau2026_hebrewEN", + "source": {"bibkey": "landau-2026", "paperLabel": "(18a)"}, + "language": "hebr1245", + "primaryText": "yeš la'hakot še-ahavti et ha-šeni.", + "glossedTokens": [], + "translation": "(There are bands that I liked the second one.)", + "context": "Empty noun (EN), deep anaphor: a bare n head hosting a resumptive bound by an Ā-operator.", + "judgment": "ungrammatical", + "paperFeatures": [ + ["domain", "nP"], + ["depth", "deep"], + ["extractionAvailable", "false"], + ["abarContext", "restrictive relative, interrogative, free relative"] + ], + "comment": "Landau 2026, §2.2 (18a). EN fails EIR: no internal structure to host the resumptive, so the Ā-operator violates the BVQ. NP-ellipsis previously established; EIR gives additional confirmation via the EN/ENP contrast." + }, + { + "id": "landau2026_hebrewENP", + "source": {"bibkey": "landau-2026", "paperLabel": "(19a)"}, + "language": "hebr1245", + "primaryText": "yeš la'hakot še-ahavti et ha-albom ha-rišon šela'hen, ve-yeš la'hakot še-ahavti et ha-šeni.", + "glossedTokens": [], + "translation": "There are bands that I liked their first album, and there are bands that I liked their second one.", + "context": "Elided noun phrase (ENP), surface anaphor: full nP structure deleted under identity, licensed by [E] on Num.", + "judgment": "acceptable", + "paperFeatures": [ + ["domain", "nP"], + ["depth", "surface"], + ["extractionAvailable", "false"], + ["abarContext", "restrictive relative, interrogative, maximizing relative"] + ], + "comment": "Landau 2026, §2.2 (19a). ENP passes EIR: the resumptive inside the elided nP supplies the variable the operator binds." + }, + { + "id": "landau2026_hebrewNCA_DP", + "source": {"bibkey": "landau-2026", "paperLabel": "(31a)"}, + "language": "hebr1245", + "primaryText": "yeš li xaver exad še-maxarti.", + "glossedTokens": [], + "translation": "(I have one friend that I sold.)", + "context": "Null complement anaphora (NCA) / pro, deep anaphor in the DP domain.", + "judgment": "ungrammatical", + "paperFeatures": [ + ["domain", "DP"], + ["depth", "deep"], + ["extractionAvailable", "false"], + ["abarContext", "restrictive relative, interrogative, free relative"] + ], + "comment": "Landau 2026, §3.2 (31a). NCA/pro fails EIR: no internal structure. The existence of AE in Hebrew was debated; EIR provides a novel argument." + }, + { + "id": "landau2026_hebrewAE", + "source": {"bibkey": "landau-2026", "paperLabel": "(32a)"}, + "language": "hebr1245", + "primaryText": "yeš li xaver še-kaniti et ha-oto šelo, ve-yeš li xaver axer še-maxarti.", + "glossedTokens": [], + "translation": "I have one friend whose car I bought and I have another one whose car I sold.", + "context": "Argument ellipsis (AE) / DP-ellipsis, surface anaphor: full DP structure deleted under identity.", + "judgment": "acceptable", + "paperFeatures": [ + ["domain", "DP"], + ["depth", "surface"], + ["extractionAvailable", "false"], + ["abarContext", "restrictive relative, interrogative, free relative"] + ], + "comment": "Landau 2026, §3.2 (32a). AE passes EIR: a resumptive inside the elided DP supplies the variable. Novel argument for AE in Hebrew." + }, + { + "id": "landau2026_hebrewNCA_PP", + "source": {"bibkey": "landau-2026", "paperLabel": "(37a)"}, + "language": "hebr1245", + "primaryText": "yeš irgunim še-ani lo xotemet.", + "glossedTokens": [], + "translation": "(There are organizations that I don't sign.)", + "context": "Null PP via NCA, deep anaphor: PP argument omitted, content recovered pragmatically.", + "judgment": "ungrammatical", + "paperFeatures": [ + ["domain", "PP"], + ["depth", "deep"], + ["extractionAvailable", "false"], + ["abarContext", "restrictive relative, interrogative, free relative"] + ], + "comment": "Landau 2026, §4.2 (37a). Null PP via NCA fails EIR: no internal structure." + }, + { + "id": "landau2026_hebrewPPE", + "source": {"bibkey": "landau-2026", "paperLabel": "(38a)"}, + "language": "hebr1245", + "primaryText": "yeš irgunim še-ani xotemet al ha-acumot šelahem, ve-yeš irgunim še-ani lo xotemet.", + "glossedTokens": [], + "translation": "There are organizations that I sign their petitions and there are organizations that I don't.", + "context": "PP-ellipsis (PPE), surface anaphor: full PP structure deleted under identity. Data are Landau's own (first documented in Vardi 2022).", + "judgment": "acceptable", + "paperFeatures": [ + ["domain", "PP"], + ["depth", "surface"], + ["extractionAvailable", "false"], + ["abarContext", "restrictive relative, interrogative, free relative"] + ], + "comment": "Landau 2026, §4.2 (38a). PPE passes EIR: first robust evidence for PP-ellipsis; the paper argues this holds cross-linguistically, not only in Hebrew." + }, + { + "id": "landau2026_englishVPE", + "source": {"bibkey": "landau-2026", "paperLabel": "(44a)"}, + "language": "stan1293", + "primaryText": "Deep Purple, I never bought their records. Led Zeppelin, I did.", + "discourseSegments": [ + "Deep Purple, I never bought their records.", + "Led Zeppelin, I did." + ], + "glossedTokens": [], + "translation": "Deep Purple, I never bought their records. Led Zeppelin, I did.", + "context": "VP-ellipsis, surface anaphor: left-dislocated constituent binds a resumptive possessive inside the elided VP. Contrastive baseline for do so.", + "judgment": "acceptable", + "paperFeatures": [ + ["domain", "VP"], + ["depth", "surface"], + ["extractionAvailable", "true"], + ["abarContext", "left-dislocation"] + ], + "comment": "Landau 2026, §5 (44a). VP-ellipsis passes EIR." + }, + { + "id": "landau2026_englishDoSo", + "source": {"bibkey": "landau-2026", "paperLabel": "(44c)"}, + "language": "stan1293", + "primaryText": "Deep Purple, I never bought their records. Led Zeppelin, I did so.", + "discourseSegments": [ + "Deep Purple, I never bought their records.", + "Led Zeppelin, I did so." + ], + "glossedTokens": [], + "translation": "Deep Purple, I never bought their records. Led Zeppelin, I did so.", + "context": "do so, deep VP anaphor: left-dislocation with resumptive binding into do so is ungrammatical.", + "judgment": "ungrammatical", + "paperFeatures": [ + ["domain", "VP"], + ["depth", "deep"], + ["extractionAvailable", "true"], + ["abarContext", "left-dislocation"] + ], + "comment": "Landau 2026, §5 (44c). do so fails EIR; Ā-extraction is also impossible, but that is ambiguous between deep anaphor and derivational bleeding. EIR resolves the ambiguity: do so is deep [bruening-2019]." + }, + { + "id": "landau2026_dutchDatDoen", + "source": {"bibkey": "landau-2026", "paperLabel": "(45a)"}, + "language": "dutc1256", + "primaryText": "Jan, ik heb zijn muffins al eens geproefd; maar Wim, ik heb dat nog niet gedaan.", + "glossedTokens": [], + "translation": "Jan, I have already tasted his muffins once, but Wim, I haven't done that.", + "context": "dat doen 'do that', deep VP anaphor: blocks most Ā-extractions and fails EIR.", + "judgment": "ungrammatical", + "paperFeatures": [ + ["domain", "VP"], + ["depth", "deep"], + ["extractionAvailable", "true"], + ["abarContext", "left-dislocation"] + ], + "comment": "Landau 2026, §5 (45a). dat doen fails EIR (Marcel den Dikken, p.c.)." + }, + { + "id": "landau2026_danishDet", + "source": {"bibkey": "landau-2026", "paperLabel": "(46b)"}, + "language": "dani1285", + "primaryText": "Den gamle bager, jeg har smagt hans rugbrød, men den nye bager, jeg har det ikke.", + "glossedTokens": [], + "translation": "The old baker, I have tasted his rye bread, but the new baker, I haven't.", + "context": "det 'it', deep VP anaphor: allows A-dependencies but not Ā-dependencies, so it fails EIR.", + "judgment": "ungrammatical", + "paperFeatures": [ + ["domain", "VP"], + ["depth", "deep"], + ["extractionAvailable", "true"], + ["abarContext", "left-dislocation"] + ], + "comment": "Landau 2026, §5 (46b). det fails EIR (Line Mikkelsen, p.c.). The grammatical baseline is (46a)." + }, + { + "id": "landau2026_koreanNullObj", + "source": {"bibkey": "landau-2026", "paperLabel": "(47b)"}, + "language": "kore1280", + "primaryText": "Na-uy i chinkwu, nay-ka ku-uy cha-lul sasse. Nay-uy ce chinkwu, nay-ka phallasse.", + "discourseSegments": [ + "Na-uy i chinkwu, nay-ka ku-uy cha-lul sasse.", + "Nay-uy ce chinkwu, nay-ka phallasse." + ], + "glossedTokens": [], + "translation": "This friend of mine, I bought his car. THAT friend of mine, I sold his car.", + "context": "Korean null object, deep anaphor (pro): left-dislocation mandates a resumptive, but the null object fails to host one.", + "judgment": "ungrammatical", + "paperFeatures": [ + ["domain", "DP"], + ["depth", "deep"], + ["extractionAvailable", "true"], + ["abarContext", "left-dislocation"] + ], + "comment": "Landau 2026, §5 (47b). Korean null object fails EIR (Heejeong Ko, p.c.), supporting the pro analysis over AE. The grammatical baseline is (47a)." + } +] diff --git a/Linglib/Data/Examples/Landau2026.lean b/Linglib/Data/Examples/Landau2026.lean new file mode 100644 index 000000000..33044e23c --- /dev/null +++ b/Linglib/Data/Examples/Landau2026.lean @@ -0,0 +1,216 @@ +import Linglib.Data.Examples.Schema + +/-! +# `Landau2026` — typed example data + +Auto-generated from `Linglib/Data/Examples/Landau2026.json` by +`scripts/gen_examples.py`. Do not edit by hand; edit the JSON and re-run +the generator. Consumers (the paper's study file, test-suite hubs) import +this module; declarations live in `namespace Landau2026.Examples`. +-/ + +namespace Landau2026.Examples + +open Data.Examples + +def hebrewEN : LinguisticExample := + { id := "landau2026_hebrewEN" + source := ⟨"landau-2026", "(18a)"⟩ + reportedIn := none + language := "hebr1245" + primaryText := "yeš la'hakot še-ahavti et ha-šeni." + discourseSegments := [] + glossedTokens := [] + translation := "(There are bands that I liked the second one.)" + context := "Empty noun (EN), deep anaphor: a bare n head hosting a resumptive bound by an Ā-operator." + judgment := .ungrammatical + alternatives := [] + readings := [] + paperFeatures := [("domain", "nP"), ("depth", "deep"), ("extractionAvailable", "false"), ("abarContext", "restrictive relative, interrogative, free relative")] + comment := "Landau 2026, §2.2 (18a). EN fails EIR: no internal structure to host the resumptive, so the Ā-operator violates the BVQ. NP-ellipsis previously established; EIR gives additional confirmation via the EN/ENP contrast." + metaLanguage := "stan1293" + lgrConformance := "" } + +def hebrewENP : LinguisticExample := + { id := "landau2026_hebrewENP" + source := ⟨"landau-2026", "(19a)"⟩ + reportedIn := none + language := "hebr1245" + primaryText := "yeš la'hakot še-ahavti et ha-albom ha-rišon šela'hen, ve-yeš la'hakot še-ahavti et ha-šeni." + discourseSegments := [] + glossedTokens := [] + translation := "There are bands that I liked their first album, and there are bands that I liked their second one." + context := "Elided noun phrase (ENP), surface anaphor: full nP structure deleted under identity, licensed by [E] on Num." + judgment := .acceptable + alternatives := [] + readings := [] + paperFeatures := [("domain", "nP"), ("depth", "surface"), ("extractionAvailable", "false"), ("abarContext", "restrictive relative, interrogative, maximizing relative")] + comment := "Landau 2026, §2.2 (19a). ENP passes EIR: the resumptive inside the elided nP supplies the variable the operator binds." + metaLanguage := "stan1293" + lgrConformance := "" } + +def hebrewNCA_DP : LinguisticExample := + { id := "landau2026_hebrewNCA_DP" + source := ⟨"landau-2026", "(31a)"⟩ + reportedIn := none + language := "hebr1245" + primaryText := "yeš li xaver exad še-maxarti." + discourseSegments := [] + glossedTokens := [] + translation := "(I have one friend that I sold.)" + context := "Null complement anaphora (NCA) / pro, deep anaphor in the DP domain." + judgment := .ungrammatical + alternatives := [] + readings := [] + paperFeatures := [("domain", "DP"), ("depth", "deep"), ("extractionAvailable", "false"), ("abarContext", "restrictive relative, interrogative, free relative")] + comment := "Landau 2026, §3.2 (31a). NCA/pro fails EIR: no internal structure. The existence of AE in Hebrew was debated; EIR provides a novel argument." + metaLanguage := "stan1293" + lgrConformance := "" } + +def hebrewAE : LinguisticExample := + { id := "landau2026_hebrewAE" + source := ⟨"landau-2026", "(32a)"⟩ + reportedIn := none + language := "hebr1245" + primaryText := "yeš li xaver še-kaniti et ha-oto šelo, ve-yeš li xaver axer še-maxarti." + discourseSegments := [] + glossedTokens := [] + translation := "I have one friend whose car I bought and I have another one whose car I sold." + context := "Argument ellipsis (AE) / DP-ellipsis, surface anaphor: full DP structure deleted under identity." + judgment := .acceptable + alternatives := [] + readings := [] + paperFeatures := [("domain", "DP"), ("depth", "surface"), ("extractionAvailable", "false"), ("abarContext", "restrictive relative, interrogative, free relative")] + comment := "Landau 2026, §3.2 (32a). AE passes EIR: a resumptive inside the elided DP supplies the variable. Novel argument for AE in Hebrew." + metaLanguage := "stan1293" + lgrConformance := "" } + +def hebrewNCA_PP : LinguisticExample := + { id := "landau2026_hebrewNCA_PP" + source := ⟨"landau-2026", "(37a)"⟩ + reportedIn := none + language := "hebr1245" + primaryText := "yeš irgunim še-ani lo xotemet." + discourseSegments := [] + glossedTokens := [] + translation := "(There are organizations that I don't sign.)" + context := "Null PP via NCA, deep anaphor: PP argument omitted, content recovered pragmatically." + judgment := .ungrammatical + alternatives := [] + readings := [] + paperFeatures := [("domain", "PP"), ("depth", "deep"), ("extractionAvailable", "false"), ("abarContext", "restrictive relative, interrogative, free relative")] + comment := "Landau 2026, §4.2 (37a). Null PP via NCA fails EIR: no internal structure." + metaLanguage := "stan1293" + lgrConformance := "" } + +def hebrewPPE : LinguisticExample := + { id := "landau2026_hebrewPPE" + source := ⟨"landau-2026", "(38a)"⟩ + reportedIn := none + language := "hebr1245" + primaryText := "yeš irgunim še-ani xotemet al ha-acumot šelahem, ve-yeš irgunim še-ani lo xotemet." + discourseSegments := [] + glossedTokens := [] + translation := "There are organizations that I sign their petitions and there are organizations that I don't." + context := "PP-ellipsis (PPE), surface anaphor: full PP structure deleted under identity. Data are Landau's own (first documented in Vardi 2022)." + judgment := .acceptable + alternatives := [] + readings := [] + paperFeatures := [("domain", "PP"), ("depth", "surface"), ("extractionAvailable", "false"), ("abarContext", "restrictive relative, interrogative, free relative")] + comment := "Landau 2026, §4.2 (38a). PPE passes EIR: first robust evidence for PP-ellipsis; the paper argues this holds cross-linguistically, not only in Hebrew." + metaLanguage := "stan1293" + lgrConformance := "" } + +def englishVPE : LinguisticExample := + { id := "landau2026_englishVPE" + source := ⟨"landau-2026", "(44a)"⟩ + reportedIn := none + language := "stan1293" + primaryText := "Deep Purple, I never bought their records. Led Zeppelin, I did." + discourseSegments := ["Deep Purple, I never bought their records.", "Led Zeppelin, I did."] + glossedTokens := [] + translation := "Deep Purple, I never bought their records. Led Zeppelin, I did." + context := "VP-ellipsis, surface anaphor: left-dislocated constituent binds a resumptive possessive inside the elided VP. Contrastive baseline for do so." + judgment := .acceptable + alternatives := [] + readings := [] + paperFeatures := [("domain", "VP"), ("depth", "surface"), ("extractionAvailable", "true"), ("abarContext", "left-dislocation")] + comment := "Landau 2026, §5 (44a). VP-ellipsis passes EIR." + metaLanguage := "stan1293" + lgrConformance := "" } + +def englishDoSo : LinguisticExample := + { id := "landau2026_englishDoSo" + source := ⟨"landau-2026", "(44c)"⟩ + reportedIn := none + language := "stan1293" + primaryText := "Deep Purple, I never bought their records. Led Zeppelin, I did so." + discourseSegments := ["Deep Purple, I never bought their records.", "Led Zeppelin, I did so."] + glossedTokens := [] + translation := "Deep Purple, I never bought their records. Led Zeppelin, I did so." + context := "do so, deep VP anaphor: left-dislocation with resumptive binding into do so is ungrammatical." + judgment := .ungrammatical + alternatives := [] + readings := [] + paperFeatures := [("domain", "VP"), ("depth", "deep"), ("extractionAvailable", "true"), ("abarContext", "left-dislocation")] + comment := "Landau 2026, §5 (44c). do so fails EIR; Ā-extraction is also impossible, but that is ambiguous between deep anaphor and derivational bleeding. EIR resolves the ambiguity: do so is deep [bruening-2019]." + metaLanguage := "stan1293" + lgrConformance := "" } + +def dutchDatDoen : LinguisticExample := + { id := "landau2026_dutchDatDoen" + source := ⟨"landau-2026", "(45a)"⟩ + reportedIn := none + language := "dutc1256" + primaryText := "Jan, ik heb zijn muffins al eens geproefd; maar Wim, ik heb dat nog niet gedaan." + discourseSegments := [] + glossedTokens := [] + translation := "Jan, I have already tasted his muffins once, but Wim, I haven't done that." + context := "dat doen 'do that', deep VP anaphor: blocks most Ā-extractions and fails EIR." + judgment := .ungrammatical + alternatives := [] + readings := [] + paperFeatures := [("domain", "VP"), ("depth", "deep"), ("extractionAvailable", "true"), ("abarContext", "left-dislocation")] + comment := "Landau 2026, §5 (45a). dat doen fails EIR (Marcel den Dikken, p.c.)." + metaLanguage := "stan1293" + lgrConformance := "" } + +def danishDet : LinguisticExample := + { id := "landau2026_danishDet" + source := ⟨"landau-2026", "(46b)"⟩ + reportedIn := none + language := "dani1285" + primaryText := "Den gamle bager, jeg har smagt hans rugbrød, men den nye bager, jeg har det ikke." + discourseSegments := [] + glossedTokens := [] + translation := "The old baker, I have tasted his rye bread, but the new baker, I haven't." + context := "det 'it', deep VP anaphor: allows A-dependencies but not Ā-dependencies, so it fails EIR." + judgment := .ungrammatical + alternatives := [] + readings := [] + paperFeatures := [("domain", "VP"), ("depth", "deep"), ("extractionAvailable", "true"), ("abarContext", "left-dislocation")] + comment := "Landau 2026, §5 (46b). det fails EIR (Line Mikkelsen, p.c.). The grammatical baseline is (46a)." + metaLanguage := "stan1293" + lgrConformance := "" } + +def koreanNullObj : LinguisticExample := + { id := "landau2026_koreanNullObj" + source := ⟨"landau-2026", "(47b)"⟩ + reportedIn := none + language := "kore1280" + primaryText := "Na-uy i chinkwu, nay-ka ku-uy cha-lul sasse. Nay-uy ce chinkwu, nay-ka phallasse." + discourseSegments := ["Na-uy i chinkwu, nay-ka ku-uy cha-lul sasse.", "Nay-uy ce chinkwu, nay-ka phallasse."] + glossedTokens := [] + translation := "This friend of mine, I bought his car. THAT friend of mine, I sold his car." + context := "Korean null object, deep anaphor (pro): left-dislocation mandates a resumptive, but the null object fails to host one." + judgment := .ungrammatical + alternatives := [] + readings := [] + paperFeatures := [("domain", "DP"), ("depth", "deep"), ("extractionAvailable", "true"), ("abarContext", "left-dislocation")] + comment := "Landau 2026, §5 (47b). Korean null object fails EIR (Heejeong Ko, p.c.), supporting the pro analysis over AE. The grammatical baseline is (47a)." + metaLanguage := "stan1293" + lgrConformance := "" } + +def all : List LinguisticExample := [hebrewEN, hebrewENP, hebrewNCA_DP, hebrewAE, hebrewNCA_PP, hebrewPPE, englishVPE, englishDoSo, dutchDatDoen, danishDet, koreanNullObj] + +end Landau2026.Examples diff --git a/Linglib/Studies/Landau2026.lean b/Linglib/Studies/Landau2026.lean index 8663c23ec..4b4f29711 100644 --- a/Linglib/Studies/Landau2026.lean +++ b/Linglib/Studies/Landau2026.lean @@ -1,71 +1,36 @@ +import Linglib.Syntax.Anaphora.Diagnostic +import Linglib.Data.Examples.Landau2026 import Linglib.Typology.RelativeClause.Basic import Linglib.Fragments.Hebrew.Relativization /-! # Landau 2026: Silent Resumption [landau-2026] -A New Test for Ellipsis. *Linguistic Inquiry*, Early Access. - -## The EIR Test - -The **Ellipsis-Internal Resumption** (EIR) test: a novel diagnostic for -distinguishing deep from surface anaphora ([hankamer-sag-1976]). - -The argumentation chain: - -1. **BVQ** ([chomsky-1982]): at LF, every Ā-operator must bind - some variable. -2. A resumptive pronoun inside a null constituent serves as a variable. -3. A resumptive pronoun can only exist inside a constituent with - LF-visible internal structure. -4. Therefore: an Ā-operator can bind into a null element iff - it is a surface anaphor (= ellipsis). - -The EIR test has a distinctive advantage over the extraction test. -When extraction fails out of a null element, the result is ambiguous: -the element could be a deep anaphor (no structure → BVQ violation), -*or* a surface anaphor where derivational timing bleeds Ā-extraction. -When EIR fails, only the deep-anaphor explanation survives, because -resumptive dependencies are established at LF without intermediate -movement steps that ellipsis could bleed. - -## Hebrew Results - -Three ellipsis types confirmed via EIR in domains where extraction -is impossible (Hebrew DPs are absolute islands; P-stranding is barred): -- **nP-ellipsis**: previously established; EIR provides additional - confirmation via EN/ENP contrast -- **DP-ellipsis**: debated; EIR provides novel argument for AE -- **PP-ellipsis**: novel; EIR provides first robust evidence for PPE - -## Cross-Linguistic Mixed Anaphors - -EIR diagnoses contested "mixed anaphors" as deep: -- English *do so* — fails EIR (cf. VP-ellipsis, which passes) -- Dutch *dat doen* — fails EIR -- Danish *det* — fails EIR -- Korean null objects — fail EIR (supporting *pro* over AE) +The **Ellipsis-Internal Resumption** (EIR) test distinguishes deep from +surface anaphora ([hankamer-sag-1976]). By the Ban on Vacuous Quantification +([chomsky-1982]), an Ā-operator can bind into a null site iff the site has +LF-visible structure to host a resumptive variable — iff it is a surface +anaphor (ellipsis). EIR improves on the extraction test: resumptive +dependencies are fixed at LF, so EIR failure cannot be bled by derivational +timing and is unambiguously diagnostic of a deep anaphor. + +`allData` gathers the paper's rows — Hebrew nP/DP/PP ellipsis (where the +extraction test is unavailable: DPs are absolute islands and P-stranding is +barred) and the cross-linguistic "mixed anaphors" (English *do so*, Dutch +*dat doen*, Danish *det*, Korean null objects), all diagnosed deep. -/ namespace Landau2026 open RelativeClause +open Anaphor (Depth DepthCause) +open Data.Examples (LinguisticExample) --- ═══════════════════════════════════════════════════════════════ --- § 1: Types --- ═══════════════════════════════════════════════════════════════ - -/-- Anaphoric depth: whether a null element has internal syntactic - structure at LF. [hankamer-sag-1976] +/-! ### Types - - Deep: no LF-visible structure; content recovered pragmatically - or deictically. EN, NCA, *pro*, *do so*, *dat doen*, *det*. - - Surface: full structure, phonologically deleted under identity - with a linguistic antecedent. VP-ellipsis, ENP, AE, PPE. -/ -inductive AnaphorDepth where - | deep - | surface - deriving DecidableEq, Repr +Anaphoric depth (deep/surface, [hankamer-sag-1976]) is the framework-neutral +substrate primitive `Anaphor.Depth`; EN/NCA/*pro*/*do so*/*dat doen*/*det* are +`.deep`, VP-ellipsis/ENP/AE/PPE are `.surface`. -/ /-- Syntactic domain of the null element. -/ inductive NullDomain where @@ -75,359 +40,218 @@ inductive NullDomain where | VP -- Verb phrase deriving DecidableEq, Repr --- ═══════════════════════════════════════════════════════════════ --- § 2: BVQ and EIR — Derivation Chain --- ═══════════════════════════════════════════════════════════════ - -/-- Step 1: A null element has LF-visible internal structure - iff it is a surface anaphor. -/ -def AnaphorDepth.hasStructure : AnaphorDepth → Bool - | .surface => true - | .deep => false - -/-- Step 2: A resumptive pronoun (= variable) can only be hosted - inside a constituent with internal structure; there must be a - syntactic position for it to occupy. -/ -def canContainVariable (d : AnaphorDepth) : Bool := d.hasStructure - -/-- Step 3: **BVQ** — an Ā-operator binding into a site is - well-formed iff the site provides a variable to bind. -/ -def bvqSatisfied (siteHasVariable : Bool) : Bool := siteHasVariable - -/-- **EIR prediction**, derived from the chain: - structure → can host resumptive → BVQ satisfied → grammatical. -/ -def eirPrediction (d : AnaphorDepth) : Bool := - bvqSatisfied (canContainVariable d) - -/-- The derivation chain collapses: EIR passes iff the null - element has internal structure. -/ -theorem eir_iff_structure (d : AnaphorDepth) : - eirPrediction d = d.hasStructure := by - cases d <;> rfl - --- ═══════════════════════════════════════════════════════════════ --- § 3: EIR vs Extraction — Diagnostic Comparison --- ═══════════════════════════════════════════════════════════════ - -/-- What can be concluded from a diagnostic test result. -/ -inductive Conclusion where - | definitelySurface - | definitelyDeep - | inconclusive - deriving DecidableEq, Repr - -/-- The extraction test. Success is unambiguous (the operator - binds a trace inside the overt structure → surface anaphor). - Failure is ambiguous: it could mean the element is deep - (no structure → BVQ), *or* that it is surface but derivational - timing bleeds Ā-movement through the ellipsis site. -/ -def extractionConclusion (success : Bool) : Conclusion := - if success then .definitelySurface else .inconclusive - -/-- The EIR test. Both outcomes are unambiguous. Resumptive - dependencies are established purely at LF (binding, not - movement), so there is no derivational step for ellipsis - timing to bleed. EIR is also insensitive to island constraints, - since resumption freely crosses islands. -/ -def eirConclusion (success : Bool) : Conclusion := - if success then .definitelySurface else .definitelyDeep - -/-- EIR is never inconclusive. -/ -theorem eir_always_conclusive (b : Bool) : - eirConclusion b ≠ .inconclusive := by - cases b <;> simp [eirConclusion] - -/-- Extraction failure is inherently inconclusive. -/ -theorem extraction_inconclusive_on_failure : - extractionConclusion false = .inconclusive := rfl - -/-- When extraction succeeds, it agrees with EIR: both conclude - surface. This means EIR is a strict refinement — it agrees - where extraction is informative, and resolves the cases where - extraction is not. -/ -theorem eir_refines_extraction (b : Bool) : - extractionConclusion b = .definitelySurface → - eirConclusion b = .definitelySurface := by - cases b <;> simp [extractionConclusion, eirConclusion] - --- ═══════════════════════════════════════════════════════════════ --- § 4: Data --- ═══════════════════════════════════════════════════════════════ - -/-- A datum for the Ellipsis-Internal Resumption test. -/ -structure EIRDatum where - language : String - nullElement : String - domain : NullDomain - depth : AnaphorDepth - /-- Does the null element pass the EIR test? - (= can it host a resumptive pronoun bound by an Ā-operator?) -/ - eirGrammatical : Bool - /-- Is extraction from this domain possible in the language? - When `false`, the extraction test is inapplicable and EIR - is the only viable syntactic diagnostic. -/ - extractionAvailable : Bool := true - abarContext : String := "" - source : String := "" - deriving Repr - --- ─────────────────────────────────────────────────────────────── --- § 4.1: Hebrew nP Domain --- ─────────────────────────────────────────────────────────────── - -/-- Empty noun (EN): deep anaphor. A bare n head; content recovered - from a restricted deictic set (PERSON, THING, TIME, PLACE). - No linguistic antecedent required. Fails EIR. - NP-ellipsis in Hebrew is previously established; EIR provides - additional confirmation. -/ -def hebrewEN : EIRDatum := - { language := "Hebrew" - nullElement := "Empty Noun (EN)" - domain := .nP - depth := .deep - eirGrammatical := false - extractionAvailable := false - abarContext := "restrictive relative, interrogative, free relative" - source := "§2.2, (18a–d)" } - -/-- Elided noun phrase (ENP): surface anaphor. Full nP structure - (root + arguments) deleted under identity with a linguistic - antecedent; licensed by [E] on Num. Passes EIR: the resumptive - pronoun inside the elided nP provides a variable. -/ -def hebrewENP : EIRDatum := - { language := "Hebrew" - nullElement := "Elided Noun Phrase (ENP)" - domain := .nP - depth := .surface - eirGrammatical := true - extractionAvailable := false - abarContext := "restrictive relative, interrogative, maximizing relative" - source := "§2.2, (19a–d)" } - --- ─────────────────────────────────────────────────────────────── --- § 4.2: Hebrew DP Domain --- ─────────────────────────────────────────────────────────────── - -/-- Null complement anaphora (NCA) / *pro*: deep anaphor. - No internal structure; content recovered pragmatically. - Fails EIR. The existence of AE in Hebrew was debated; - the EIR test provides a novel argument. -/ -def hebrewNCA_DP : EIRDatum := - { language := "Hebrew" - nullElement := "NCA / pro (null object)" - domain := .DP - depth := .deep - eirGrammatical := false - extractionAvailable := false - abarContext := "restrictive relative, interrogative, free relative" - source := "§3.2, (31a–d)" } - -/-- Argument ellipsis (AE) / DP-ellipsis: surface anaphor. - Full DP structure deleted under identity with a linguistic - antecedent. Passes EIR. Novel argument for AE in Hebrew. -/ -def hebrewAE : EIRDatum := - { language := "Hebrew" - nullElement := "Argument Ellipsis (AE)" - domain := .DP - depth := .surface - eirGrammatical := true - extractionAvailable := false - abarContext := "restrictive relative, interrogative, free relative" - source := "§3.2, (32a–d)" } - --- ─────────────────────────────────────────────────────────────── --- § 4.3: Hebrew PP Domain --- ─────────────────────────────────────────────────────────────── - -/-- Null PP via NCA: deep anaphor. PP argument omitted; content - recovered pragmatically. Fails EIR. -/ -def hebrewNCA_PP : EIRDatum := - { language := "Hebrew" - nullElement := "NCA (null PP)" - domain := .PP - depth := .deep - eirGrammatical := false - extractionAvailable := false - abarContext := "restrictive relative, interrogative, free relative" - source := "§4.2, (37a–d)" } - -/-- PP-ellipsis (PPE): surface anaphor. Full PP structure deleted - under identity with a linguistic antecedent. Passes EIR. - First robust evidence for PP-ellipsis; the paper argues this - holds cross-linguistically, not only in Hebrew. -/ -def hebrewPPE : EIRDatum := - { language := "Hebrew" - nullElement := "PP-Ellipsis (PPE)" - domain := .PP - depth := .surface - eirGrammatical := true - extractionAvailable := false - abarContext := "restrictive relative, interrogative, free relative" - source := "§4.2, (38a–d)" } - --- ─────────────────────────────────────────────────────────────── --- § 4.4: Cross-Linguistic Mixed Anaphors --- ─────────────────────────────────────────────────────────────── - -/-- English VP-ellipsis: surface anaphor. Left-dislocated - constituent binds a resumptive possessive inside the elided VP. - Passes EIR. Contrastive baseline for *do so*. -/ -def englishVPE : EIRDatum := - { language := "English" - nullElement := "VP-ellipsis" - domain := .VP - depth := .surface - eirGrammatical := true - abarContext := "left-dislocation" - source := "§5, (44a–b)" } - -/-- English *do so*: deep VP anaphor. Left-dislocation with - resumptive binding into *do so* is ungrammatical. - Ā-extraction is also impossible, but that is ambiguous between - deep anaphor and derivational bleeding. EIR resolves the - ambiguity: *do so* is deep. [bruening-2019] -/ -def englishDoSo : EIRDatum := - { language := "English" - nullElement := "do so" - domain := .VP - depth := .deep - eirGrammatical := false - abarContext := "left-dislocation" - source := "§5, (44c–d)" } - -/-- Dutch *dat doen* 'do that': deep VP anaphor. - Blocks most Ā-extractions. Fails EIR. -/ -def dutchDatDoen : EIRDatum := - { language := "Dutch" - nullElement := "dat doen" - domain := .VP - depth := .deep - eirGrammatical := false - abarContext := "left-dislocation" - source := "§5, (45a–b)" } - -/-- Danish *det* 'it': deep VP anaphor. Allows A-dependencies - but not Ā-dependencies. Fails EIR. -/ -def danishDet : EIRDatum := - { language := "Danish" - nullElement := "det" - domain := .VP - depth := .deep - eirGrammatical := false - abarContext := "left-dislocation" - source := "§5, (46a–b)" } - -/-- Korean null objects: deep anaphor (*pro*). Left-dislocation - mandates a resumptive in Korean, but null objects fail to host - one — supporting the *pro* analysis over AE. -/ -def koreanNullObj : EIRDatum := - { language := "Korean" - nullElement := "null object" - domain := .DP - depth := .deep - eirGrammatical := false - abarContext := "left-dislocation" - source := "§5, (47a–b)" } - --- ═══════════════════════════════════════════════════════════════ --- § 5: Data Collections --- ═══════════════════════════════════════════════════════════════ - -def hebrewData : List EIRDatum := - [hebrewEN, hebrewENP, hebrewNCA_DP, hebrewAE, hebrewNCA_PP, hebrewPPE] - -def mixedAnaphorData : List EIRDatum := - [englishDoSo, dutchDatDoen, danishDet, koreanNullObj] - -def crossLingData : List EIRDatum := - [englishVPE] ++ mixedAnaphorData - -def allData : List EIRDatum := hebrewData ++ crossLingData - --- ═══════════════════════════════════════════════════════════════ --- § 6: Per-Datum Verification --- ═══════════════════════════════════════════════════════════════ - --- Hebrew nP -theorem en_eir : hebrewEN.eirGrammatical = eirPrediction hebrewEN.depth := rfl -theorem enp_eir : hebrewENP.eirGrammatical = eirPrediction hebrewENP.depth := rfl - --- Hebrew DP -theorem nca_dp_eir : - hebrewNCA_DP.eirGrammatical = eirPrediction hebrewNCA_DP.depth := rfl -theorem ae_eir : hebrewAE.eirGrammatical = eirPrediction hebrewAE.depth := rfl - --- Hebrew PP -theorem nca_pp_eir : - hebrewNCA_PP.eirGrammatical = eirPrediction hebrewNCA_PP.depth := rfl -theorem ppe_eir : hebrewPPE.eirGrammatical = eirPrediction hebrewPPE.depth := rfl - --- Cross-linguistic -theorem vpe_eir : englishVPE.eirGrammatical = eirPrediction englishVPE.depth := rfl -theorem do_so_eir : - englishDoSo.eirGrammatical = eirPrediction englishDoSo.depth := rfl -theorem dat_doen_eir : - dutchDatDoen.eirGrammatical = eirPrediction dutchDatDoen.depth := rfl -theorem danish_det_eir : - danishDet.eirGrammatical = eirPrediction danishDet.depth := rfl -theorem korean_null_eir : - koreanNullObj.eirGrammatical = eirPrediction koreanNullObj.depth := rfl - --- ═══════════════════════════════════════════════════════════════ --- § 7: Summary Properties --- ═══════════════════════════════════════════════════════════════ - -/-- All data are consistent: every datum's observed EIR result - matches the prediction from its depth classification. -/ +/-! ### BVQ and the EIR prediction -/ + +/-- **EIR prediction.** A null site passes the Ellipsis-Internal Resumption test + iff it has LF-visible internal structure (`Anaphor.Depth.HasInternalStructure`) + — iff it is a surface anaphor. The paper's argument: by the BVQ + ([chomsky-1982]) an Ā-operator must bind a variable at LF; a resumptive + pronoun is that variable, and it can only be hosted inside a site with + internal structure; so binding into a null site is licensed iff the site is a + surface anaphor. The prediction is therefore *literally* the substrate + structural property, not a separate stipulation. -/ +abbrev PassesEIR (d : Depth) : Prop := d.HasInternalStructure + +/-! ### EIR vs the classic tests (extraction, agreement) + +All three are `Diagnostic`s over `Anaphor.Depth`: a pass means the site had +structure to host the variable/trace (surface). They differ only in what a +*failure* admits — and that single field is the paper's whole point. Extraction and +agreement are the two *reliable* classic diagnostics ([landau-2026], after +[merchant-2001]); EIR is the new third one. -/ + +/-- The **EIR test** as a depth diagnostic. A pass is `hostsVariable` (surface + structure hosts the resumptive); a failure can *only* be a `deepAnaphor`, + because resumptive dependencies are established at LF and cannot be bled by + derivational timing, nor blocked by islands. -/ +def eir : Diagnostic Bool Depth := + .ofCauses (fun pass => if pass then {.hostsVariable} else {.deepAnaphor}) DepthCause.entails + +/-- The **extraction test** as a depth diagnostic. A pass is `hostsVariable` + (surface); a failure is ambiguous — a `deepAnaphor`, or a *surface* site whose + Ā-movement was `derivationalBleeding` (bled by ellipsis timing) or + `islandBlocking`. That extra ambiguity is exactly the gap EIR closes. -/ +def extraction : Diagnostic Bool Depth := + .ofCauses + (fun pass => if pass then {.hostsVariable} + else {.deepAnaphor, .derivationalBleeding, .islandBlocking}) + DepthCause.entails + +@[simp] theorem eir_consistent_true : eir.consistent true = {Depth.surface} := by + simp [eir, DepthCause.entails] + +@[simp] theorem eir_consistent_false : eir.consistent false = {Depth.deep} := by + simp [eir, DepthCause.entails] + +@[simp] theorem extraction_consistent_true : extraction.consistent true = {Depth.surface} := by + simp [extraction, DepthCause.entails] + +@[simp] theorem extraction_consistent_false : + extraction.consistent false = {Depth.deep, Depth.surface} := by + simp [extraction, DepthCause.entails, Set.image_insert_eq] + +/-- **Headline — Landau's contribution.** EIR *decides* anaphoric depth: on every + site the verdict recovers the depth exactly — a pass means surface (ellipsis), + a failure means deep. This is what "a new test for ellipsis" amounts to, + made precise as `Diagnostic.Decides`. -/ +theorem eir_decides : eir.Decides Depth.testOutcome := + fun d => by cases d <;> simp [Depth.testOutcome] + +/-- Extraction does **not** decide depth: on a deep site its verdict still admits + `surface` (the failure could be a bled or island-blocked extraction), so it is + inconclusive exactly where EIR is decisive. -/ +theorem extraction_not_decides : ¬ extraction.Decides Depth.testOutcome := by + intro h + have hmem : Depth.surface ∈ extraction.consistent (Depth.testOutcome .deep) := + ⟨.derivationalBleeding, by simp [Depth.testOutcome], rfl⟩ + rw [h .deep] at hmem + simp at hmem + +/-- EIR strictly refines extraction: identical on a pass, but EIR resolves the + failure extraction leaves open (its failure-causes are a subset). -/ +theorem eir_refines_extraction : eir.Refines extraction := by + intro o c hc + cases o <;> simp_all [eir, extraction, DepthCause.entails] + +/-- The **agreement test** as a depth diagnostic. The other *reliable* classic + diagnostic ([landau-2026], after [merchant-2001]). A pass is `hostsVariable` + (surface); a failure is ambiguous between a `deepAnaphor` and a surface site + whose agreement was `derivationalBleeding` (bled by ellipsis timing). Agreement + is not movement, so it has no island confound — but it is still inconclusive on + failure. (It is also inapplicable in agreement-less languages, the analogue of + `extractionAvailable = false`.) -/ +def agreement : Diagnostic Bool Depth := + .ofCauses + (fun pass => if pass then {.hostsVariable} else {.deepAnaphor, .derivationalBleeding}) + DepthCause.entails + +@[simp] theorem agreement_consistent_false : + agreement.consistent false = {Depth.deep, Depth.surface} := by + simp [agreement, DepthCause.entails, Set.image_insert_eq] + +/-- Agreement, like extraction, does **not** decide depth: its failure also admits + a bled-but-surface site. EIR escapes this confound too. -/ +theorem agreement_not_decides : ¬ agreement.Decides Depth.testOutcome := by + intro h + have hmem : Depth.surface ∈ agreement.consistent (Depth.testOutcome .deep) := + ⟨.derivationalBleeding, by simp [Depth.testOutcome], rfl⟩ + rw [h .deep] at hmem + simp at hmem + +/-- EIR refines agreement too — so EIR decides depth where *both* reliable classic + tests are inconclusive, escaping both their derivational-bleeding (and, for + extraction, island) confounds. This is Landau's actual headline: EIR matches the + confidence of the classic diagnostics while their failure-confounds it lacks. -/ +theorem eir_refines_agreement : eir.Refines agreement := by + intro o c hc + cases o <;> simp_all [eir, agreement, DepthCause.entails] + +/-! ### Data + +The empirical rows live in `Data/Examples/Landau2026.json` (typed +`LinguisticExample`s — the paper's (18a)–(47b) stimuli). The study reads each +row's EIR-relevant classification by projection: `domain`/`depth`/extraction from +`paperFeatures`, and grammaticality straight off the example's `judgment` — +passing EIR *is* the resumptive-binding sentence being acceptable. -/ + +private def parseDomain : String → NullDomain + | "DP" => .DP | "PP" => .PP | "VP" => .VP | _ => .nP + +private def parseDepth : String → Depth + | "surface" => .surface | _ => .deep + +/-- The null-element domain of an EIR example (from `paperFeatures`). -/ +def domainOf (e : LinguisticExample) : NullDomain := + parseDomain ((e.paperFeatures.lookup "domain").getD "nP") + +/-- The Hankamer–Sag `Anaphor.Depth` of an EIR example (from `paperFeatures`). -/ +def depthOf (e : LinguisticExample) : Depth := + parseDepth ((e.paperFeatures.lookup "depth").getD "deep") + +/-- Whether the example passes EIR — this *is* its grammaticality `judgment`: the + resumptive-binding sentence is acceptable iff the site hosts the variable. -/ +def eirGrammatical (e : LinguisticExample) : Bool := + match e.judgment with | .acceptable => true | _ => false + +/-- Whether the extraction test is applicable to the example's domain (`false` + for the Hebrew nominal domains, which are absolute islands). -/ +def extractionAvailable (e : LinguisticExample) : Bool := + (e.paperFeatures.lookup "extractionAvailable").getD "true" == "true" + +/-! ### Data collections + +EN/NCA/*pro*/*do so*/*dat doen*/*det* are deep; ENP/AE/PPE/VP-ellipsis surface. +Each example's `comment` in the JSON carries the construction notes. -/ + +def hebrewData : List LinguisticExample := + [Examples.hebrewEN, Examples.hebrewENP, Examples.hebrewNCA_DP, + Examples.hebrewAE, Examples.hebrewNCA_PP, Examples.hebrewPPE] + +def mixedAnaphorData : List LinguisticExample := + [Examples.englishDoSo, Examples.dutchDatDoen, Examples.danishDet, Examples.koreanNullObj] + +def crossLingData : List LinguisticExample := + Examples.englishVPE :: mixedAnaphorData + +def allData : List LinguisticExample := Examples.all + +/-! ### Summary properties + +Every datum's observed grammaticality (`eirGrammatical`, off its `judgment`) +matches the `PassesEIR` prediction read off its depth — the `decide p = true ↔ p` +bridge between a recorded observation and the substrate structural property. -/ + +/-- All data are consistent: every example's EIR grammaticality matches the + prediction read off its depth classification. -/ theorem all_eir_consistent : - allData.all (λ d => d.eirGrammatical == eirPrediction d.depth) = true := by - native_decide + ∀ e ∈ allData, (eirGrammatical e = true ↔ PassesEIR (depthOf e)) := by decide -/-- Hebrew has both deep and surface strategies in all three - nominal domains (nP, DP, PP). -/ +/-- Hebrew has both deep and surface strategies in all three nominal domains + (nP, DP, PP). -/ theorem hebrew_both_depths_all_domains : - hebrewData.any (λ d => d.depth == .deep && d.domain == .nP) = true ∧ - hebrewData.any (λ d => d.depth == .surface && d.domain == .nP) = true ∧ - hebrewData.any (λ d => d.depth == .deep && d.domain == .DP) = true ∧ - hebrewData.any (λ d => d.depth == .surface && d.domain == .DP) = true ∧ - hebrewData.any (λ d => d.depth == .deep && d.domain == .PP) = true ∧ - hebrewData.any (λ d => d.depth == .surface && d.domain == .PP) = true := by - exact ⟨rfl, rfl, rfl, rfl, rfl, rfl⟩ - -/-- Extraction is unavailable for all Hebrew domains tested. - This is precisely why the EIR test is needed: it provides - syntactic evidence where the extraction test cannot. -/ + (∃ e ∈ hebrewData, depthOf e = .deep ∧ domainOf e = .nP) ∧ + (∃ e ∈ hebrewData, depthOf e = .surface ∧ domainOf e = .nP) ∧ + (∃ e ∈ hebrewData, depthOf e = .deep ∧ domainOf e = .DP) ∧ + (∃ e ∈ hebrewData, depthOf e = .surface ∧ domainOf e = .DP) ∧ + (∃ e ∈ hebrewData, depthOf e = .deep ∧ domainOf e = .PP) ∧ + (∃ e ∈ hebrewData, depthOf e = .surface ∧ domainOf e = .PP) := by decide + +/-- Extraction is unavailable for all Hebrew domains tested. This is precisely + why the EIR test is needed: it provides syntactic evidence where the + extraction test cannot. -/ theorem hebrew_extraction_unavailable : - hebrewData.all (λ d => !d.extractionAvailable) = true := by - native_decide + ∀ e ∈ hebrewData, extractionAvailable e = false := by decide /-- All four cross-linguistic mixed anaphors are diagnosed as deep. -/ theorem mixed_anaphors_deep : - mixedAnaphorData.all (λ d => d.depth == .deep) = true := by - native_decide - --- ═══════════════════════════════════════════════════════════════ --- § 8: Integration with Existing Infrastructure --- ═══════════════════════════════════════════════════════════════ - -/-- Hebrew has a productive resumptive strategy in relativization - — the prerequisite for applying the EIR test. The same - resumptive pronoun type that `RelativeClause.NPRelType.resumptive` models - for relative clauses is what the EIR test probes for inside - ellipsis sites. -/ + ∀ e ∈ mixedAnaphorData, depthOf e = .deep := by decide + +/-! ### Integration with existing infrastructure -/ + +/-- Hebrew has a productive resumptive strategy in relativization — the + prerequisite for applying the EIR test. The same resumptive pronoun type + that `RelativeClause.NPRelType.resumptive` models for relative clauses is + what the EIR test probes for inside ellipsis sites. -/ theorem hebrew_has_resumptive_strategy : Hebrew.relSheResumptive.npRel = .resumptive := rfl -/-- The resumptive strategy in Hebrew relativization covers the - genitive position on the Accessibility Hierarchy, which is - where possessive resumptive pronouns (the most common type - in the EIR data) sit. -/ +/-- **The EIR test relies on base-generated resumption.** The paper's argument + turns on resumptive dependencies being fixed at LF (binding, not movement), + so ellipsis timing cannot bleed them — exactly `ResumptiveKind.bound`. The + Sichel-refined Hebrew marker `relSheBoundResumptive` carries this kind, + making the paper's "binding, not movement" mechanism true by construction + against the relativization substrate ([sichel-2014]). -/ +theorem hebrew_resumptive_is_base_generated : + Hebrew.relSheBoundResumptive.npRel.resumptiveKind = some .bound := rfl + +/-- The resumptive strategy covers the genitive position on the Accessibility + Hierarchy, where possessive resumptive pronouns (the most common type in the + EIR data) sit. -/ theorem resumptive_covers_genitive : Hebrew.relSheResumptive.Covers .genitive := by decide -/-- The gap strategy does NOT cover genitive — this is why - possessive dependencies in Hebrew require resumption, making - the EIR test applicable. -/ +/-- The gap strategy does NOT cover genitive — this is why possessive + dependencies in Hebrew require resumption, making the EIR test applicable. -/ theorem gap_excludes_genitive : ¬ Hebrew.relSheGap.Covers .genitive := by decide diff --git a/Linglib/Studies/Steedman2000.lean b/Linglib/Studies/Steedman2000.lean index a35ab69bb..ddf2c0f9d 100644 --- a/Linglib/Studies/Steedman2000.lean +++ b/Linglib/Studies/Steedman2000.lean @@ -1,4 +1,5 @@ import Linglib.Data.Examples.Steedman2000 +import Linglib.Syntax.Anaphora.Basic import Linglib.Fragments.English.Toy import Linglib.Fragments.English.Coordination import Linglib.Studies.BresnanEtAl1982 @@ -369,6 +370,23 @@ def HasWordOrderConstraints : EllipsisType → Prop instance : DecidablePred HasWordOrderConstraints := fun x => by cases x <;> unfold HasWordOrderConstraints <;> infer_instance +/-- All four of Steedman's elliptical constructions are *surface* anaphora in +Hankamer & Sag's sense ([hankamer-sag-1976]): each deletes internal structure +under identity with a linguistic antecedent. Steedman's taxonomy contains no +deep anaphor (no *do so*-type pro-form), so the depth axis is constant +`.surface` over it. -/ +instance : Anaphor.HasDepth EllipsisType := ⟨fun _ => .surface⟩ + +/-- **Cross-framework non-alignment.** Steedman's CCG cut `isSyntacticallyMediated` +(gapping/stripping derived by category composition; VP-ellipsis/sluicing handled +anaphorically) is *not* Hankamer & Sag's deep/surface cut. VP-ellipsis is the +paradigm *surface* anaphor ([hankamer-sag-1976]; Landau's own surface baseline in +[landau-2026]) yet CCG treats it as non-mediated — so the two frameworks partition +the very same constructions differently. -/ +theorem surface_not_syntacticallyMediated : + Anaphor.HasDepth.IsSurface EllipsisType.vpEllipsis ∧ + ¬ isSyntacticallyMediated .vpEllipsis := by decide + end Gapping /-! ### Cross-serial dependencies diff --git a/Linglib/Syntax/Anaphora/Basic.lean b/Linglib/Syntax/Anaphora/Basic.lean new file mode 100644 index 000000000..62a5a5188 --- /dev/null +++ b/Linglib/Syntax/Anaphora/Basic.lean @@ -0,0 +1,153 @@ +/-! +# Anaphor — Hankamer & Sag's deep/surface anaphora theory + +[hankamer-sag-1976]'s theory of (unbounded) anaphoric processes. The organizing +distinction is **control** (pragmatic/deictic vs syntactic) — the conceptual +entry point of H&S's argument — with **depth** (deep vs surface) as the +conclusion. A **deep** anaphor is interpreted at an underlying/semantic level, +can be pragmatically (deictically) controlled, and needs only a coherent semantic +referent (definite pronouns, *do it*, null complement anaphora, *pro*). A +**surface** anaphor is derived by deletion under identity with a surface +antecedent, so it requires a coherent *syntactic* antecedent (VP-ellipsis, +sluicing, gapping, argument ellipsis). Pragmatic controllability is the conceptual +cut, *not* a reliable test on its own: [landau-2026], following [merchant-2001], +holds the reliable classic diagnostics to be extraction and agreement — this file +models extraction and Landau's EIR test (`Syntax/Anaphora/Diagnostic.lean`). + +`Depth` is the genus, not an ellipsis classification: ellipsis ⊊ surface anaphora +⊊ anaphora, and the deep values (*do so*, NCA, *pro*) are precisely the +*non-ellipsis* anaphors. Ellipsis-specific machinery (deletion domains, +`[E]`-feature, ellipsis-type taxonomy) lives in `Syntax/Minimalist/Ellipsis/` and +*consumes* this axis — an `EllipsisType` is a `HasDepth` carrier with depth +`.surface`. + +This is the **unbounded**-anaphora axis, orthogonal to (and a sibling of) the +binding-theoretic Principle-A/B/C axis (`Features.BindingClass` / the `Bound` +capability): H&S explicitly set aside bounded anaphora (reflexivization) as a +separate, always-syntactic process. A reflexive is a `Bound.IsAnaphor`; *do so* +is an `Anaphor.Depth.deep`. + +Other H&S diagnostics are *not* modeled here: the missing-antecedent test (itself +judged unreliable by [landau-2026]), the coherent-semantic-entity requirement on +deep anaphora, and the Backwards Anaphora Constraint. + +## Main declarations + +* `Anaphor.Depth` — deep/surface (the conclusion). +* `Anaphor.Control` — pragmatic/syntactic (H&S's organizing distinction). +* `Anaphor.Depth.HasInternalStructure` / `Anaphor.Depth.control` — the structural + property and the depth→control map; `allowsPragmaticControl_iff_…` proves the + typology's two correlated properties coincide by construction. +* `Anaphor.HasDepth` — the carrier capability (the depth-axis analogue of `Bound`). +-/ + +namespace Anaphor + +/-- Hankamer & Sag's deep/surface classification of (unbounded) anaphoric + processes [hankamer-sag-1976]. -/ +inductive Depth where + | deep + | surface + deriving DecidableEq, Repr + +/-- How an anaphoric relation is controlled [hankamer-sag-1976]: a `pragmatic` + (deictic) anaphor can be controlled by the nonlinguistic context with no + linguistic antecedent; a `syntactic` anaphor requires a coherent linguistic + antecedent in surface structure. This is H&S's organizing distinction; depth + is the conclusion drawn from it. -/ +inductive Control where + | pragmatic + | syntactic + deriving DecidableEq, Repr + +namespace Depth + +/-- The defining structural property: surface anaphora project *syntactically + present* internal structure (deletion under identity), deep anaphora do not. + Every account of the distinction ([hankamer-sag-1976], [merchant-2001], + [landau-2026]) shares this much; they differ on the *level* (H&S: surface + structure; Merchant/Landau: LF — the difference EIR turns on). -/ +def HasInternalStructure : Depth → Prop + | .surface => True + | .deep => False + +instance : DecidablePred HasInternalStructure := fun d => + match d with + | .surface => isTrue trivial + | .deep => isFalse (fun h => h) + +-- The value lemmas are subsumed by `hasInternalStructure_iff_surface` on the simp +-- set, so they carry no `@[simp]` (kept for direct `exact` use). +theorem hasInternalStructure_surface : HasInternalStructure .surface := trivial + +theorem not_hasInternalStructure_deep : ¬ HasInternalStructure .deep := fun h => h + +@[simp] theorem hasInternalStructure_iff_surface (d : Depth) : + HasInternalStructure d ↔ d = .surface := by + cases d <;> simp [HasInternalStructure] + +/-- The control type a depth determines [hankamer-sag-1976]: deep anaphora allow + pragmatic (deictic) control; surface anaphora are syntactically controlled + (deletion under identity with a surface antecedent). -/ +def control : Depth → Control + | .deep => .pragmatic + | .surface => .syntactic + +/-- A depth *allows pragmatic control* iff it is deep — the conceptual cut H&S + start from, here a consequence of `control`, not a separate stipulation. -/ +def AllowsPragmaticControl (d : Depth) : Prop := d.control = .pragmatic + +instance : DecidablePred AllowsPragmaticControl := fun d => + inferInstanceAs (Decidable (d.control = .pragmatic)) + +/-- The depth typology's two correlated properties coincide *by construction*: a + depth allows pragmatic control iff it lacks internal structure (is deep). This + is definitional given the 2-element typology — the *empirical* content is which + anaphors get which depth, tested against data in the consuming study, not this + biconditional. -/ +@[simp] theorem allowsPragmaticControl_iff_not_hasInternalStructure (d : Depth) : + AllowsPragmaticControl d ↔ ¬ HasInternalStructure d := by + cases d <;> simp [AllowsPragmaticControl, control, HasInternalStructure] + +end Depth + +/-! ### The `HasDepth` capability -/ + +/-- A carrier whose every element has a Hankamer & Sag `Anaphor.Depth` — the + depth-axis analogue of `Bound` (the binding-class axis). An ellipsis-type + record, a paper's datum struct, or a syntactic object each supplies its own + instance. -/ +class HasDepth (α : Type _) where + /-- The deep/surface depth of every element. -/ + depth : α → Depth + +/-- `a` is a deep anaphor (no internal structure; pragmatic control). -/ +def HasDepth.IsDeep {α : Type _} [HasDepth α] (a : α) : Prop := + HasDepth.depth a = .deep + +/-- `a` is a surface anaphor (deletion under identity; syntactic control). -/ +def HasDepth.IsSurface {α : Type _} [HasDepth α] (a : α) : Prop := + HasDepth.depth a = .surface + +/-- The element's structural property, via its depth. -/ +def HasDepth.HasInternalStructure {α : Type _} [HasDepth α] (a : α) : Prop := + (HasDepth.depth a).HasInternalStructure + +instance {α : Type _} [HasDepth α] (a : α) : Decidable (HasDepth.IsDeep a) := by + unfold HasDepth.IsDeep; infer_instance + +instance {α : Type _} [HasDepth α] (a : α) : Decidable (HasDepth.IsSurface a) := by + unfold HasDepth.IsSurface; infer_instance + +instance {α : Type _} [HasDepth α] (a : α) : + Decidable (HasDepth.HasInternalStructure a) := by + unfold HasDepth.HasInternalStructure; infer_instance + +/-- `IsSurface` is exactly having internal structure. -/ +@[simp] theorem HasDepth.hasInternalStructure_iff_isSurface + {α : Type _} [HasDepth α] (a : α) : + HasDepth.HasInternalStructure a ↔ HasDepth.IsSurface a := by + unfold HasDepth.HasInternalStructure HasDepth.IsSurface + exact Depth.hasInternalStructure_iff_surface _ + +end Anaphor diff --git a/Linglib/Syntax/Anaphora/Diagnostic.lean b/Linglib/Syntax/Anaphora/Diagnostic.lean new file mode 100644 index 000000000..025d64e49 --- /dev/null +++ b/Linglib/Syntax/Anaphora/Diagnostic.lean @@ -0,0 +1,160 @@ +import Mathlib.Data.Set.Subsingleton +import Mathlib.Data.Set.Image +import Linglib.Syntax.Anaphora.Basic + +/-! +# Diagnostic tests + +A **diagnostic** is an abductive test for a latent classification `C` from an +observed outcome `Ω`: each outcome is *consistent with* a set of latent values — +the values that could have produced it. From that single datum everything else +follows: + +* the test is **conclusive** at an outcome when the consistent set is a + `Set.Subsingleton` (it pins the latent value to at most one); +* it is **sound** against a ground-truth outcome model `truth : C → Ω` when the + true value is always among the consistent ones (the test never rules out the + truth); +* it **decides** `C` when, on every true value, the outcome's consistent set is + exactly that value — sound *and* conclusive everywhere it matters; +* one test **refines** another when its consistent sets are pointwise smaller (it + rules out at least as much), a preorder under which conclusiveness is monotone. + +`ofCauses` builds a diagnostic from named *causes* (each entailing a latent value) +— the faithful Hankamer–Sag picture, where a test outcome is explained by a set of +causes and the consistent latent values are what those causes entail. + +The structure is domain-general (any `Ω`, `C`); the anaphora specialisation +(`Anaphor.DepthCause`, the ground-truth `Depth.testOutcome`) instantiates it for +Hankamer & Sag depth, and the per-test instances (EIR, extraction) live in the +paper that draws the comparison. +-/ + +universe u v w + +/-- An abductive diagnostic: each observed outcome `o : Ω` is consistent with the + set `consistent o` of latent values in `C` that could have produced it. -/ +@[ext] +structure Diagnostic (Ω : Type u) (C : Type v) where + /-- The latent values consistent with an outcome (the verdict it licenses). -/ + consistent : Ω → Set C + +namespace Diagnostic + +variable {Ω : Type u} {C : Type v} {γ : Type w} + +/-- `t` is conclusive at `o` when the outcome pins the latent value to at most one + possibility. -/ +def IsConclusiveAt (t : Diagnostic Ω C) (o : Ω) : Prop := Set.Subsingleton (t.consistent o) + +/-- `t` is conclusive when every outcome is. -/ +def Conclusive (t : Diagnostic Ω C) : Prop := ∀ o, t.IsConclusiveAt o + +/-- `t` is sound for a ground-truth outcome model `truth : C → Ω` when the true + latent value is always among the explanations of the outcome it yields — the + test never rules out the truth. -/ +def SoundFor (t : Diagnostic Ω C) (truth : C → Ω) : Prop := + ∀ c, c ∈ t.consistent (truth c) + +/-- `t` decides `C` against `truth` when, on every true value, the outcome's + consistent set is exactly that value: a sound, conclusive verdict everywhere + it is exercised. -/ +def Decides (t : Diagnostic Ω C) (truth : C → Ω) : Prop := + ∀ c, t.consistent (truth c) = {c} + +/-- `t` refines `s` when every outcome's consistent set under `t` is contained in + `s`'s — `t` rules out at least as much. This is the pointwise-`⊆` informativeness + order on tests; a finer (more informative) test is `≤`. -/ +def Refines (t s : Diagnostic Ω C) : Prop := ∀ o, t.consistent o ⊆ s.consistent o + +@[refl] theorem Refines.refl (t : Diagnostic Ω C) : t.Refines t := fun _ => Set.Subset.rfl + +theorem Refines.trans {t s r : Diagnostic Ω C} (h₁ : t.Refines s) (h₂ : s.Refines r) : + t.Refines r := fun o => (h₁ o).trans (h₂ o) + +/-- Tests are preordered by refinement: `t ≤ s` iff `t.Refines s` (`t` is finer, + ruling out at least as much). -/ +instance : Preorder (Diagnostic Ω C) where + le := Refines + le_refl := Refines.refl + le_trans _ _ _ := Refines.trans + +/-- Refinement is monotone in conclusiveness: a refinement of a conclusive test is + conclusive (a subset of a subsingleton is a subsingleton). -/ +theorem Conclusive.mono {t s : Diagnostic Ω C} (h : t.Refines s) (hs : s.Conclusive) : + t.Conclusive := fun o => Set.Subsingleton.anti (hs o) (h o) + +/-- Deciding is exactly soundness plus conclusiveness on the truth outcomes. -/ +theorem decides_iff {t : Diagnostic Ω C} {truth : C → Ω} : + t.Decides truth ↔ t.SoundFor truth ∧ ∀ c, t.IsConclusiveAt (truth c) := by + constructor + · intro h + refine ⟨fun c => ?_, fun c => ?_⟩ + · simp [h c] + · show Set.Subsingleton (t.consistent (truth c)) + rw [h c]; exact Set.subsingleton_singleton + · rintro ⟨hsound, hsub⟩ c + exact Set.eq_singleton_iff_unique_mem.mpr ⟨hsound c, fun x hx => hsub c hx (hsound c)⟩ + +/-- A deciding test is sound. -/ +theorem Decides.soundFor {t : Diagnostic Ω C} {truth : C → Ω} (h : t.Decides truth) : + t.SoundFor truth := (decides_iff.mp h).1 + +/-! ### Building a diagnostic from causes -/ + +/-- Build a diagnostic from named *causes*: each outcome is explained by a set of + causes, and the consistent latent values are exactly what those causes entail + (`Set.image`). The verdict is inconclusive at an outcome precisely when its + causes entail more than one latent value — so ambiguity is *derived* from the + cause structure, not stipulated. -/ +def ofCauses (causes : Ω → Set γ) (entails : γ → C) : Diagnostic Ω C := + ⟨fun o => entails '' causes o⟩ + +@[simp] theorem ofCauses_consistent (causes : Ω → Set γ) (entails : γ → C) (o : Ω) : + (ofCauses causes entails).consistent o = entails '' causes o := rfl + +/-- Fewer causes ⇒ a refinement: shrinking the cause sets pointwise rules out at + least as much. -/ +theorem ofCauses_refines {c₁ c₂ : Ω → Set γ} (entails : γ → C) + (h : ∀ o, c₁ o ⊆ c₂ o) : (ofCauses c₁ entails).Refines (ofCauses c₂ entails) := + fun o => Set.image_mono (h o) + +end Diagnostic + +/-! ### Anaphoric-depth specialisation + +The latent classification for an anaphora diagnostic is `Anaphor.Depth`; the +causes are the Hankamer–Sag / Landau reasons a depth test can pass or fail. -/ + +namespace Anaphor + +/-- The causes of a depth-test outcome ([hankamer-sag-1976], [landau-2026], the + (42c)/(42d) "two reasons for the star" analysis): a site passes because it + `hostsVariable` (has surface structure for the resumptive or trace); it fails + because it is a `deepAnaphor` (no structure → (42d)), or — for movement-based + tests only — because a *surface* site's dependency is `derivationalBleeding` + (bled by ellipsis timing → (42c)) or `islandBlocking` (an inherent barrier; + resumption is "indifferent to island constraints"). Only `deepAnaphor` entails + `deep`; the surface-site confounds (`derivationalBleeding`/`islandBlocking`) + entail `surface`. The asymmetry between EIR (failure = `deepAnaphor` only) and + extraction (all three) is exactly which confounds each test admits. -/ +inductive DepthCause where + | hostsVariable + | deepAnaphor + | derivationalBleeding + | islandBlocking + deriving DecidableEq, Repr + +/-- The depth each cause entails. -/ +def DepthCause.entails : DepthCause → Depth + | .deepAnaphor => .deep + | _ => .surface + +/-- The ground-truth test outcome on a site of known depth: a surface anaphor + passes (it has internal structure to host the variable/trace), a deep one + fails. This is the `truth` model every depth diagnostic is judged against. -/ +def Depth.testOutcome : Depth → Bool + | .surface => true + | .deep => false + +end Anaphor diff --git a/Linglib/Syntax/Minimalist/Ellipsis/DeletionDomain.lean b/Linglib/Syntax/Minimalist/Ellipsis/DeletionDomain.lean index 75274edde..29dd80490 100644 --- a/Linglib/Syntax/Minimalist/Ellipsis/DeletionDomain.lean +++ b/Linglib/Syntax/Minimalist/Ellipsis/DeletionDomain.lean @@ -1,5 +1,6 @@ import Linglib.Syntax.Minimalist.Voice import Linglib.Semantics.Lexical.Roots.Template +import Linglib.Syntax.Anaphora.Basic /-! # Ellipsis: [E] Features and Deletion Domains @@ -210,6 +211,15 @@ structure EllipsisType where name : String := "" deriving Repr +/-- Every ellipsis type is a Hankamer–Sag **surface** anaphor ([hankamer-sag-1976]): + PF-deletion under identity leaves full internal structure in place. So + `EllipsisType` is an `Anaphor.HasDepth` carrier (depth `.surface` throughout) — + the depth-axis analogue of a `Bound` pronoun. -/ +instance : Anaphor.HasDepth EllipsisType := ⟨fun _ => .surface⟩ + +/-- Witness: every ellipsis type is surface. -/ +theorem isSurface (e : EllipsisType) : Anaphor.HasDepth.IsSurface e := rfl + /-- Is a spine position inside the deletion domain of an ellipsis type? A position is in the deletion domain iff it is strictly below the [E]-bearing head. -/ diff --git a/blog/references.bib b/blog/references.bib index 410c49c89..24a7200c2 100644 --- a/blog/references.bib +++ b/blog/references.bib @@ -22124,12 +22124,12 @@ @article{landau-2026 subfield = {syntax}, validated = {true}, role = {formalized}, - sources = {Phenomena/Ellipsis/Studies/Landau2026.lean} + sources = {Studies/Landau2026.lean} }% REF: Hankamer, J. & Sag, I. (1976). Deep and Surface Anaphors. @article{hankamer-sag-1976, author = {Hankamer, Jorge and Sag, Ivan A.}, year = {1976}, - title = {Deep and Surface Anaphors}, + title = {Deep and Surface Anaphora}, journal = {Linguistic Inquiry}, volume = {7}, number = {3}, @@ -22137,7 +22137,7 @@ @article{hankamer-sag-1976 subfield = {syntax}, validated = {true}, role = {cited}, - sources = {Phenomena/Ellipsis/Studies/Landau2026.lean} + sources = {Studies/Landau2026.lean} }% REF: Chomsky, N. (1982). Some Concepts and Consequences of the Theory of Government and Binding. @book{chomsky-1982, author = {Chomsky, Noam}, @@ -22149,7 +22149,7 @@ @book{chomsky-1982 subfield = {syntax}, validated = {true}, role = {cited}, - sources = {Phenomena/Ellipsis/Studies/Landau2026.lean} + sources = {Studies/Landau2026.lean} } @article{alexeyenko-zeijlstra-2025, author = {Alexeyenko, Sascha and Zeijlstra, Hedde}, @@ -22285,7 +22285,7 @@ @article{bruening-2019 subfield = {syntax}, validated = {true}, role = {cited}, - sources = {Phenomena/Ellipsis/Studies/Landau2026.lean} + sources = {Studies/Landau2026.lean} } @article{paape-vasishth-2026, author = {Paape, Dario and Vasishth, Shravan},