tenfourty · tenfourty · Jun 25, 2026 · Jun 25, 2026
diff --git a/docs/entities.md b/docs/entities.md
@@ -108,9 +108,34 @@ Five-tier matching against document metadata and content:
 
 - Longer alias matches are preferred (e.g. "Kit Martin" matches before "Kit")
 - Very short single names (<=3 chars, e.g. "Ed", "Jo") are skipped for content and title matching (still matched via tags)
-- Single names 4+ chars (e.g. "Anders", "Wren") are matched in both content and title
+- Single names 4+ chars (e.g. "Wren") are matched in both content and title, unless the name is ambiguous (see below)
 - File-stem aliases with hyphens (e.g. "dave-martin") are excluded from content matching
 
+#### Bare ambiguous first names (#36)
+
+A **bare single first name** (≥4 chars, e.g. "Alexandre", "Thomas", "Jean") that is
+**ambiguous** — claimed by 2+ entities — does **not** auto-link on its own in the
+participant, title, or content tiers. It links only when the entity is *corroborated*
+elsewhere in the same document by a stronger signal:
+
+- a tag (`tagged`) or source-ref (`source_ref`) match,
+- a multi-word / full-name match (`participant`, `title`, or `discussed`),
+- an exact title-part match on a name that is not itself bare-ambiguous, or
+- appearing in the document's frontmatter `attendees`.
+
+Otherwise the bare match is dropped (it's a likely false positive — the wrong colleague).
+**Unambiguous** bare first names (owned by exactly one entity) still link as before.
+
+Ambiguity is computed by `build_first_name_index()`, which **accent-folds** names
+(`Jérémy` → `jeremy`) so an accented name and its ASCII near-twin collide into the same
+ambiguity bucket. Folding is used to widen the ambiguity set (a more conservative matcher) and to
+compare `attendees` with entity names — it never broadens a title or content match. So a
+bare ASCII "Jeremy" that could be "Jérémy Cotineau" or "Jeremy Brown" is gated until one is corroborated.
+
+The indexer passes the document's attendee names and a prebuilt `name_owners` index
+(rebuilt when indexing creates new entities) into `find_entity_mentions`. A genuinely wrong
+link that survives anyway can be removed with `kbx entity unlink` (see §Suppressions).
+
 ## Suppressions (manual unlink/relink)
 
 Automatic linking is heuristic, so it occasionally produces a false positive (a bare

diff --git a/src/kb/entities.py b/src/kb/entities.py
@@ -4,6 +4,7 @@
 
 import json
 import re
+import unicodedata
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
@@ -589,6 +590,52 @@ def _boundary_pattern(escaped: str, name: str, flags: int = 0) -> re.Pattern[str
     return re.compile(rf"\b{escaped}(?=\s|$|[^\w])", flags)
 
 
+def _fold_name(text: str) -> str:
+    """Lowercase + strip combining accents (NFKD) for collision detection (#36).
+
+    Detects accented/ASCII near-twins ("Jérémy" / "Jeremy"). Base letters are kept, so
+    non-Latin names (e.g. Cyrillic) do not all fold to "" and collide with each other.
+    """
+    decomposed = unicodedata.normalize("NFKD", text.lower())
+    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))
+
+
+def _is_gateable_single_name(name: str) -> bool:
+    """Decide whether *name* is a single-word name subject to the #36 ambiguity gate.
+
+    Meant to mirror the inclusion rules in ``_build_name_patterns``. Rejected: multi-word
+    names, names under 4 chars, lowercase hyphenated file stems, all-caps names of at most
+    4 chars, entries of ``_COMMON_WORDS``, and ``src:`` IDs.
+    """
+    word_count = len(name.split())
+    if word_count != 1 or len(name) < 4:
+        return False
+    lowered = name.lower()
+    is_file_stem = "-" in name and name == lowered  # e.g. "dave-kowalski"
+    is_abbreviation = name.isupper() and len(name) <= 4  # short all-caps token
+    if is_file_stem or is_abbreviation:
+        return False
+    if lowered in _COMMON_WORDS:
+        return False
+    # ``src:`` aliases are source IDs, not personal names.
+    return not name.startswith("src:")
+
+
+def build_first_name_index(entities: list[Entity]) -> dict[str, set[int]]:
+    """Map each folded single first-name → set of entity ids that claim it (#36).
+
+    A folded name owned by 2+ entities is *ambiguous* (a bare hit needs corroboration).
+    Empty folds are skipped. Build once per entity set; pass to ``find_entity_mentions``.
+    """
+    owners: dict[str, set[int]] = {}
+    for entity in entities:
+        for name in (entity.name, *entity.aliases):
+            folded = _fold_name(name) if _is_gateable_single_name(name) else ""
+            if folded:  # an empty fold would lump unrelated names into one bucket
+                owners.setdefault(folded, set()).add(entity.id)
+    return owners
+
+
 def _build_name_patterns(entity: Entity) -> list[tuple[re.Pattern[str], int]]:
     """Build regex patterns for matching entity names/aliases in content.
 
@@ -670,6 +717,8 @@ def find_entity_mentions(
     *,
     cached_patterns: list[tuple[re.Pattern[str], Entity]] | None = None,
     suppressed_ids: set[int] | None = None,
+    attendees: list[str] | None = None,
+    name_owners: dict[str, set[int]] | None = None,
 ) -> list[EntityMention]:
     """Find entity mentions in a document's metadata and content.
 
@@ -682,12 +731,24 @@ def find_entity_mentions(
     Pass cached_patterns (from build_entity_patterns()) to avoid recompiling
     regex patterns on every call. If None, patterns are built on the fly.
 
-    Disambiguation: prefer longer alias matches. If ambiguous (e.g. "Anders"),
-    match all possible entities.
+    First-name disambiguation (#36): a *bare* single first name that is **ambiguous**
+    (claimed by 2+ entities — accent-folded, so "Jérémy"/"Jeremy" collide) does not
+    auto-link on its own. It only links if the entity is *corroborated* in the same
+    document — by a tag, title participant, source-ref, full-name match, or by appearing
+    in ``attendees``. Unambiguous bare first names still link as before. Pass
+    ``name_owners`` (from build_first_name_index()) to skip rebuilding the ambiguity map.
     """
     mentions: list[EntityMention] = []
     seen: set[tuple[int, str]] = set()
     _suppressed = suppressed_ids or set()
+    if name_owners is None:
+        name_owners = build_first_name_index(entities)
+
+    # Entities anchored by a strong signal in this doc. A bare ambiguous first name
+    # only links if its entity ends up here (#36). Built incrementally as we go.
+    corroborated: set[int] = set()
+    # Deferred bare-ambiguous matches, resolved once all strong matches are known.
+    pending: list[tuple[int, str]] = []
 
     def _add(entity_id: int, mention_type: str) -> None:
         if entity_id in _suppressed:
@@ -697,70 +758,120 @@ def _add(entity_id: int, mention_type: str) -> None:
             seen.add(key)
             mentions.append(EntityMention(entity_id=entity_id, mention_type=mention_type))
 
-    # 1. Tag matching
+    def _add_strong(entity_id: int, mention_type: str) -> None:
+        """Add a mention and corroborate the entity (rescues its bare-name matches)."""
+        _add(entity_id, mention_type)
+        corroborated.add(entity_id)
+
+    def _is_ambiguous_bare(name: str) -> bool:
+        """A bare first name claimed by 2+ entities (accent-folded) — #36."""
+        return _is_gateable_single_name(name) and len(name_owners.get(_fold_name(name), ())) >= 2
+
+    # 1. Tag matching — explicit annotation, always strong
     for tag in tags:
         tag_lower = tag.lower().strip()
         if not tag_lower:
             continue
         for entity in entities:
             all_names = [entity.name.lower()] + [a.lower() for a in entity.aliases]
             if tag_lower in all_names:
-                _add(entity.id, "tagged")
+                _add_strong(entity.id, "tagged")
+
+    # 3.5. Source ID matching — unambiguous, case-sensitive substring check; strong
+    for entity in entities:
+        for alias in entity.aliases:
+            if not alias.startswith("src:"):
                 continue
-            # Partial match: tag is a first name or short form
-            for name in all_names:
-                if tag_lower == name:
-                    _add(entity.id, "tagged")
-                    break
-
-    # 2. Title participant parsing
-    # Split on common separators: " / ", " x ", " & ", " vs "
+            source_id = alias[4:]  # strip "src:" prefix
+            if source_id in content:
+                _add_strong(entity.id, "source_ref")
+                break  # one source_ref match per entity is enough
+
+    # Attendees corroborate (no mention emitted) — #36 context from frontmatter.
+    if attendees:
+        attendee_folded = {_fold_name(a) for a in attendees if a}
+        for entity in entities:
+            candidate = {_fold_name(entity.name), *(_fold_name(a) for a in entity.aliases)}
+            if candidate & attendee_folded:
+                corroborated.add(entity.id)
+
+    # 2. Title participant parsing — split on " / ", " x ", " & ", " vs "
     parts = re.split(r"\s+/\s+|\s+x\s+|\s+&\s+|\s+vs\s+", title, flags=re.IGNORECASE)
     for part in parts:
         part = part.strip()
         if not part:
             continue
         part_lower = part.lower()
         for entity in entities:
-            all_names = [entity.name.lower()] + [a.lower() for a in entity.aliases]
-            if part_lower in all_names or any(
-                re.search(rf"\b{re.escape(n)}\b", part_lower)
-                for n in all_names
-                if len(n) >= 4  # skip very short names for substring matching
-            ):
-                _add(entity.id, "participant")
-
-    # 3. Title substring matching — catch names embedded in the title
-    # e.g. "Anders Sync Notes", "Wren 1:1", "Helix Refactor Review"
+            matched_strong = False
+            matched_bare_ambiguous = False
+            for name in [entity.name, *entity.aliases]:
+                n_lower = name.lower()
+                is_exact = part_lower == n_lower
+                is_substr = len(n_lower) >= 4 and (
+                    re.search(rf"\b{re.escape(n_lower)}\b", part_lower) is not None
+                )
+                if not (is_exact or is_substr):
+                    continue
+                if _is_ambiguous_bare(name):
+                    matched_bare_ambiguous = True
+                    continue  # keep looking for a stronger name for this entity
+                # Exact whole-part match or a multi-word name corroborates; an
+                # unambiguous bare substring just links.
+                if is_exact or len(name.split()) >= 2:
+                    _add_strong(entity.id, "participant")
+                else:
+                    _add(entity.id, "participant")
+                matched_strong = True
+                break
+            if matched_bare_ambiguous and not matched_strong:
+                pending.append((entity.id, "participant"))
+
+    # 3. Title substring matching — names embedded in the title (e.g. "Wren 1:1")
     title_lower = title.lower()
     for entity in entities:
-        all_names = [entity.name, *list(entity.aliases)]
-        for name in all_names:
+        matched_strong = False
+        matched_bare_ambiguous = False
+        for name in [entity.name, *list(entity.aliases)]:
             # Skip very short names and file-stem aliases
             if len(name) <= 3:
                 continue
             if "-" in name and name == name.lower():
                 continue
-            if re.search(rf"\b{re.escape(name)}\b", title_lower, re.IGNORECASE):
-                _add(entity.id, "title")
-                break  # one match per entity is enough
-
-    # 3.5. Source ID matching — unambiguous, case-sensitive substring check
-    for entity in entities:
-        for alias in entity.aliases:
-            if not alias.startswith("src:"):
+            if not re.search(rf"\b{re.escape(name)}\b", title_lower, re.IGNORECASE):
                 continue
-            source_id = alias[4:]  # strip "src:" prefix
-            if source_id in content:
-                _add(entity.id, "source_ref")
-                break  # one source_ref match per entity is enough
+            if _is_ambiguous_bare(name):
+                matched_bare_ambiguous = True
+                continue  # keep looking for a stronger name for this entity
+            if len(name.split()) >= 2:
+                _add_strong(entity.id, "title")
+            else:
+                _add(entity.id, "title")
+            matched_strong = True
+            break  # one strong match per entity is enough
+        if matched_bare_ambiguous and not matched_strong:
+            pending.append((entity.id, "title"))
 
     # 4. Content name matching with disambiguation
     if cached_patterns is None:
         cached_patterns = build_entity_patterns(entities)
 
     for pattern, entity in cached_patterns:
-        if pattern.search(content):
+        m = pattern.search(content)
+        if m is None:
+            continue
+        matched = m.group(0)
+        if len(matched.split()) == 1 and len(name_owners.get(_fold_name(matched), ())) >= 2:
+            pending.append((entity.id, "discussed"))  # bare ambiguous — defer
+            continue
+        if len(matched.split()) >= 2:
+            _add_strong(entity.id, "discussed")
+        else:
             _add(entity.id, "discussed")
 
+    # Resolve deferred bare-ambiguous matches: link only if corroborated elsewhere.
+    for entity_id, mention_type in pending:
+        if entity_id in corroborated:
+            _add(entity_id, mention_type)
+
     return mentions
diff --git a/src/kb/indexer.py b/src/kb/indexer.py
@@ -13,6 +13,7 @@
     Entity,
     EntityMention,
     build_entity_patterns,
+    build_first_name_index,
     find_entity_mentions,
     load_entities,
     seed_entities,
@@ -178,6 +179,8 @@ def index_all(
         seed_entities(db, project_root)
     entities = load_entities(db)
     entity_patterns = build_entity_patterns(entities)
+    # Ambiguous-first-name index (#36): folded single name → owning entity ids.
+    name_owners = build_first_name_index(entities)
 
     # Entity-link suppressions (#35): per-document "do not link entity X here", kept in a
     # sidecar so they survive reindex + Granola sync. Resolve names → entity ids once.
@@ -322,13 +325,16 @@ def _flush_embeddings() -> None:
             _suppressed_ids = {
                 entity_name_to_id[n] for n in _doc_suppressed if n in entity_name_to_id
             }
+            _attendee_names = [a.get("name", "") for a in doc.attendees if a.get("name")]
             mentions = find_entity_mentions(
                 doc.title,
                 doc.tags,
                 section_content,
                 entities,
                 cached_patterns=entity_patterns,
                 suppressed_ids=_suppressed_ids,
+                attendees=_attendee_names,
+                name_owners=name_owners,
             )
             entity_id_set = {m.entity_id for m in mentions}
             result.entities_linked += len(mentions)
@@ -439,6 +445,7 @@ def _flush_embeddings() -> None:
             if result.entities_created > 0:
                 entities = load_entities(db)
                 entity_patterns = build_entity_patterns(entities)
+                name_owners = build_first_name_index(entities)
             if embedder:
                 embedder.release_gpu_memory()
             gc.collect()