MotleyAI · ZmeiGorynych · May 14, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
diff --git a/docs/concepts/ingestion.md b/docs/concepts/ingestion.md
@@ -153,6 +153,15 @@ flags compose: `--demo` runs first (creating the Jaffle Shop datasource),
 then the startup-ingest pass runs over every datasource including the
 freshly-created demo.
 
+Each per-datasource pass refreshes embeddings for the datasource doc,
+every visible model + its visible children, **and every memory with at
+least one canonical entity rooted at the datasource** (DEV-1416). A stale
+`embeddings.db` (created without an `OPENAI_API_KEY`, or after a manual
+`memories.yaml` edit) is therefore repaired by the next
+`--ingest-on-startup` with no extra step. Per-memory embed failures
+surface as `IngestionError(model_name="memory:<id>", …)` in the
+result's `errors` list.
+
 ### CLI
 
 ```bash

diff --git a/docs/concepts/search.md b/docs/concepts/search.md
@@ -267,7 +267,12 @@ sql-mode and query-backed models are silently skipped in v1.
   (`EmbeddingService._apply_pending`) issues one batched
   `get_embeddings_for_canonical_ids` for the hash-skip filter and one
   batched `save_embeddings` for the persist step (DEV-1405) — refresh
-  cost is independent of subtree size.
+  cost is independent of subtree size. **Memories** are included in the
+  `slayer ingest` / `--ingest-on-startup` per-datasource refresh
+  (DEV-1416), filtered to memories with at least one canonical entity
+  rooted at the current datasource — so a stale `embeddings.db` can be
+  repaired by re-running ingest; no separate
+  `slayer embeddings refresh` step is required.
 - **Cascade** semantics (DEV-1405 fix): `delete_embeddings_for_canonical`
   matches the canonical id exactly OR as a strict dotted-path descendant
   (`<root>.<...>`) — never as a character prefix. So `delete_memory(4)`

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "motley-slayer"
-version = "0.6.7"
+version = "0.6.8"
 description = "A lightweight, agent-first semantic layer for AI agents"
 requires-python = ">=3.11"
 license = "MIT"

diff --git a/slayer/core/models.py b/slayer/core/models.py
@@ -27,38 +27,91 @@
 _STRING_LITERAL_RE = re.compile(r"'[^']*'")
 
 
-def _validate_model_name(name: str, context: str) -> str:
-    """Reject model/query names containing ``__`` or ``.``.
+class _SubstringRule:
+    """Single source of truth for a forbidden substring inside a name.
 
-    Model and query names become SQL table aliases where ``__`` encodes
-    join paths, so both separators are reserved.
+    Each rule pairs the forbidden character / digraph with the rationale.
+    Every validator that rejects the same substring uses the same rule
+    so the wording (and the rejection rationale) lives in one place.
     """
-    if "__" in name:
+
+    __slots__ = ("substring", "reason")
+
+    def __init__(self, *, substring: str, reason: str) -> None:
+        self.substring = substring
+        self.reason = reason
+
+    def check(self, name: str, context: str) -> None:
+        if self.substring in name:
+            raise ValueError(
+                f"{context} '{name}' must not contain "
+                f"{self.substring!r}; {self.reason}"
+            )
+
+
+_NO_DUNDER = _SubstringRule(
+    substring="__",
+    reason="double underscores are reserved for join path aliases in "
+           "generated SQL.",
+)
+_NO_DOT = _SubstringRule(
+    substring=".",
+    reason="dots are the canonical-id namespace delimiter "
+           "(``<ds>.<model>.<leaf>``) and the dotted-path reference "
+           "syntax in queries.",
+)
+_NO_COLON = _SubstringRule(
+    substring=":",
+    reason="colons are reserved as the aggregation separator "
+           "(``revenue:sum``) and the ``memory:<int>`` canonical-id "
+           "prefix.",
+)
+_NO_FWD_SLASH = _SubstringRule(
+    substring="/",
+    reason="path separators break the storage layout.",
+)
+_NO_BACK_SLASH = _SubstringRule(
+    substring="\\",
+    reason="path separators break the storage layout.",
+)
+_NO_NUL = _SubstringRule(
+    substring="\x00",
+    reason="NUL bytes are filesystem-unsafe.",
+)
+
+
+def _require_non_empty_trimmed(v: str, context: str) -> None:
+    """Reject empty / whitespace-only inputs and inputs with
+    leading or trailing whitespace."""
+    if not v or not v.strip():
         raise ValueError(
-            f"{context} name '{name}' must not contain '__'. "
-            f"Double underscores are reserved for join path aliases in generated SQL."
+            f"{context} must be a non-empty string; got {v!r}."
         )
-    if "." in name:
+    if v.strip() != v:
         raise ValueError(
-            f"{context} name '{name}' must not contain '.'. "
-            f"Dots are path syntax for referencing joined models in queries."
+            f"{context} must not have leading/trailing whitespace; "
+            f"got {v!r}."
         )
+
+
+def _validate_model_name(name: str, context: str) -> str:
+    """Reject model/query names containing ``__``, ``.``, or ``:``."""
+    label = f"{context} name"
+    _NO_DUNDER.check(name=name, context=label)
+    _NO_DOT.check(name=name, context=label)
+    _NO_COLON.check(name=name, context=label)
     return name
 
 
 def _validate_column_name(name: str, context: str) -> str:
-    """Reject dimension/measure names containing ``.``.
+    """Reject dimension/measure names containing ``.`` or ``:``.
 
-    Dots are path syntax in queries (``customers.name``), not part of names.
-    ``__`` is allowed — it encodes flattened join paths in virtual models
-    created by ``_query_as_model`` (e.g., ``stores__name``).
+    ``__`` is allowed — it encodes flattened join paths in virtual
+    models created by ``_query_as_model`` (e.g., ``stores__name``).
     """
-    if "." in name:
-        raise ValueError(
-            f"{context} name '{name}' must not contain '.'. "
-            f"Dots are path syntax for referencing joined models in queries, "
-            f"not part of dimension or measure names."
-        )
+    label = f"{context} name"
+    _NO_DOT.check(name=name, context=label)
+    _NO_COLON.check(name=name, context=label)
     return name
 
 
@@ -360,37 +413,26 @@ def _validate_name(cls, v: str) -> str:
     @field_validator("data_source")
     @classmethod
     def _validate_data_source_format(cls, v: str) -> str:
-        # Format-only checks (run on every input). Emptiness is enforced in
-        # ``_require_data_source_unless_query_backed`` below so query-backed
-        # models can be constructed before their cache populator fills in
-        # ``data_source`` from the resolved virtual model. Whitespace-strip
-        # mismatch and NUL are rejected here so the rule mirrors the
-        # ``DatasourceConfig.name`` validator and the storage-layer
-        # ``_validate_path_component``.
-        if v and v.strip() != v:
+        # Format-only checks (run on every input). Emptiness is enforced
+        # in ``_require_data_source_unless_query_backed`` below so
+        # query-backed models can be constructed before their cache
+        # populator fills in ``data_source`` from the resolved virtual
+        # model. Whitespace-strip mismatch and substring rules mirror
+        # ``DatasourceConfig.name`` so the two canonical-id ingress
+        # points share validation logic via the shared ``_NO_*`` rules.
+        if not v:
+            return v
+        if v.strip() != v:
             raise ValueError(
                 f"Model 'data_source' must not have leading/trailing "
                 f"whitespace; got {v!r}."
             )
-        if "\x00" in v:
-            raise ValueError(
-                f"Model 'data_source' must not contain NUL bytes; got {v!r}."
-            )
-        if "/" in v or "\\" in v:
-            raise ValueError(
-                f"Model 'data_source' must not contain path separators "
-                f"('/' or '\\'); got {v!r}."
-            )
-        # DEV-1405: dot is the canonical-id namespace delimiter
-        # (``<ds>.<model>.<leaf>``). Allowing dots in a data_source name
-        # would let ``delete_datasource('prod')`` cascade-nuke embeddings
-        # belonging to a sibling datasource named ``prod.legacy``.
-        if "." in v:
-            raise ValueError(
-                f"Model 'data_source' must not contain '.'; "
-                f"dots are the canonical-id namespace delimiter "
-                f"(``<ds>.<model>.<leaf>``). Got {v!r}."
-            )
+        label = "Model 'data_source'"
+        _NO_NUL.check(name=v, context=label)
+        _NO_FWD_SLASH.check(name=v, context=label)
+        _NO_BACK_SLASH.check(name=v, context=label)
+        _NO_DOT.check(name=v, context=label)
+        _NO_COLON.check(name=v, context=label)
         return v
 
     @model_validator(mode="after")
@@ -650,39 +692,23 @@ def _apply_schema_migrations_and_aliases(cls, data: Any) -> Any:
     @field_validator("name")
     @classmethod
     def _validate_name(cls, v: str) -> str:
-        # DEV-1405: datasource names are the leading segment of every
-        # canonical-id (``<ds>``, ``<ds>.<model>``, ``<ds>.<model>.<leaf>``)
-        # and become a path component in YAML storage
-        # (``datasources/<name>.yaml``, ``models/<name>/...``). They must
-        # therefore reject:
-        #   - path separators / NUL (filesystem safety, mirrors
-        #     ``_validate_path_component`` at the storage layer)
-        #   - ``.`` (canonical-id namespace delimiter: ``prod`` vs ``prod.db``
-        #     would otherwise collide in cascade-delete prefix matches)
-        #   - whitespace-only / empty (storage primary key)
-        # ``__`` is intentionally NOT rejected: datasource names never become
-        # SQL table aliases, so the join-path-alias reservation that applies
-        # to model and query names doesn't apply here.
-        if not v or not v.strip():
-            raise ValueError(
-                f"Datasource 'name' must be a non-empty string; got {v!r}."
-            )
-        if v.strip() != v:
-            raise ValueError(
-                f"Datasource 'name' must not have leading/trailing whitespace; "
-                f"got {v!r}."
-            )
-        for ch in ("/", "\\", "\x00"):
-            if ch in v:
-                raise ValueError(
-                    f"Datasource 'name' must not contain {ch!r}; got {v!r}."
-                )
-        if "." in v:
-            raise ValueError(
-                f"Datasource 'name' must not contain '.'; dots are the "
-                f"canonical-id namespace delimiter "
-                f"(``<ds>.<model>.<leaf>``). Got {v!r}."
-            )
+        # Datasource names are the leading segment of every canonical-id
+        # (``<ds>``, ``<ds>.<model>``, ``<ds>.<model>.<leaf>``) and a
+        # path component in YAML storage (``datasources/<name>.yaml``,
+        # ``models/<name>/...``). The substring rules are shared with
+        # ``SlayerModel.data_source`` via the module-level ``_NO_*``
+        # rules so the rationale lives in one place.
+        #
+        # ``__`` is intentionally NOT rejected: datasource names never
+        # become SQL table aliases, so the join-path-alias reservation
+        # that applies to model and query names doesn't apply here.
+        label = "Datasource 'name'"
+        _require_non_empty_trimmed(v=v, context=label)
+        _NO_NUL.check(name=v, context=label)
+        _NO_FWD_SLASH.check(name=v, context=label)
+        _NO_BACK_SLASH.check(name=v, context=label)
+        _NO_DOT.check(name=v, context=label)
+        _NO_COLON.check(name=v, context=label)
         return v
 
     def get_connection_string(self) -> str:

diff --git a/slayer/core/query.py b/slayer/core/query.py
@@ -319,13 +319,16 @@ def _apply_schema_migrations(cls, data: Any) -> Any:
 
     @field_validator("name")
     @classmethod
-    def _validate_no_dunder_in_name(cls, v: Optional[str]) -> Optional[str]:
-        if v is not None and "__" in v:
-            raise ValueError(
-                f"Query name '{v}' must not contain '__'. "
-                f"Double underscores are reserved for join path aliases in generated SQL."
-            )
-        return v
+    def _validate_query_name(cls, v: Optional[str]) -> Optional[str]:
+        # Share the same rejection rules as SlayerModel.name —
+        # SlayerQuery names occupy the same naming space when persisted
+        # as query-backed models. Rejects ``__`` (join-path alias
+        # separator), ``.`` (dotted reference syntax), and ``:`` (DSL
+        # aggregation separator).
+        if v is None:
+            return v
+        from slayer.core.models import _validate_model_name
+        return _validate_model_name(v, "Query")
     dimensions: Annotated[Optional[List[ColumnRef]], BeforeValidator(_coerce_dimensions)] = None
     time_dimensions: Optional[List[TimeDimension]] = None
     main_time_dimension: Optional[str] = None  # Explicit time dimension for transforms (overrides auto-detection)

diff --git a/slayer/embeddings/service.py b/slayer/embeddings/service.py
@@ -252,9 +252,14 @@ async def _apply_pending(
             try:
                 await self._storage.save_embeddings(rows)
             except Exception as exc:  # NOSONAR(S112) — best-effort persistence
+                # Include canonical ids so a caller doing failure
+                # attribution by entity (e.g. ``ingest_datasource_idempotent``
+                # tagging memory failures as ``model_name="memory:<id>"``)
+                # can see which rows did not land.
+                canonical_ids = ", ".join(r.canonical_id for r in rows)
                 warnings.append(
-                    f"embedding batch persist failed "
-                    f"({len(rows)} rows): {exc}"
+                    f"embedding batch persist failed for "
+                    f"{len(rows)} row(s) [{canonical_ids}]: {exc}"
                 )
         _log.debug(
             "EmbeddingService: refreshed=%d stale=%d total=%d warnings=%d",