Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md

Large diffs are not rendered by default.

80 changes: 76 additions & 4 deletions atomic_agents/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@
PersonaBackend,
get_default_persona_backend,
)
from .corpus import (
CorpusBackend,
get_default_corpus_backend,
)
from .logs.types import (
PRIMITIVE_AGENT_CALL,
PRIMITIVE_CAPTURE,
Expand Down Expand Up @@ -252,9 +256,16 @@ class AtomicAgent:
# PR 2). Without this, static analysis would narrow
# ``agent.persona_backend`` to the concrete
# ``FilesystemPersonaBackend`` default rather than treating it as any
# ``PersonaBackend`` Protocol implementer breaking the
# ``PersonaBackend`` Protocol implementer -- breaking the
# operator-pinned-SaaS/Postgres/git-backed case PR 3 forward.
persona_backend: PersonaBackend
# Same class-level annotation rationale for ``corpus_backend`` (#65
# PR 3). Without this, static analysis would narrow
# ``agent.corpus_backend`` to the concrete
# ``FilesystemCorpusBackend`` default rather than treating it as any
# ``CorpusBackend`` Protocol implementer -- breaking the
# operator-pinned-SQLite/pgvector case PR 3 forward.
corpus_backend: CorpusBackend
"""The main agent runtime.

Responsible for:
Expand All @@ -281,6 +292,7 @@ def __init__(
mandate_backend: MandateBackend | None = None,
policy_backend: PolicyBackend | None = None,
persona_backend: PersonaBackend | None = None,
corpus_backend: CorpusBackend | None = None,
):
self.name = name
self.trigger = trigger
Expand Down Expand Up @@ -437,6 +449,21 @@ def __init__(
# the constructor kwarg (the kwarg is no longer in scope there).
self._persona_backend_was_explicit = _persona_backend_was_explicit

# ── CorpusBackend resolution (#65 PR 3) ──────────────────────────
# Mirrors PersonaBackend's _persona_backend_was_explicit pattern.
# Corpus is per-agent semantic context (wiki + raw), NOT fleet-scoped
# like Policy or AgentProfile. Default-resolved backends do NOT thread
# to delegates (D-ER-2 corollary). Operators wanting a shared corpus
# across a coordinator and delegates pass corpus_backend= explicitly.
_corpus_backend_was_explicit = corpus_backend is not None
if corpus_backend is None:
self.corpus_backend = get_default_corpus_backend(self.agent_root)
else:
self.corpus_backend = corpus_backend
# Saved on self so delegate() can consult it without re-checking
# the constructor kwarg (the kwarg is no longer in scope there).
self._corpus_backend_was_explicit = _corpus_backend_was_explicit

# ── Mandate crash recovery + reservation managers (#124 PR 3b) ──────
# Per spec/29 §"Crash recovery for reservations" + plan-subagent
# Risks 8 (invocation site = agent init) + 9 (multi-scope iteration).
Expand Down Expand Up @@ -2934,9 +2961,52 @@ def _load_indexes(self) -> None:
summary = self.memory.render_index_summary()
if summary and summary.strip() != "# Memory Index\n":
self._memory_index_text = summary
wiki_index = self.agent_root / "wiki" / "INDEX.md"
if wiki_index.exists():
self._wiki_index_text = wiki_index.read_text(encoding="utf-8")
if self.corpus_backend is not None:
# Route through Protocol. After PR 3 default-resolution at
# __init__, this is the common production path. The broad except
# deliberately widens the legacy direct-read soft-degrade (which
# catches only OSError) so that any transient backend failure
# (OSError, UnicodeDecodeError, sqlite3.*, CorpusError, or any
# custom-backend exception) does not crash agent construction.
# The empty wiki section is observable via the logged warning
# marker wiki_index_unreadable.
try:
self._wiki_index_text = self.corpus_backend.render_index_summary(
corpus="wiki"
)
except Exception as exc:
_logger.warning(
"wiki_index_unreadable backend=%s agent_root=%s cause=%s",
type(self.corpus_backend).__name__,
self.agent_root,
exc,
)
self._wiki_index_text = ""
else:
# Legacy direct-read fallback. NOTE: after PR 3, this branch is
# unreachable in production because AtomicAgent.__init__ always
# default-resolves corpus_backend via get_default_corpus_backend.
# Retained as a safety net for any future refactor that removes
# the auto-resolve. Tests in test_corpus_migration_regression.py
# force corpus_backend=None post-construction to exercise this
# branch's byte-identity and OSError soft-degrade guarantees.
# Round 3 finding R3-F1: this branch does NOT catch
# UnicodeDecodeError. The Protocol path handles it inside
# FilesystemCorpusBackend.render_index_summary (see
# corpus/filesystem.py:699-715). If this branch is ever
# re-activated for production, add a UnicodeDecodeError catch
# matching the Protocol path's partial-content soft-degrade.
wiki_index = self.agent_root / "wiki" / "INDEX.md"
if wiki_index.exists():
try:
self._wiki_index_text = wiki_index.read_text(encoding="utf-8")
except OSError as exc:
_logger.warning(
"wiki_index_unreadable agent_root=%s path=%s cause=%s",
self.agent_root,
wiki_index,
exc,
)
self._wiki_index_text = ""

def _load_pinned_notes(self) -> None:
if not (self.agent_root / "memory").exists():
Expand Down Expand Up @@ -4577,6 +4647,8 @@ def delegate(
}
if self._persona_backend_was_explicit:
_delegate_kwargs["persona_backend"] = self.persona_backend
if self._corpus_backend_was_explicit:
_delegate_kwargs["corpus_backend"] = self.corpus_backend
target_agent = AtomicAgent(**_delegate_kwargs)

start = time.time()
Expand Down
75 changes: 68 additions & 7 deletions atomic_agents/bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,18 @@
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING

from . import _cascade
from ._io import atomic_write

if TYPE_CHECKING:
# Imported under TYPE_CHECKING to avoid circular imports -- bundle.py is
# loaded early and corpus may not yet be available in all import paths.
# All runtime references use the string annotation "CorpusBackend | None".
# (PR 3 wiring)
from .corpus.backend import CorpusBackend


SECTION_SEPARATOR = "\n\n═══════════════════════════\n\n"

Expand Down Expand Up @@ -106,6 +114,7 @@ def render_bundle(
cache_dir: Path | None = None,
extra_files: list[Path] | None = None,
if_stale: bool = False,
corpus_backend: "CorpusBackend | None" = None,
) -> BundleResult:
"""Render the cascade for *agent_root* into a single bundled file.

Expand Down Expand Up @@ -163,7 +172,7 @@ def render_bundle(
source_count=sum(1 for p in sources if p.is_file()),
)

sections = _render_sections(agent_root, all_extras)
sections = _render_sections(agent_root, all_extras, corpus_backend=corpus_backend)
header = _render_header(agent_root, sources)
body = SECTION_SEPARATOR.join(s for s in sections if s)
content = header + "\n\n" + body + "\n\n<!-- end bundle -->\n"
Expand Down Expand Up @@ -263,6 +272,7 @@ def _collect_extras(
# Source enumeration (for staleness tracking)


# TODO(v1.1): _source_paths returns filesystem paths for staleness tracking. SQLite backends have no equivalent path to track. See #65 PR 4 follow-up issue (to be filed at arc closer).
def _source_paths(agent_root: Path) -> list[Path]:
"""Enumerate every cascade source file whose mtime should drive staleness."""
paths: list[Path] = []
Expand Down Expand Up @@ -350,12 +360,20 @@ def _staleness_paths(agent_root: Path) -> list[Path]:
# Section rendering


def _render_sections(agent_root: Path, extras: list[Path]) -> list[str]:
def _render_sections(
agent_root: Path,
extras: list[Path],
corpus_backend: "CorpusBackend | None" = None,
) -> list[str]:
"""Build the ordered list of bundle sections per spec/04 + spec/06.

Section headers mirror spec/04 §"Cache breakpoints" so a future caller
that wants to map sections back to Anthropic prompt-cache breakpoints
can parse them.

``corpus_backend`` is threaded to ``_render_memory_breakpoint`` so PR 3
wiring can route wiki INDEX reads through the Protocol when available.
Defaults to ``None`` for full backward compatibility.
"""
cascade = _cascade.detect_cascade(agent_root)
sections: list[str] = ["# === BREAKPOINT 1: Stable cascade ==="]
Expand All @@ -371,7 +389,9 @@ def _render_sections(agent_root: Path, extras: list[Path]) -> list[str]:
sections.append(_render_file_section(p, label=f"Extra · {p.name}"))

instance_root = cascade.instance_root if cascade else agent_root
sections.extend(_render_memory_breakpoint(instance_root))
sections.extend(
_render_memory_breakpoint(instance_root, corpus_backend=corpus_backend)
)
sections.extend(_render_recent_notes_breakpoint(instance_root))
sections.extend(_render_journal_breakpoint(instance_root))

Expand Down Expand Up @@ -491,8 +511,30 @@ def _render_flat(agent_root: Path) -> list[str]:
return out


def _render_memory_breakpoint(instance_root: Path) -> list[str]:
"""Render memory INDEX + pinned + wiki INDEX (BP1 trailing or BP2)."""
def _render_wiki_index_section(label: str, path: Path, content: str) -> str:
    """Format wiki INDEX text as a labelled bundle section.

    The result is, in order: a level-2 heading carrying ``label``, a line
    with ``path`` wrapped in backticks, a blank line, and then ``content``
    verbatim. No stripping and no trailing newline is applied here; callers
    are responsible for normalising ``content`` first.

    Both the corpus_backend Protocol branch and the legacy direct-read
    branch of ``_render_memory_breakpoint`` call this helper, so identical
    inputs yield identical section text regardless of which branch supplied
    the content. The layout is intended to match ``_render_file_section``'s
    ``## {label}\\n`{path}`\\n\\n{body}`` shape (that helper is not visible
    here -- confirm before changing either side).
    (PR 3 wiring; IRON RULE assertion 4)
    """
    heading = "## {}".format(label)
    location = "`{}`".format(path)
    body = "{}".format(content)
    # An empty element between location and body yields the blank line.
    return "\n".join((heading, location, "", body))


def _render_memory_breakpoint(
instance_root: Path,
corpus_backend: "CorpusBackend | None" = None,
) -> list[str]:
"""Render memory INDEX + pinned + wiki INDEX (BP1 trailing or BP2).

``corpus_backend`` threads the CorpusBackend Protocol for wiki INDEX
reads when available (PR 3 wiring). When ``None``, falls back to the
legacy direct-file read via ``_render_file_section``.
"""
memory_dir = instance_root / "memory"
wiki_dir = instance_root / "wiki"

Expand All @@ -506,8 +548,27 @@ def _render_memory_breakpoint(instance_root: Path) -> list[str]:
if pinned:
out.append("## Memory · Pinned atomic notes\n\n" + "\n\n---\n\n".join(pinned))

if (wiki_dir / "INDEX.md").is_file():
out.append(_render_file_section(wiki_dir / "INDEX.md", label="Wiki · INDEX.md"))
# Wiki INDEX: route through CorpusBackend Protocol when available (PR 3
# wiring). Both branches call _render_wiki_index_section with the same
# logical path so output is byte-identical between corpus_backend=None
# and corpus_backend=FilesystemCorpusBackend(...) (IRON RULE assertion 4).
# Both branches apply .strip() to match _render_file_section's
# _safe_read_text(...).strip() behavior. Skip the section when the
# content is empty (no file or empty file), matching the existing
# "skip empty wiki" behavior.
wiki_label = "Wiki · INDEX.md"
wiki_path = wiki_dir / "INDEX.md"
if corpus_backend is not None:
wiki_content = corpus_backend.render_index_summary(corpus="wiki").strip()
if wiki_content:
out.append(_render_wiki_index_section(wiki_label, wiki_path, wiki_content))
else:
if wiki_path.is_file():
wiki_content = _safe_read_text(wiki_path).strip()
if wiki_content:
out.append(
_render_wiki_index_section(wiki_label, wiki_path, wiki_content)
)

if out:
return ["# === BREAKPOINT 2: Weekly (INDEXes + pinned) ==="] + out
Expand Down
13 changes: 7 additions & 6 deletions atomic_agents/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -872,19 +872,20 @@ def _resolve_corpus_agent_root(args) -> Path:
def _cmd_corpus(args) -> int:
"""Dispatch corpus subcommands.

All corpus subcommands instantiate ``FilesystemCorpusBackend(agent_root)``
directly. Env-var resolution lives in
``atomic_agents/corpus/__init__.py:get_default_corpus_backend``.
Instantiates the operator-pinned backend via
``get_default_corpus_backend(agent_root)``, which reads
``ATOMIC_AGENTS_CORPUS_BACKEND`` (default ``"filesystem"``) so the CLI
surface honours the same env-var override as the runtime.

Exit codes: 0 on success, 1 on any error (CorpusError, OSError,
PermissionError, etc.). Errors go to stderr; normal output to stdout.
Zero LLM calls pure local I/O.
Zero LLM calls -- pure local I/O.
"""
from .corpus.filesystem import FilesystemCorpusBackend
from .corpus import get_default_corpus_backend
from .exceptions import CorpusError

agent_root = _resolve_corpus_agent_root(args)
backend = FilesystemCorpusBackend(agent_root)
backend = get_default_corpus_backend(agent_root)
corpus_cmd = args.corpus_cmd

try:
Expand Down
86 changes: 78 additions & 8 deletions atomic_agents/corpus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,26 +197,96 @@ def get_default_corpus_backend(agent_root: Path) -> CorpusBackend:
SQLite / Postgres / pgvector backends plug in via the same key
without operators having to relearn the env vocabulary.

An empty string (or whitespace-only) value for
``ATOMIC_AGENTS_CORPUS_BACKEND`` is treated as "not set" and falls
back to the filesystem default. This guards against shell
``export ATOMIC_AGENTS_CORPUS_BACKEND=`` accidents without masking
an accidental URL paste -- the doctor (Stream E) surfaces the case
where ``ATOMIC_AGENTS_CORPUS_BACKEND_URL`` is set but
``ATOMIC_AGENTS_CORPUS_BACKEND`` is unset, emitting a WARN so the
operator can correct the misconfiguration.

The ``agent_root`` parameter is honored by the filesystem backend
(wiki/ and raw/ subdirs live under that path); future distributed
backends ignore it in favor of the table-prefix or key-prefix
scoping inherent to their storage.
(wiki/ and raw/ subdirs live under that path) and by the sqlite
backend when no URL is supplied (db path defaults to
``<agent_root>/.corpus.db``). Future distributed backends ignore it
in favor of the table-prefix or key-prefix scoping inherent to
their storage.

For programmatic operators who want to construct the backend
themselves (custom database connection, custom path, etc.), the
``AtomicAgent(..., corpus_backend=...)`` constructor kwarg (wired
in PR 3) bypasses this factory entirely.

See spec/34 for the full env-var reference + the env-var-vs-kwarg
trade-off rationale.
See spec/34 §"Operator override surface" for the full env-var
reference + the env-var-vs-kwarg trade-off rationale.
"""
raw_backend_id = (
os.environ.get("ATOMIC_AGENTS_CORPUS_BACKEND", "filesystem").strip().lower()
)
raw_backend_id = os.environ.get("ATOMIC_AGENTS_CORPUS_BACKEND", "").strip().lower()

# An empty (or whitespace-only) value is treated as "not set" and
# falls through to the filesystem branch below. This covers the shell
# ``export ATOMIC_AGENTS_CORPUS_BACKEND=`` accident case.
if not raw_backend_id:
raw_backend_id = "filesystem"

if raw_backend_id == "filesystem":
# Filesystem URL support (spec/34 line 472 parity).
# When ATOMIC_AGENTS_CORPUS_BACKEND_URL is set alongside
# ATOMIC_AGENTS_CORPUS_BACKEND=filesystem, route through the URL
# factory so operators can supply a non-default agent_root path.
# When no URL is set, use the legacy direct construction -- this
# preserves byte-identical pre-#65 behavior for all existing agents.
url = os.environ.get("ATOMIC_AGENTS_CORPUS_BACKEND_URL", "").strip()
if url:
return make_filesystem_corpus_backend_from_url(url)
return FilesystemCorpusBackend(agent_root)

# SQLite branch (spec/34 §"Operator override surface").
# Mirrors profile/__init__.py:227-235 exactly. When no URL is set,
# defaults to sqlite:///<agent_root>/.corpus.db?agent_scope=<agent_root.name>
# so single-host operators get a working default by flipping ONE env var.
if raw_backend_id == "sqlite":
url = os.environ.get("ATOMIC_AGENTS_CORPUS_BACKEND_URL", "").strip()
if not url:
# Build the default URL from agent_root. Require a non-empty
# name component -- a root path (e.g., Path("/")) has an empty
# name and would produce a meaningless agent_scope.
if not agent_root.name:
raise CorpusBackendNotRegistered(
f"ATOMIC_AGENTS_CORPUS_BACKEND=sqlite default requires "
f"agent_root with a non-empty name (got {agent_root}). "
f"Set ATOMIC_AGENTS_CORPUS_BACKEND_URL to override."
)
# URL-encode agent_root.name so names containing URL metacharacters
# (spaces, +, &, ?, =) don't silently corrupt the agent_scope or
# raise ValueError from the URL factory's query-parameter parser.
# Without quote_plus, an agent named "my+agent" would have its
# agent_scope decoded as "my agent" (parse_qsl interprets + as
# space), causing cross-scope contamination with another agent
# genuinely named "my agent".
from urllib.parse import quote_plus

db_path = agent_root / ".corpus.db"
url = f"sqlite:///{db_path}?agent_scope={quote_plus(agent_root.name)}"
try:
return make_sqlite_corpus_backend_from_url(url)
except CorpusBackendNotRegistered:
raise
except Exception as e:
# Broad catch (mirrors doctor.check_corpus_backend) so any
# construction failure becomes a clean operator-facing
# CorpusBackendNotRegistered with the URL remedy. Covers OSError /
# PermissionError (read-only mount, non-existent parent dir),
# ValueError (malformed URL, invalid agent_scope charset), and
# sqlite3.OperationalError (db locked at first connection, WAL
# transition failure on NFS) without leaking raw library exceptions.
raise CorpusBackendNotRegistered(
f"ATOMIC_AGENTS_CORPUS_BACKEND=sqlite: cannot create db "
f"(cause: {type(e).__name__}: {e!s}). Set "
f"ATOMIC_AGENTS_CORPUS_BACKEND_URL=sqlite:///path/to/corpus.db "
f"to use a different path."
) from e

# Unknown backend_id -- surface a fail-fast error with the FULL
# known-id list so operators can spot the typo. Credential safety:
# ``raw_backend_id`` is sanitized before interpolation in case an
Expand Down
Loading