Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ dist
.secrets
event.json
site/
.codex
40 changes: 35 additions & 5 deletions altk_evolve/backend/filesystem.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import datetime
import json
import logging
import os
import uuid
from pathlib import Path
from threading import Lock

from pydantic import Field
from pydantic import Field, ValidationError

from altk_evolve.backend.base import BaseEntityBackend
from altk_evolve.config.filesystem import FilesystemSettings, filesystem_settings
Expand Down Expand Up @@ -47,16 +48,45 @@ def _namespace_file(self, namespace_id: str) -> Path:
return self.data_dir / f"{namespace_id}.json"

def _load_namespace_data(self, namespace_id: str) -> FilesystemNamespace:
"""Load namespace data from JSON file."""
"""Load namespace data from JSON file.

Empty or corrupt files are treated as missing AND unlinked, so that a subsequent
create_namespace() call does not trip on the stale file and raise
NamespaceAlreadyExistsException, which would leave ensure_namespace() stuck.
"""
file_path = self._namespace_file(namespace_id)
if not file_path.exists():
raise NamespaceNotFoundException(f"Namespace `{namespace_id}` not found")
return FilesystemNamespace.model_validate(json.loads(file_path.read_text()))
raw = file_path.read_text()
if not raw.strip():
logger.warning("Namespace file %s is empty (likely an interrupted write); removing and treating as missing.", file_path)
file_path.unlink(missing_ok=True)
raise NamespaceNotFoundException(f"Namespace `{namespace_id}` not found")
try:
return FilesystemNamespace.model_validate(json.loads(raw))
except (json.JSONDecodeError, ValidationError) as e:
logger.warning("Namespace file %s is corrupt (%s); removing and treating as missing.", file_path, e)
file_path.unlink(missing_ok=True)
raise NamespaceNotFoundException(f"Namespace `{namespace_id}` not found") from e
Comment thread
coderabbitai[bot] marked this conversation as resolved.

def _save_namespace_data(self, namespace_id: str, data: FilesystemNamespace):
"""Save namespace data to JSON file."""
"""Save namespace data to JSON file atomically.

Why: Path.write_text truncates the file immediately and leaves it 0 bytes if the
process is interrupted mid-write (SIGTERM, kill, crash). Write to a uniquely-named
sibling tmp file and os.replace() so readers always see either the old or new
complete file. The uuid suffix keeps concurrent writers from the same data_dir
(e.g. CLI + MCP server) from clobbering each other's tmp file, which would cause
FileNotFoundError at os.replace time.
"""
file_path = self._namespace_file(namespace_id)
file_path.write_text(data.model_dump_json(indent=2))
tmp_path = file_path.with_suffix(f"{file_path.suffix}.tmp.{uuid.uuid4().hex}")
try:
tmp_path.write_text(data.model_dump_json(indent=2))
os.replace(tmp_path, file_path)
except Exception:
tmp_path.unlink(missing_ok=True)
raise

def ready(self) -> bool:
"""Check if the backend is healthy."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def _coerce_repo(entry):
return None
if not is_valid_repo_name(name.strip()):
print(
f"evolve-lite: ignoring repo entry {name!r} invalid name (only A-Z, a-z, 0-9, '.', '_', '-' allowed)",
f"evolve-lite: {name!r} (skipped - invalid subscription name) — only A-Z, a-z, 0-9, '.', '_', '-' allowed",
file=sys.stderr,
)
return None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def _coerce_repo(entry):
return None
if not is_valid_repo_name(name.strip()):
print(
f"evolve-lite: ignoring repo entry {name!r} invalid name (only A-Z, a-z, 0-9, '.', '_', '-' allowed)",
f"evolve-lite: {name!r} (skipped - invalid subscription name) — only A-Z, a-z, 0-9, '.', '_', '-' allowed",
file=sys.stderr,
)
return None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,7 @@ def main():
remote=args.remote,
)
except Exception as exc:
repos.pop()
set_repos(cfg, repos)
try:
save_config(cfg, project_root)
except Exception:
pass
if dest.exists():
shutil.rmtree(dest)
print(f"Error: failed to record subscription — clone removed: {exc}", file=sys.stderr)
sys.exit(1)
print(f"Warning: failed to append audit entry for subscribe: {exc}", file=sys.stderr)

print(f"Subscribed to '{args.name}' (scope={args.scope}) from {args.remote}")

Expand Down
1 change: 1 addition & 0 deletions tests/e2e/test_e2e_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
TRAJECTORY = json.loads((Path(__file__).parent.parent / "fixtures" / "appworld_venmo_task_trajectory.json").read_text())


@pytest.mark.flaky(retries=2, delay=1)
def test_segment_trajectory_min_subtasks():
"""Real trajectory produces at least 7 subtasks (gold standard has 7)."""
subtasks = segment_trajectory(TRAJECTORY)
Expand Down
9 changes: 6 additions & 3 deletions tests/e2e/test_sharing.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,18 @@ async def test_cross_namespace_public_discovery(mcp):
"get_entities",
{"task": "dependency injection", "entity_type": "guideline", "include_public": True},
)
assert "dependency injection" in with_public.content[0].text
assert "use dependency injection for testability" in with_public.content[0].text
assert "[public: alice]" in with_public.content[0].text

# Without include_public it must NOT appear (it lives in a different namespace)
# Without include_public it must NOT appear (it lives in a different namespace).
# Check for the entity content and owner marker — the query task echoes in
# the response header ("# Guidelines for: dependency injection") regardless of hits.
without_public = await client.call_tool_mcp(
"get_entities",
{"task": "dependency injection", "entity_type": "guideline", "include_public": False},
)
assert "dependency injection" not in without_public.content[0].text
assert "use dependency injection for testability" not in without_public.content[0].text
assert "[public: alice]" not in without_public.content[0].text
finally:
try:
second_client.delete_namespace(second_ns)
Expand Down
6 changes: 3 additions & 3 deletions tests/platform_integrations/test_bob_sharing.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def test_skips_invalid_subscription_name(self, temp_project_dir):
cfg_path.write_text('repos:\n - name: "../outside"\n scope: "read"\n remote: "git@github.com:x/y.git"\n branch: "main"\n')
result = run_script(SYNC_SCRIPT, temp_project_dir, evolve_dir=evolve_dir)
assert result.returncode == 0
assert "invalid subscription name" in result.stdout
assert "invalid subscription name" in result.stderr
assert not (evolve_dir / "entities" / "subscribed" / "outside").exists()

def test_rejects_dot_and_double_dot_names(self, temp_project_dir):
Expand All @@ -386,12 +386,12 @@ def test_rejects_dot_and_double_dot_names(self, temp_project_dir):
cfg_path.write_text('repos:\n - name: "."\n scope: "read"\n remote: "git@github.com:x/y.git"\n branch: "main"\n')
result = run_script(SYNC_SCRIPT, temp_project_dir, evolve_dir=evolve_dir)
assert result.returncode == 0
assert "invalid subscription name" in result.stdout
assert "invalid subscription name" in result.stderr

cfg_path.write_text('repos:\n - name: ".."\n scope: "read"\n remote: "git@github.com:x/y.git"\n branch: "main"\n')
result = run_script(SYNC_SCRIPT, temp_project_dir, evolve_dir=evolve_dir)
assert result.returncode == 0
assert "invalid subscription name" in result.stdout
assert "invalid subscription name" in result.stderr

def test_removed_entity_disappears_after_sync(self, subscribed_project):
"""Entities deleted from a read-scope remote are removed from the mirror on next sync."""
Expand Down
2 changes: 1 addition & 1 deletion tests/platform_integrations/test_codex_sharing.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ def test_sync_skips_invalid_subscription_name(self, temp_project_dir):
)

assert result.returncode == 0
assert "'.' (skipped - invalid subscription name)" in result.stdout
assert "'.' (skipped - invalid subscription name)" in result.stderr
assert not (evolve_dir / "entities" / "subscribed").exists()

def test_sync_uses_workspace_config_with_custom_evolve_dir(self, temp_project_dir, local_repo):
Expand Down
2 changes: 1 addition & 1 deletion tests/platform_integrations/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def test_skips_invalid_subscription_name(self, temp_project_dir):
cfg_path.write_text("repos:\n - name: ../evil\n scope: read\n remote: git@github.com:x/y.git\n branch: main\n")
result = run_script(SYNC_SCRIPT, temp_project_dir, evolve_dir=evolve_dir)
assert result.returncode == 0
assert "invalid subscription name" in result.stdout
assert "invalid subscription name" in result.stderr
assert not (evolve_dir / "entities" / "evil").exists()

def test_manual_run_ignores_on_session_start_false(self, subscribed_project):
Expand Down
69 changes: 69 additions & 0 deletions tests/unit/test_filesystem_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from pathlib import Path

import pytest

from altk_evolve.backend.filesystem import FilesystemEntityBackend
from altk_evolve.config.evolve import EvolveConfig
from altk_evolve.config.filesystem import FilesystemSettings
from altk_evolve.frontend.client.evolve_client import EvolveClient


@pytest.fixture
def client(tmp_path: Path) -> EvolveClient:
return EvolveClient(config=EvolveConfig(backend="filesystem", settings=FilesystemSettings(data_dir=str(tmp_path))))


@pytest.fixture
def backend(tmp_path: Path) -> FilesystemEntityBackend:
return FilesystemEntityBackend(config=FilesystemSettings(data_dir=str(tmp_path)))


@pytest.mark.unit
def test_ensure_namespace_recovers_from_zero_byte_file(client: EvolveClient, tmp_path: Path):
stale = tmp_path / "ns_stale.json"
stale.write_text("")
assert stale.exists() and stale.stat().st_size == 0

ns = client.ensure_namespace("ns_stale")

assert ns.id == "ns_stale"
assert ns.num_entities == 0
assert stale.exists() and stale.stat().st_size > 0


@pytest.mark.unit
def test_ensure_namespace_recovers_from_corrupt_json(client: EvolveClient, tmp_path: Path):
corrupt = tmp_path / "ns_corrupt.json"
corrupt.write_text("{not json")

ns = client.ensure_namespace("ns_corrupt")

assert ns.id == "ns_corrupt"
assert ns.num_entities == 0


@pytest.mark.unit
def test_ensure_namespace_recovers_from_schema_invalid_json(client: EvolveClient, tmp_path: Path):
"""Valid JSON that doesn't match FilesystemNamespace must also trigger recovery,
not propagate pydantic.ValidationError and wedge startup."""
bogus = tmp_path / "ns_bogus.json"
bogus.write_text('{"not_a_namespace": true}')

ns = client.ensure_namespace("ns_bogus")

assert ns.id == "ns_bogus"
assert ns.num_entities == 0


@pytest.mark.unit
def test_save_tolerates_stale_shared_tmp(backend: FilesystemEntityBackend, tmp_path: Path):
"""Stale <ns>.json.tmp from an interrupted write must not block subsequent saves.

Regression guard for concurrent CLI+MCP writers that used to share the tmp name.
"""
(tmp_path / "ns_busy.json.tmp").write_text("leftover")

backend.create_namespace("ns_busy")

target = tmp_path / "ns_busy.json"
assert target.exists() and target.stat().st_size > 0
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Loading