Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions scripts/mneme_compile_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@
r"\bbounty\b", r"\bissue\b", r"\bprefers\b", r"\bavoid\b", r"\bno public\b",
r"\bproof summaries\b", r"\bdecision support\b",
],
"headingHints": [r"systems", r"infrastructure", r"key infrastructure", r"access", r"memory stack"],
"headingHints": [
r"systems", r"infrastructure", r"key infrastructure", r"access", r"memory stack",
r"server access", r"services on the box",
],
},
"decisions": {
"include": [
Expand Down Expand Up @@ -120,7 +123,7 @@
}
HEADING_BUCKET_HINTS = {
"projects": [r"active projects", r"durable project facts", r"bdeep", r"yibin", r"aqua", r"mneme"],
"systems": [r"key infrastructure", r"systems", r"access", r"memory stack"],
"systems": [r"key infrastructure", r"systems", r"access", r"memory stack", r"server access", r"services on the box"],
"decisions": [r"durable decisions", r"preferences", r"methods", r"hard constraints"],
"incidents": [r"incidents", r"warnings"],
"people": [r"identity", r"user", r"people", r"profile"],
Expand All @@ -138,7 +141,7 @@
"conversation summary", "source files", "sources", "tools", "methods", "pending",
"what it does", "what it is", "what it is not", "goal", "status",
"current status", "stable assumptions", "recommended next step", "practical interpretation",
"docs", "automation", "runtime orchestration", "continuation guide",
"docs", "automation", "runtime orchestration", "continuation guide", "build",
}
GENERIC_SECTION_PATTERNS = [
re.compile(r"\bactive projects\b", re.I),
Expand Down Expand Up @@ -322,6 +325,8 @@ def is_generic_or_noise_title(text: str) -> bool:
return True
if norm in GENERIC_TITLES:
return True
if any(p.search(norm) for p in GENERIC_SECTION_PATTERNS):
return True
if re.fullmatch(r"20\d{2} \d{2} \d{2}", norm):
return True
return False
Expand All @@ -337,8 +342,12 @@ def is_low_value_item(item: SourceLine, category: str | None = None) -> bool:
return True
if is_heading_only_text(text):
return True
if item.kind == "note_section" and not body_lines(text):
return True
if item.kind == "note_section":
section_title = extract_section_title(item)
if section_title and is_generic_or_noise_title(section_title):
return True
if not body_lines(text):
return True
if is_bulky_section_dump(item):
return True
body = first_body_line(text)
Expand All @@ -355,7 +364,7 @@ def heading_bucket(item: SourceLine) -> tuple[str | None, int]:
score = 0
for category, pats in HEADING_BUCKET_HINTS.items():
s = sum(1 for pat in pats if re.search(pat, heading_text))
if s > score:
if s > score or (s == score and s > 0 and best and CATEGORY_PRIORITY[category] > CATEGORY_PRIORITY[best]):
best, score = category, s
return best, score

Expand Down
77 changes: 59 additions & 18 deletions tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,7 @@ def make_workspace(self) -> Path:
self.addCleanup(lambda: shutil.rmtree(tmp, ignore_errors=True))
return ws

def test_ingest_fixture_workspace(self) -> None:
ws = self.make_workspace()
raw_out = ws / 'raw-out'
data = run_json([
sys.executable,
str(SCRIPTS / 'mneme_ingest_memory.py'),
'--root', str(ws),
'--out', str(raw_out),
])
self.assertGreaterEqual(data['sourceCount'], 2)
self.assertGreater(data['itemCount'], 0)
self.assertTrue((raw_out / 'sources.jsonl').exists())
self.assertTrue((raw_out / 'items.jsonl').exists())

def test_compile_fixture_raw_into_outputs(self) -> None:
ws = self.make_workspace()
def compile_workspace(self, ws: Path) -> Path:
raw_out = ws / 'raw-out'
compiled_out = ws / 'compiled-out'
run_json([
Expand All @@ -61,14 +46,70 @@ def test_compile_fixture_raw_into_outputs(self) -> None:
], capture_output=True, text=True)
if cp.returncode != 0:
raise AssertionError(f"Compile failed ({cp.returncode})\nSTDOUT:\n{cp.stdout}\nSTDERR:\n{cp.stderr}")
return compiled_out

def test_ingest_fixture_workspace(self) -> None:
    """Run the ingest script on a fixture workspace and check its results.

    The JSON summary returned by ``run_json`` must report at least two
    sources and at least one item, and both JSONL outputs must exist in
    the output directory.
    """
    workspace = self.make_workspace()
    ingest_dir = workspace / 'raw-out'
    command = [
        sys.executable,
        str(SCRIPTS / 'mneme_ingest_memory.py'),
        '--root', str(workspace),
        '--out', str(ingest_dir),
    ]
    summary = run_json(command)

    # Summary counters reported by the ingest run.
    self.assertGreaterEqual(summary['sourceCount'], 2)
    self.assertGreater(summary['itemCount'], 0)

    # Files the ingest step is expected to leave behind.
    for produced in ('sources.jsonl', 'items.jsonl'):
        self.assertTrue((ingest_dir / produced).exists())

def test_compile_fixture_raw_into_outputs(self) -> None:
    """Compile the fixture workspace and verify the generated outputs.

    Checks that every expected output file exists, then inspects the
    contents of ``projects.md`` and ``people.md``: the project entry must
    be present, the generic "Active Projects" section heading must not be
    emitted as its own entry, and the people document must carry its
    title and the "What to call them" content.
    """
    ws = self.make_workspace()
    compiled_out = self.compile_workspace(ws)

    # Assert existence BEFORE reading: otherwise a missing file surfaces as
    # FileNotFoundError from read_text() and these checks can never fail.
    expected_outputs = (
        'projects.md',
        'people.md',
        'timeline.md',
        'documents.jsonl',
        'entries.jsonl',
    )
    for name in expected_outputs:
        self.assertTrue(
            (compiled_out / name).exists(),
            f'missing compiled output: {name}',
        )

    # Decode explicitly as UTF-8: the expected text contains a non-ASCII
    # em dash, and the locale default encoding is not UTF-8 everywhere.
    projects_text = (compiled_out / 'projects.md').read_text(encoding='utf-8')
    people_text = (compiled_out / 'people.md').read_text(encoding='utf-8')

    self.assertIn('Project Alpha', projects_text)
    self.assertNotIn('## Active Projects', projects_text)
    self.assertIn('Compiled Memory — People', people_text)
    self.assertIn('What to call them', people_text)

def test_compile_demotes_generic_sections_and_prefers_system_subheadings(self) -> None:
    """Check routing of a note with server-access subsections.

    Adds a dated note whose project section contains "Server Access" and
    "Three services on the box" subsections plus a deploy section with a
    fenced shell snippet, compiles the workspace, and asserts that:

    * none of the infrastructure bullets (nor a "Build" heading taken from
      the shell comment) appear as entries in ``projects.md``;
    * the infrastructure bullets appear as entries in ``systems.md``.
    """
    ws = self.make_workspace()
    extra_note = ws / 'memory' / '2026-04-01-routing.md'
    extra_note.write_text(
        '# 2026-04-01\n\n'
        '## Aqua-CQ Project\n\n'
        '### Server Access\n'
        '- Hostname: iv-ydyut13e9ss6ipm2he1t\n'
        '- Ubuntu 24.04.4, 8GB RAM, 40GB disk\n'
        '- Project at /opt/aqua-cq (backend only), frontend deployed to /var/www/html/cq/\n\n'
        '### Three services on the box\n'
        '- :8003 — aqua-qdh (backend-dev, /opt/aqua-qdh)\n\n'
        '## Deploy process (correct)\n\n'
        '```bash\n'
        '# Build\n'
        'export PATH="$HOME/.openclaw/tools/node-v22.22.0/bin:$PATH"\n'
        'cd /opt/aqua-cq && npm run build\n'
        '```\n',
        encoding='utf-8',
    )

    compiled_out = self.compile_workspace(ws)
    # Read back with the same explicit encoding used to write the fixture;
    # the assertions below contain a non-ASCII em dash, so relying on the
    # locale default would make the test platform-dependent.
    projects_text = (compiled_out / 'projects.md').read_text(encoding='utf-8')
    systems_text = (compiled_out / 'systems.md').read_text(encoding='utf-8')

    # Headings that must NOT be promoted to project entries.
    absent_from_projects = (
        '## Build',
        '## Hostname',
        '## Ubuntu 24.04.4, 8GB RAM, 40GB disk',
        '## :8003 — aqua-qdh (backend-dev, /opt/aqua-qdh)',
    )
    for heading in absent_from_projects:
        self.assertNotIn(heading, projects_text)

    # Headings that must be routed to the systems document instead.
    present_in_systems = (
        '## Server Access — Hostname',
        '## Ubuntu 24.04.4, 8GB RAM, 40GB disk',
        '## :8003 — aqua-qdh (backend-dev, /opt/aqua-qdh)',
    )
    for heading in present_in_systems:
        self.assertIn(heading, systems_text)

def test_runtime_prepare_people_category(self) -> None:
ws = self.make_workspace()
raw_out = ws / 'runtime-raw'
Expand Down