diff --git a/scripts/mneme_compile_memory.py b/scripts/mneme_compile_memory.py index aed9e9c..121475c 100755 --- a/scripts/mneme_compile_memory.py +++ b/scripts/mneme_compile_memory.py @@ -58,7 +58,10 @@ r"\bbounty\b", r"\bissue\b", r"\bprefers\b", r"\bavoid\b", r"\bno public\b", r"\bproof summaries\b", r"\bdecision support\b", ], - "headingHints": [r"systems", r"infrastructure", r"key infrastructure", r"access", r"memory stack"], + "headingHints": [ + r"systems", r"infrastructure", r"key infrastructure", r"access", r"memory stack", + r"server access", r"services on the box", + ], }, "decisions": { "include": [ @@ -120,7 +123,7 @@ } HEADING_BUCKET_HINTS = { "projects": [r"active projects", r"durable project facts", r"bdeep", r"yibin", r"aqua", r"mneme"], - "systems": [r"key infrastructure", r"systems", r"access", r"memory stack"], + "systems": [r"key infrastructure", r"systems", r"access", r"memory stack", r"server access", r"services on the box"], "decisions": [r"durable decisions", r"preferences", r"methods", r"hard constraints"], "incidents": [r"incidents", r"warnings"], "people": [r"identity", r"user", r"people", r"profile"], @@ -138,7 +141,7 @@ "conversation summary", "source files", "sources", "tools", "methods", "pending", "what it does", "what it is", "what it is not", "goal", "status", "current status", "stable assumptions", "recommended next step", "practical interpretation", - "docs", "automation", "runtime orchestration", "continuation guide", + "docs", "automation", "runtime orchestration", "continuation guide", "build", } GENERIC_SECTION_PATTERNS = [ re.compile(r"\bactive projects\b", re.I), @@ -322,6 +325,8 @@ def is_generic_or_noise_title(text: str) -> bool: return True if norm in GENERIC_TITLES: return True + if any(p.search(norm) for p in GENERIC_SECTION_PATTERNS): + return True if re.fullmatch(r"20\d{2} \d{2} \d{2}", norm): return True return False @@ -337,8 +342,12 @@ def is_low_value_item(item: SourceLine, category: str | None = None) -> bool: return True if is_heading_only_text(text): return True - if item.kind == "note_section" and not body_lines(text): - return True + if item.kind == "note_section": + section_title = extract_section_title(item) + if section_title and is_generic_or_noise_title(section_title): + return True + if not body_lines(text): + return True if is_bulky_section_dump(item): return True body = first_body_line(text) @@ -355,7 +364,7 @@ def heading_bucket(item: SourceLine) -> tuple[str | None, int]: score = 0 for category, pats in HEADING_BUCKET_HINTS.items(): s = sum(1 for pat in pats if re.search(pat, heading_text)) - if s > score: + if s > score or (s == score and s > 0 and best and CATEGORY_PRIORITY[category] > CATEGORY_PRIORITY[best]): best, score = category, s return best, score diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 89c5f2b..cc48c8a 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -28,22 +28,7 @@ def make_workspace(self) -> Path: self.addCleanup(lambda: shutil.rmtree(tmp, ignore_errors=True)) return ws - def test_ingest_fixture_workspace(self) -> None: - ws = self.make_workspace() - raw_out = ws / 'raw-out' - data = run_json([ - sys.executable, - str(SCRIPTS / 'mneme_ingest_memory.py'), - '--root', str(ws), - '--out', str(raw_out), - ]) - self.assertGreaterEqual(data['sourceCount'], 2) - self.assertGreater(data['itemCount'], 0) - self.assertTrue((raw_out / 'sources.jsonl').exists()) - self.assertTrue((raw_out / 'items.jsonl').exists()) - - def test_compile_fixture_raw_into_outputs(self) -> None: - ws = self.make_workspace() + def compile_workspace(self, ws: Path) -> Path: raw_out = ws / 'raw-out' compiled_out = ws / 'compiled-out' run_json([ @@ -61,14 +46,70 @@ def test_compile_fixture_raw_into_outputs(self) -> None: ], capture_output=True, text=True) if cp.returncode != 0: raise AssertionError(f"Compile failed ({cp.returncode})\nSTDOUT:\n{cp.stdout}\nSTDERR:\n{cp.stderr}") + return compiled_out + + def test_ingest_fixture_workspace(self) -> None: + ws = self.make_workspace() + raw_out = ws / 'raw-out' + data = run_json([ + sys.executable, + str(SCRIPTS / 'mneme_ingest_memory.py'), + '--root', str(ws), + '--out', str(raw_out), + ]) + self.assertGreaterEqual(data['sourceCount'], 2) + self.assertGreater(data['itemCount'], 0) + self.assertTrue((raw_out / 'sources.jsonl').exists()) + self.assertTrue((raw_out / 'items.jsonl').exists()) + + def test_compile_fixture_raw_into_outputs(self) -> None: + ws = self.make_workspace() + compiled_out = self.compile_workspace(ws) + projects_text = (compiled_out / 'projects.md').read_text() + people_text = (compiled_out / 'people.md').read_text() self.assertTrue((compiled_out / 'projects.md').exists()) self.assertTrue((compiled_out / 'people.md').exists()) self.assertTrue((compiled_out / 'timeline.md').exists()) - self.assertIn('Project Alpha', (compiled_out / 'projects.md').read_text()) - self.assertIn('Compiled Memory — People', (compiled_out / 'people.md').read_text()) + self.assertIn('Project Alpha', projects_text) + self.assertNotIn('## Active Projects', projects_text) + self.assertIn('Compiled Memory — People', people_text) + self.assertIn('What to call them', people_text) self.assertTrue((compiled_out / 'documents.jsonl').exists()) self.assertTrue((compiled_out / 'entries.jsonl').exists()) + def test_compile_demotes_generic_sections_and_prefers_system_subheadings(self) -> None: + ws = self.make_workspace() + extra_note = ws / 'memory' / '2026-04-01-routing.md' + extra_note.write_text( + '# 2026-04-01\n\n' + '## Aqua-CQ Project\n\n' + '### Server Access\n' + '- Hostname: iv-ydyut13e9ss6ipm2he1t\n' + '- Ubuntu 24.04.4, 8GB RAM, 40GB disk\n' + '- Project at /opt/aqua-cq (backend only), frontend deployed to /var/www/html/cq/\n\n' + '### Three services on the box\n' + '- :8003 — aqua-qdh (backend-dev, /opt/aqua-qdh)\n\n' + '## Deploy process (correct)\n\n' + '```bash\n' + '# Build\n' + 'export PATH="$HOME/.openclaw/tools/node-v22.22.0/bin:$PATH"\n' + 'cd /opt/aqua-cq && npm run build\n' + '```\n', + encoding='utf-8', + ) + + compiled_out = self.compile_workspace(ws) + projects_text = (compiled_out / 'projects.md').read_text() + systems_text = (compiled_out / 'systems.md').read_text() + + self.assertNotIn('## Build', projects_text) + self.assertNotIn('## Hostname', projects_text) + self.assertNotIn('## Ubuntu 24.04.4, 8GB RAM, 40GB disk', projects_text) + self.assertNotIn('## :8003 — aqua-qdh (backend-dev, /opt/aqua-qdh)', projects_text) + self.assertIn('## Server Access — Hostname', systems_text) + self.assertIn('## Ubuntu 24.04.4, 8GB RAM, 40GB disk', systems_text) + self.assertIn('## :8003 — aqua-qdh (backend-dev, /opt/aqua-qdh)', systems_text) + def test_runtime_prepare_people_category(self) -> None: ws = self.make_workspace() raw_out = ws / 'runtime-raw'