clay-good · clay-good · Apr 6, 2026 · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026
diff --git a/.codelicious/BUILD_COMPLETE b/.codelicious/BUILD_COMPLETE
@@ -0,0 +1 @@
+DONE
diff --git a/.codelicious/STATE.md b/.codelicious/STATE.md
diff --git a/.codelicious/cache.json b/.codelicious/cache.json
@@ -0,0 +1 @@
+{"file_hashes": {}, "ast_exports": {}}
diff --git a/.codelicious/review_performance.json b/.codelicious/review_performance.json
@@ -0,0 +1,122 @@
+[
+  {
+    "severity": "P2",
+    "file": "src/codelicious/context/rag_engine.py",
+    "line": 297,
+    "title": "Full table scan with Python-side cosine similarity on every semantic search",
+    "description": "semantic_search fetches ALL rows from file_chunks via cursor.execute('SELECT ... FROM file_chunks') at line 297 and computes cosine similarity in pure Python for each row. The heap-based top-k (O(n log k)) is good, but the underlying brute-force scan is O(n*d) where d=384. For a codebase with 10K chunks this means 10K struct unpacks + 10K dot products per query. No index, no pruning, no early termination.",
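+    "fix": "Short term: add a WHERE clause to filter by vector_norm range (skip chunks whose norm is too far from the query's) to prune candidates before the Python loop. Caveat: cosine similarity is invariant to vector magnitude, so a norm-range filter is only a heuristic and can discard true matches; measure recall before enabling it. Long term: migrate to sqlite-vss or a dedicated vector extension for approximate nearest-neighbor search."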
+  },
+  {
+    "severity": "P2",
+    "file": "src/codelicious/logger.py",
+    "line": 199,
+    "title": "30+ sequential regex substitutions when any secret indicator substring matches",
+    "description": "When the substring pre-filter at line 194 detects any of ~50 indicator substrings (including common words like 'password', 'Bearer', 'authorization'), ALL 30+ compiled regexes are applied sequentially at lines 199-201. Each .sub() scans the entire message string. The SanitizingFilter at line 213 runs this on every log record that passes the pre-filter. A traceback containing 'password' in a variable name triggers 30+ full-string regex scans.",
+    "fix": "Map each indicator substring to the specific regex subset that could match it (e.g., 'password' only needs the sensitive-context pattern, not all 30+). This reduces the work from 30+ regex scans to 2-3 when a single indicator triggers."
+  },
+  {
+    "severity": "P2",
+    "file": "src/codelicious/context/rag_engine.py",
+    "line": 259,
+    "title": "Individual SQL INSERTs in a loop instead of executemany for chunk ingestion",
+    "description": "ingest_file iterates chunks at line 259 and calls cursor.execute('INSERT ...') individually for each chunk-vector pair. For a file that produces 100 chunks, this means 100 individual cursor.execute calls inside the transaction. SQLite's executemany() batches parameter binding and statement preparation, typically achieving 2-5x throughput improvement over individual execute() calls in a loop. During full codebase indexing (hundreds of files), this adds up significantly.",
+    "fix": "Collect rows into a list of tuples, then use cursor.executemany('INSERT INTO file_chunks (...) VALUES (?, ?, ?, ?, ?)', rows) in a single call. Pre-compute norms and blobs in the loop, append to a batch list, then execute once."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/loop_controller.py",
+    "line": 276,
+    "title": "Double json.dumps serialization of large tool result for logging",
+    "description": "At line 269, tool_result is serialized with json.dumps(tool_result). If the result exceeds MAX_TOOL_RESULT_BYTES, the string is truncated at line 271, then the warning log at line 276 calls json.dumps(tool_result) a second time on the original dict solely to compute the original byte count. For a 5MB tool result, this wastes a full 5MB JSON serialization just to log the original size.",
+    "fix": "Save the original length before truncation: `original_len = len(tool_content)` before the truncation line, then use `original_len` in the logger.warning call."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/executor.py",
+    "line": 229,
+    "title": "LLM response split into lines up to 4 times across parse strategies",
+    "description": "parse_llm_response tries up to 4 extraction strategies (_parse_strict_format at line 229, _parse_markdown_with_filename at line 256, _parse_markdown_preceded_by_path at line 300, _parse_single_file_fallback at line 343). Each strategy independently calls response.splitlines(keepends=True), creating up to 4 separate list copies of potentially tens of thousands of line strings. For a 1MB LLM response, this creates 4 redundant copies of the same line list.",
+    "fix": "Split the response once at the top of parse_llm_response and pass the pre-split lines list into each strategy function, changing their signatures to accept `lines: list[str]` instead of `response: str`."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/logger.py",
+    "line": 217,
+    "title": "SanitizingFilter unconditionally rebuilds args tuple/dict on every filtered log record",
+    "description": "SanitizingFilter.filter() at lines 217-226 unconditionally reconstructs record.args as a new tuple or dict, calling str() and sanitize_message() on each element — even when no secrets are present (the vast majority of log calls). For a DEBUG log with 5 args, this means 5 str() conversions + 5 sanitize_message() calls + 1 tuple allocation per log statement.",
+    "fix": "Only rebuild args when sanitize_message actually changed something. Iterate args, track whether any element was modified, and only create a new container when at least one arg was changed."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/context/cache_engine.py",
+    "line": 190,
+    "title": "Atomic JSON flush of entire ledger on every single memory mutation",
+    "description": "record_memory_mutation flushes the full state to disk (via _flush_state at line 190) on every single append. _flush_state creates a temp file, serializes the entire state dict to JSON, writes it, then calls os.replace. For a 500-entry ledger with interaction summaries, this is ~50-200KB of JSON serialization + file write per mutation. Over a session with 100 mutations, this is 100 atomic write cycles.",
+    "fix": "Flush periodically instead of on every mutation: e.g., every 10 appends, or after a time threshold (5s), or on explicit flush_state() call. The in-memory ledger is already the source of truth, so durability only requires periodic snapshots."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/context/rag_engine.py",
+    "line": 265,
+    "title": "Vectors stored as both JSON text and binary blob — double write I/O and storage",
+    "description": "ingest_file stores each embedding vector in two columns: vector_json (JSON text, ~3KB per 384-dim vector via json.dumps at line 265) and vector_blob (binary, ~1.5KB). Both are always written. For 10K chunks, this roughly triples the vector write I/O (~30MB of JSON on top of ~15MB of blobs) and increases the SQLite database size by ~30MB unnecessarily.",
+    "fix": "Write only vector_blob for new rows. Keep vector_json as NULL. Add a one-time migration step to populate vector_blob for legacy rows that only have vector_json. Short term: skip json.dumps(vector) when blob is successfully created."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/context/rag_engine.py",
+    "line": 215,
+    "title": "math.fsum used for dot product in cosine similarity where standard sum() suffices",
+    "description": "_cosine_similarity_with_norms uses math.fsum (extended-precision summation that tracks multiple intermediate partial sums) for the dot product at line 215. math.fsum has measurably higher per-call overhead due to this partial-sum bookkeeping, applied to 384 elements per chunk across potentially thousands of chunks in semantic_search. The extra precision is unnecessary for cosine similarity ranking — the rounding differences from standard float64 addition are far too small to change relative ordering in practice.",
+    "fix": "Replace `math.fsum(a * b for a, b in zip(vec_a, vec_b))` with `sum(a * b for a, b in zip(vec_a, vec_b))`. The precision difference is negligible for ranking purposes."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/context/rag_engine.py",
+    "line": 175,
+    "title": "math.fsum used for norm computation where standard sum() suffices",
+    "description": "_compute_norm uses math.fsum for the squared-sum computation at line 175: `math.sqrt(math.fsum(v * v for v in vec))`. Same issue as the dot product — math.fsum's extended-precision partial-sum tracking adds overhead for 384 elements with no benefit for ranking. This is called once per chunk during ingest (for vector_norm storage) and once per query in semantic_search.",
+    "fix": "Replace `math.fsum(v * v for v in vec)` with `sum(v * v for v in vec)`. The float64 precision is more than sufficient for norm computation used in similarity ranking."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/context/rag_engine.py",
+    "line": 86,
+    "title": "Unnecessary tuple-to-list conversion in _blob_to_vec",
+    "description": "_blob_to_vec wraps struct.unpack() in list() at line 86, copying the 384-element tuple into a new list. The downstream cosine similarity functions iterate via zip() which works identically on tuples. The list conversion allocates a new 384-element container on every chunk during semantic_search.",
+    "fix": "Return the tuple directly: `return struct.unpack(cls._BLOB_FMT, blob)`. Update the return type annotation from List[float] to tuple[float, ...]. All call sites use zip() iteration which works on both types."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/cli.py",
+    "line": 67,
+    "title": "Redundant filesystem walk in _print_result for completion summary",
+    "description": "_print_result calls _walk_for_specs(repo_path) at line 67 to re-scan the entire repo for spec files, then reads and regex-matches each one. This duplicates the filesystem walk already performed at startup (cli.py:250). For repos with deep directory trees, the redundant os.walk and file reads add noticeable latency to the summary display.",
+    "fix": "Pass the pre-computed all_specs list from main() into _print_result to avoid the second walk. The spec paths don't change during a build (only their content does)."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/prompts.py",
+    "line": 238,
+    "title": "Multiple glob patterns trigger repeated directory traversals in scan_remaining_tasks",
+    "description": "scan_remaining_tasks iterates through 5 glob patterns in _SPEC_GLOBS (line 238), each of which may trigger a separate filesystem traversal via Path.glob(). Patterns like 'docs/**/*.md' and 'docs/specs/**/*.md' overlap, causing the docs directory to be walked multiple times. The `seen` set prevents double-counting but not double-walking.",
+    "fix": "Use a single os.walk() or Path.rglob('*.md') to find all markdown files, then filter by the spec filename patterns and exclusion list. This traverses the filesystem once instead of up to 5 times."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/sandbox.py",
+    "line": 471,
+    "title": "Redundant per-file DENIED_PATTERNS check in list_files after directory pruning",
+    "description": "list_files at line 453 already prunes denied directory names via `dirs[:] = [d for d in dirs if d not in self.DENIED_PATTERNS]`, preventing os.walk from descending into .git, __pycache__, etc. But at lines 471-476, each file's full relative path parts are checked against DENIED_PATTERNS again in a nested loop. The only scenario this catches is a file in the root directory literally named '.git' or '__pycache__', which is extremely rare.",
+    "fix": "Replace the inner path-parts loop with a simple filename check: `if filename in self.DENIED_PATTERNS: continue`. This handles the root-level edge case without the O(parts * patterns) nested iteration on every file."
+  },
+  {
+    "severity": "P3",
+    "file": "src/codelicious/tools/audit_logger.py",
+    "line": 204,
+    "title": "Full JSON serialization of tool kwargs (including file content) on every tool call for audit log",
+    "description": "log_tool_intent at line 204 calls json.dumps(kwargs, default=str) for every tool dispatch. For write_file calls, kwargs includes the full file content — a 100KB file write causes 100KB of JSON serialization just for the audit trail. This serialized string is then written to both the console (via console_logger.info) and the audit log file. The audit trail is valuable, but serializing large payloads is wasteful when the content can be summarized.",
+    "fix": "Truncate large values in kwargs before serialization: e.g., if 'content' key is present and exceeds 1KB, replace with a summary like '<content: 102400 bytes>'. Log the full content at DEBUG level only if needed for investigation."
+  }
+]