From 1af4c92091f05b423625a48b81b0cc690f3b0314 Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Tue, 20 Jan 2026 14:07:13 +0100
Subject: [PATCH 01/17] modified gha check

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 862e04a..e9f1698 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -9,4 +9,4 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Say hi
-        run: echo "Hi from my self-hosted runner"
\ No newline at end of file
+        run: echo "Hello from my self-hosted runner"
\ No newline at end of file

From 9083757412cc55258e28cbc1b0a3d1fa2c11eeb0 Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 09:55:55 +0100
Subject: [PATCH 02/17] review action added

---
 .github/workflows/ci.yml     | 12 ----------
 .github/workflows/review.yml | 43 ++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 12 deletions(-)
 delete mode 100644 .github/workflows/ci.yml
 create mode 100644 .github/workflows/review.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
deleted file mode 100644
index e9f1698..0000000
--- a/.github/workflows/ci.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-name: code-review
-
-on:
-  pull_request:
-
-jobs:
-  code-review:
-    runs-on: [self-hosted, appendix-reviewer]
-    steps:
-      - uses: actions/checkout@v4
-      - name: Say hi
-        run: echo "Hello from my self-hosted runner"
\ No newline at end of file
diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml
new file mode 100644
index 0000000..b86e7fc
--- /dev/null
+++ b/.github/workflows/review.yml
@@ -0,0 +1,43 @@
+name: auto code review
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+jobs:
+  review:
+    runs-on: [self-hosted, appendix-reviewer]
+    permissions:
+      pull-requests: write
+      contents: read
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Fetch PR base
+        shell: pwsh
+        run: |
+          git fetch origin ${{ github.base_ref }} --depth=1
+
+      - name: Build diff
+        shell: pwsh
+        run: |
+          git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt
+          Get-Content diff.txt | Measure-Object -Line -Word -Character | Format-List
+
+      - name: Run multi-agent local review
+        shell: pwsh
+        env:
+          MODEL: qwen2.5.:14b-instruct
+        run: |
+          python .github/scripts/multi_agent_review.py
+
+      - name: Post comment to PR
+        shell: pwsh
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          gh pr comment ${{ github.event.pull_request.number }} --body-file review_comment.md
\ No newline at end of file

From ae87b2b26f9d72244fa4440988a73ca151f9cd2d Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 10:50:59 +0100
Subject: [PATCH 03/17] actual review job added

---
 .github/scripts/multi_agent_review.py | 200 ++++++++++++++++++++++++++
 docs/review_guidelines.md             |   6 +
 2 files changed, 206 insertions(+)
 create mode 100644 .github/scripts/multi_agent_review.py
 create mode 100644 docs/review_guidelines.md

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
new file mode 100644
index 0000000..0419ef6
--- /dev/null
+++ b/.github/scripts/multi_agent_review.py
@@ -0,0 +1,200 @@
+import json
+import os
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+DIFF_PATH = Path("diff.txt")
+GUIDELINES_PATH = Path("docs/review_guidelines.md")
+OUT_MD = Path("review_comment.md")
+
+MODEL = os.environ.get("MODEL", "qwen2.5:14b-instruct")
+MAX_DIFF_CHARS = int(os.environ.get("MAX_DIFF_CHARS", "80000"))
+
+# --- Utilities ---
+
+def read_text(path: Path) -> str:
+    if path.exists():
+        return path.read_text(encoding="utf-8", errors="ignore")
+    return ""
+
+def run_ollama(model: str, prompt: str) -> str:
+    # Use stdin to avoid CLI arg limits on Windows.
+    p = subprocess.run(
+        ["ollama", "run", model],
+        input=prompt.encode("utf-8"),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        check=False,
+    )
+    if p.returncode != 0:
+        err = p.stderr.decode("utf-8", errors="ignore")
+        raise RuntimeError(f"Ollama failed (code={p.returncode}): {err[:2000]}")
+    return p.stdout.decode("utf-8", errors="ignore").strip()
+
+def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
+    try:
+        return json.loads(s), None
+    except Exception as e:
+        return None, str(e)
+
+def truncate(s: str, limit: int) -> str:
+    if len(s) <= limit:
+        return s
+    return s[:limit] + "\n\n[TRUNCATED]\n"
+
+def md_escape(s: str) -> str:
+    # Minimal escaping to avoid accidental markdown formatting explosions
+    return s.replace("\r", "").strip()
+
+# --- Agent definitions ---
+
+@dataclass
+class Agent:
+    name: str
+    focus: str
+
+AGENTS: List[Agent] = [
+    Agent(
+        name="Correctness & Reliability",
+        focus=(
+            "Find correctness risks: idempotency, retries/backoff, timeouts, transactional boundaries, "
+            "race conditions, partial failures, error handling, and data consistency."
+        ),
+    ),
+    Agent(
+        name="Architecture & Boundaries",
+        focus=(
+            "Review architecture: module boundaries, coupling, layering, dependency direction, "
+            "API contracts, naming of abstractions, and maintainability trade-offs."
+        ),
+    ),
+    Agent(
+        name="Tests & Observability",
+        focus=(
+            "Suggest tests and observability: unit/integration tests worth adding, edge cases, "
+            "logging/metrics/tracing, and how to reproduce failures."
+        ),
+    ),
+    Agent(
+        name="Cost & LLM Discipline",
+        focus=(
+            "Look for cost/perf traps: unnecessary LLM calls, large payloads, missing caching, "
+            "missing state+delta pattern, and risk of GPU contention."
+        ),
+    ),
+]
+
+# JSON schema agent must output
+JSON_SCHEMA = """
+Return ONLY valid JSON (no markdown, no extra text).
+Schema:
+{
+  "summary": "string (1-3 sentences)",
+  "blocking": [{"issue":"string","evidence":"string","fix":"string"}],
+  "non_blocking": [{"issue":"string","evidence":"string","fix":"string"}],
+  "tests_to_add": ["string"],
+  "questions": ["string"]
+}
+Rules:
+- Use only the provided DIFF and GUIDELINES.
+- Do not guess. If uncertain, put it into "questions".
+- Keep evidence concrete (file/line hints if visible in diff).
+""".strip()
+
+def build_prompt(agent: Agent, guidelines: str, diff: str) -> str:
+    return f"""
+SYSTEM:
+You are a strict senior software engineer acting as a pull request reviewer.
+
+TASK:
+{agent.focus}
+
+GUIDELINES:
+{guidelines if guidelines else "(none provided)"}
+
+DIFF:
+{diff}
+
+OUTPUT INSTRUCTIONS:
+{JSON_SCHEMA}
+""".strip()
+
+# --- Markdown report building ---
+
+def fmt_issue_list(title: str, issues: List[Dict[str, Any]]) -> str:
+    if not issues:
+        return f"**{title}:** None ✅\n"
+    out = f"**{title}:**\n"
+    for it in issues:
+        issue = md_escape(str(it.get("issue", "")))
+        evidence = md_escape(str(it.get("evidence", "")))
+        fix = md_escape(str(it.get("fix", "")))
+        out += f"- **{issue}**\n  - Evidence: {evidence}\n  - Fix: {fix}\n"
+    return out + "\n"
+
+def fmt_list(title: str, items: List[str]) -> str:
+    if not items:
+        return f"**{title}:** None\n\n"
+    out = f"**{title}:**\n"
+    for x in items:
+        out += f"- {md_escape(str(x))}\n"
+    return out + "\n"
+
+def main() -> None:
+    diff = read_text(DIFF_PATH)
+    guidelines = read_text(GUIDELINES_PATH)
+
+    if not diff.strip():
+        OUT_MD.write_text("### Local Multi-Agent AI Review\n\nNo diff found.\n", encoding="utf-8")
+        return
+
+    diff = truncate(diff, MAX_DIFF_CHARS)
+
+    results: List[Tuple[Agent, Optional[Dict[str, Any]], Optional[str], str]] = []
+    # tuple: (agent, json, json_error, raw)
+
+    for agent in AGENTS:
+        prompt = build_prompt(agent, guidelines, diff)
+        raw = run_ollama(MODEL, prompt)
+        data, err = safe_json_loads(raw)
+        results.append((agent, data, err, raw))
+
+    # Build consolidated markdown
+    md = "### Local Multi-Agent AI Review\n\n"
+    md += f"Model: `{MODEL}`\n\n"
+    if guidelines.strip():
+        md += "_Project guidelines were provided._\n\n"
+
+    # High-level rollup
+    md += "## Rollup\n\n"
+    rollup_lines = []
+    for agent, data, err, raw in results:
+        if data:
+            s = md_escape(str(data.get("summary", "")))
+            rollup_lines.append(f"- **{agent.name}:** {s}")
+        else:
+            rollup_lines.append(f"- **{agent.name}:** ⚠️ invalid JSON output ({err})")
+    md += "\n".join(rollup_lines) + "\n\n"
+
+    # Detailed sections
+    for agent, data, err, raw in results:
+        md += f"---\n\n## {agent.name}\n\n"
+        if not data:
+            md += "**⚠️ Agent returned invalid JSON.**\n\n"
+            md += "Raw output (truncated):\n\n```text\n"
+            md += truncate(raw, 3000)
+            md += "\n```\n\n"
+            continue
+
+        md += f"**Summary:** {md_escape(str(data.get('summary','')))}\n\n"
+        md += fmt_issue_list("Blocking", data.get("blocking", []) or [])
+        md += fmt_issue_list("Non-blocking", data.get("non_blocking", []) or [])
+        md += fmt_list("Tests to add", data.get("tests_to_add", []) or [])
+        md += fmt_list("Questions", data.get("questions", []) or [])
+
+    OUT_MD.write_text(md, encoding="utf-8")
+
+if __name__ == "__main__":
+    main()
diff --git a/docs/review_guidelines.md b/docs/review_guidelines.md
new file mode 100644
index 0000000..4c1fa73
--- /dev/null
+++ b/docs/review_guidelines.md
@@ -0,0 +1,6 @@
+# Review Guidelines
+
+- Kotlin + coroutines allowed.
+- Processing is at-least-once; handlers must be idempotent.
+- Postgres is source of truth; raw events are immutable; corrections create new versions.
+- External calls must have timeouts + retries + backoff.
\ No newline at end of file

From 601617f249cf2ade9c058a26a8a87773dd322af5 Mon Sep 17 00:00:00 2001
From: Valerii Burmistrov <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 11:06:10 +0100
Subject: [PATCH 04/17] Add debug step for PATH and Python location

Added a step to debug the PATH environment variable and locate Python.
---
 .github/workflows/review.yml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml
index b86e7fc..006bf8d 100644
--- a/.github/workflows/review.yml
+++ b/.github/workflows/review.yml
@@ -28,6 +28,12 @@ jobs:
           git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt
           Get-Content diff.txt | Measure-Object -Line -Word -Character | Format-List
 
+      - name: Debug PATH
+        shell: pwsh
+        run: |
+          echo $env:PATH
+          where.exe python
+
       - name: Run multi-agent local review
         shell: pwsh
         env:
@@ -40,4 +46,4 @@ jobs:
         env:
           GH_TOKEN: ${{ github.token }}
         run: |
-          gh pr comment ${{ github.event.pull_request.number }} --body-file review_comment.md
\ No newline at end of file
+          gh pr comment ${{ github.event.pull_request.number }} --body-file review_comment.md

From 75e48ee0da93c93e0ed0499c67c3dcdcfa1b0130 Mon Sep 17 00:00:00 2001
From: Valerii Burmistrov <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 11:09:32 +0100
Subject: [PATCH 05/17] Update Python path command in review.yml

---
 .github/workflows/review.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml
index 006bf8d..432eb63 100644
--- a/.github/workflows/review.yml
+++ b/.github/workflows/review.yml
@@ -32,7 +32,7 @@ jobs:
         shell: pwsh
         run: |
           echo $env:PATH
-          where.exe python
+          where python
 
       - name: Run multi-agent local review
         shell: pwsh

From 777ecfdd556ba10bc68b1b6d2a73eeaa45660b28 Mon Sep 17 00:00:00 2001
From: Valerii Burmistrov <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 11:25:04 +0100
Subject: [PATCH 06/17] Enhance Python debugging in review workflow

Updated the debug step to provide more detailed Python resolution checks.
---
 .github/workflows/review.yml | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml
index 432eb63..d25253a 100644
--- a/.github/workflows/review.yml
+++ b/.github/workflows/review.yml
@@ -28,11 +28,15 @@ jobs:
           git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt
           Get-Content diff.txt | Measure-Object -Line -Word -Character | Format-List
 
-      - name: Debug PATH
+      - name: Debug Python resolution
         shell: pwsh
         run: |
-          echo $env:PATH
-          where python
+          echo "PATH=$env:PATH"
+          where.exe python || echo "where.exe can't find python"
+          Get-Command python -All || echo "Get-Command can't find python"
+          Test-Path "C:\Users\zxc\AppData\Local\Programs\Python\Python314\python.exe"
+          Get-ChildItem "C:\Users\zxc\AppData\Local\Programs\Python\Python314\" -Filter python*.exe -ErrorAction SilentlyContinue
+
 
       - name: Run multi-agent local review
         shell: pwsh

From c45d18009844a367c4ef6e0499472195cefa6bae Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 12:54:06 +0100
Subject: [PATCH 07/17] ollama rest api call instead of a subprocess

---
 .github/scripts/multi_agent_review.py | 43 +++++++++++++++++++--------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index 0419ef6..29d37ce 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -4,6 +4,7 @@
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
+import urllib.request
 
 DIFF_PATH = Path("diff.txt")
 GUIDELINES_PATH = Path("docs/review_guidelines.md")
@@ -11,6 +12,7 @@
 
 MODEL = os.environ.get("MODEL", "qwen2.5:14b-instruct")
 MAX_DIFF_CHARS = int(os.environ.get("MAX_DIFF_CHARS", "80000"))
+OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434")
 
 # --- Utilities ---
 
@@ -20,18 +22,26 @@ def read_text(path: Path) -> str:
     return ""
 
 def run_ollama(model: str, prompt: str) -> str:
-    # Use stdin to avoid CLI arg limits on Windows.
-    p = subprocess.run(
-        ["ollama", "run", model],
-        input=prompt.encode("utf-8"),
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        check=False,
+    payload = {
+        "model": model,
+        "prompt": prompt,
+        "stream": False,
+    }
+
+    req = urllib.request.Request(
+        f"{OLLAMA_URL}/api/generate",
+        data=json.dumps(payload).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+        method="POST",
     )
-    if p.returncode != 0:
-        err = p.stderr.decode("utf-8", errors="ignore")
-        raise RuntimeError(f"Ollama failed (code={p.returncode}): {err[:2000]}")
-    return p.stdout.decode("utf-8", errors="ignore").strip()
+
+    try:
+        with urllib.request.urlopen(req, timeout=300) as resp:
+            body = resp.read().decode("utf-8", errors="ignore")
+            data = json.loads(body)
+            return data.get("response", "").strip()
+    except Exception as e:
+        raise RuntimeError(f"Ollama HTTP call failed: {e}")
 
 def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
     try:
@@ -39,15 +49,18 @@ def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
     except Exception as e:
         return None, str(e)
 
+
 def truncate(s: str, limit: int) -> str:
     if len(s) <= limit:
         return s
     return s[:limit] + "\n\n[TRUNCATED]\n"
 
+
 def md_escape(s: str) -> str:
     # Minimal escaping to avoid accidental markdown formatting explosions
     return s.replace("\r", "").strip()
 
+
 # --- Agent definitions ---
 
 @dataclass
@@ -55,6 +68,7 @@ class Agent:
     name: str
     focus: str
 
+
 AGENTS: List[Agent] = [
     Agent(
         name="Correctness & Reliability",
@@ -103,6 +117,7 @@ class Agent:
 - Keep evidence concrete (file/line hints if visible in diff).
 """.strip()
 
+
 def build_prompt(agent: Agent, guidelines: str, diff: str) -> str:
     return f"""
 SYSTEM:
@@ -121,6 +136,7 @@ def build_prompt(agent: Agent, guidelines: str, diff: str) -> str:
 {JSON_SCHEMA}
 """.strip()
 
+
 # --- Markdown report building ---
 
 def fmt_issue_list(title: str, issues: List[Dict[str, Any]]) -> str:
@@ -134,6 +150,7 @@ def fmt_issue_list(title: str, issues: List[Dict[str, Any]]) -> str:
         out += f"- **{issue}**\n  - Evidence: {evidence}\n  - Fix: {fix}\n"
     return out + "\n"
 
+
 def fmt_list(title: str, items: List[str]) -> str:
     if not items:
         return f"**{title}:** None\n\n"
@@ -142,6 +159,7 @@ def fmt_list(title: str, items: List[str]) -> str:
         out += f"- {md_escape(str(x))}\n"
     return out + "\n"
 
+
 def main() -> None:
     diff = read_text(DIFF_PATH)
     guidelines = read_text(GUIDELINES_PATH)
@@ -188,7 +206,7 @@ def main() -> None:
             md += "\n```\n\n"
             continue
 
-        md += f"**Summary:** {md_escape(str(data.get('summary','')))}\n\n"
+        md += f"**Summary:** {md_escape(str(data.get('summary', '')))}\n\n"
         md += fmt_issue_list("Blocking", data.get("blocking", []) or [])
         md += fmt_issue_list("Non-blocking", data.get("non_blocking", []) or [])
         md += fmt_list("Tests to add", data.get("tests_to_add", []) or [])
@@ -196,5 +214,6 @@ def main() -> None:
 
     OUT_MD.write_text(md, encoding="utf-8")
 
+
 if __name__ == "__main__":
     main()

From 294d3eb089064739fd6341dee877933c1c147f62 Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 19:48:56 +0100
Subject: [PATCH 08/17] incremental review

---
 .github/scripts/multi_agent_review.py | 81 +++++++++++++++++++++++++--
 .github/workflows/review.yml          | 53 ++++++++++++++----
 2 files changed, 117 insertions(+), 17 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index 29d37ce..514c6ee 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -14,6 +14,11 @@
 MAX_DIFF_CHARS = int(os.environ.get("MAX_DIFF_CHARS", "80000"))
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434")
 
+# Set in workflow (optional) by parsing previous AI comment:
+# PREV_SHA=<sha>
+PREV_SHA = (os.environ.get("PREV_SHA") or "").strip()
+
+
 # --- Utilities ---
 
 def read_text(path: Path) -> str:
@@ -21,11 +26,32 @@ def read_text(path: Path) -> str:
         return path.read_text(encoding="utf-8", errors="ignore")
     return ""
 
+
+def git(*args: str) -> str:
+    p = subprocess.run(
+        ["git", *args],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        check=False,
+    )
+    if p.returncode != 0:
+        err = p.stderr.decode("utf-8", errors="ignore")
+        raise RuntimeError(f"git {' '.join(args)} failed: {err[:2000]}")
+    return p.stdout.decode("utf-8", errors="ignore").strip()
+
+
+def get_head_sha() -> str:
+    # Works in GHA runner after checkout
+    return git("rev-parse", "HEAD")
+
+
 def run_ollama(model: str, prompt: str) -> str:
     payload = {
         "model": model,
         "prompt": prompt,
         "stream": False,
+        # If you want to aggressively unload after each call:
+        # "keep_alive": 0,
     }
 
     req = urllib.request.Request(
@@ -43,6 +69,7 @@ def run_ollama(model: str, prompt: str) -> str:
     except Exception as e:
         raise RuntimeError(f"Ollama HTTP call failed: {e}")
 
+
 def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
     try:
         return json.loads(s), None
@@ -102,7 +129,9 @@ class Agent:
 
 # JSON schema agent must output
 JSON_SCHEMA = """
-Return ONLY valid JSON (no markdown, no extra text).
+Return ONLY valid JSON. No markdown. No explanations. No extra keys. No trailing comments.
+Any output outside JSON is considered a failure.
+
 Schema:
 {
   "summary": "string (1-3 sentences)",
@@ -111,17 +140,32 @@ class Agent:
   "tests_to_add": ["string"],
   "questions": ["string"]
 }
+
 Rules:
 - Use only the provided DIFF and GUIDELINES.
-- Do not guess. If uncertain, put it into "questions".
+- Do not guess. If uncertain, put it into "questions" and set evidence to "unknown".
 - Keep evidence concrete (file/line hints if visible in diff).
 """.strip()
 
 
-def build_prompt(agent: Agent, guidelines: str, diff: str) -> str:
+def build_prompt(agent: Agent, guidelines: str, diff: str, mode: str, prev_sha: str, head_sha: str) -> str:
+    followup_hint = ""
+    if mode == "FOLLOW_UP":
+        followup_hint = f"""
+FOLLOW-UP CONTEXT:
+- This is a follow-up review after previous AI feedback.
+- Previous reviewed HEAD SHA was: {prev_sha}
+- Current HEAD SHA is: {head_sha}
+- The DIFF is expected to contain ONLY changes since the previous review.
+- Focus on verifying whether prior likely issues are addressed, and whether new issues were introduced.
+""".strip()
+
     return f"""
 SYSTEM:
-You are a strict senior software engineer acting as a pull request reviewer.
+You are a strict senior/staff+ software engineer acting as a pull request reviewer.
+
+MODE: {mode}
+{followup_hint}
 
 TASK:
 {agent.focus}
@@ -170,18 +214,39 @@ def main() -> None:
 
     diff = truncate(diff, MAX_DIFF_CHARS)
 
+    head_sha = get_head_sha()
+    mode = "FOLLOW_UP" if PREV_SHA else "INITIAL"
+
     results: List[Tuple[Agent, Optional[Dict[str, Any]], Optional[str], str]] = []
     # tuple: (agent, json, json_error, raw)
 
     for agent in AGENTS:
-        prompt = build_prompt(agent, guidelines, diff)
+        prompt = build_prompt(
+            agent=agent,
+            guidelines=guidelines,
+            diff=diff,
+            mode=mode,
+            prev_sha=PREV_SHA,
+            head_sha=head_sha,
+        )
         raw = run_ollama(MODEL, prompt)
         data, err = safe_json_loads(raw)
         results.append((agent, data, err, raw))
 
     # Build consolidated markdown
-    md = "### Local Multi-Agent AI Review\n\n"
+    title = "Local Multi-Agent AI Review"
+    if mode == "FOLLOW_UP":
+        title += " (Follow-up)"
+    else:
+        title += " (Initial)"
+
+    md = f"### {title}\n\n"
     md += f"Model: `{MODEL}`\n\n"
+    md += f"Current HEAD: `{head_sha}`\n"
+    if PREV_SHA:
+        md += f"Previous reviewed HEAD: `{PREV_SHA}`\n"
+    md += "\n"
+
     if guidelines.strip():
         md += "_Project guidelines were provided._\n\n"
 
@@ -212,6 +277,10 @@ def main() -> None:
         md += fmt_list("Tests to add", data.get("tests_to_add", []) or [])
         md += fmt_list("Questions", data.get("questions", []) or [])
 
+    # Marker for next run to pick up
+    md += "\n---\n\n"
+    md += f"Reviewed-Head-SHA: {head_sha}\n"
+
     OUT_MD.write_text(md, encoding="utf-8")
 
 
diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml
index d25253a..f888539 100644
--- a/.github/workflows/review.yml
+++ b/.github/workflows/review.yml
@@ -22,26 +22,51 @@ jobs:
         run: |
           git fetch origin ${{ github.base_ref }} --depth=1
 
-      - name: Build diff
+      - name: Get previous AI review SHA (if any)
         shell: pwsh
+        env:
+          GH_TOKEN: ${{ github.token }}
         run: |
-          git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt
-          Get-Content diff.txt | Measure-Object -Line -Word -Character | Format-List
+          $pr = "${{ github.event.pull_request.number }}"
+          $comments = gh api repos/${{ github.repository }}/issues/$pr/comments | ConvertFrom-Json
+          $ai = $comments | Where-Object { $_.body -match "Reviewed-Head-SHA:" } | Select-Object -Last 1
+          if ($null -ne $ai) {
+            $m = [regex]::Match($ai.body, "Reviewed-Head-SHA:\s*([0-9a-fA-F]+)")
+            if ($m.Success) {
+              "PREV_SHA=$($m.Groups[1].Value)" | Out-File -FilePath $env:GITHUB_ENV -Append
+              Write-Host "Found previous review SHA: $($m.Groups[1].Value)"
+            }
+          } else {
+            Write-Host "No previous AI review comment found."
+          }
 
-      - name: Debug Python resolution
+      - name: Build diff (incremental if possible)
         shell: pwsh
         run: |
-          echo "PATH=$env:PATH"
-          where.exe python || echo "where.exe can't find python"
-          Get-Command python -All || echo "Get-Command can't find python"
-          Test-Path "C:\Users\zxc\AppData\Local\Programs\Python\Python314\python.exe"
-          Get-ChildItem "C:\Users\zxc\AppData\Local\Programs\Python\Python314\" -Filter python*.exe -ErrorAction SilentlyContinue
+          git fetch origin ${{ github.base_ref }} --depth=1
+
+          if ($env:PREV_SHA) {
+            # Validate that PREV_SHA exists in this checkout (force-push can break it)
+            git cat-file -e "$env:PREV_SHA^{commit}" 2>$null
+            if ($LASTEXITCODE -eq 0) {
+              git diff --unified=3 $env:PREV_SHA...HEAD > diff.txt
+              Write-Host "Using incremental diff: $env:PREV_SHA...HEAD"
+            } else {
+              Write-Host "PREV_SHA not found locally (maybe force-push). Falling back to full diff."
+              git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt
+            }
+          } else {
+            git diff --unified=3 origin/${{ github.base_ref }}...HEAD > diff.txt
+            Write-Host "Using full PR diff against base."
+          }
 
+          Get-Content diff.txt | Measure-Object -Line -Word -Character | Format-List
 
       - name: Run multi-agent local review
         shell: pwsh
         env:
-          MODEL: qwen2.5.:14b-instruct
+          MODEL: qwen2.5:14b-instruct
+          MAX_DIFF_CHARS: "80000"
         run: |
           python .github/scripts/multi_agent_review.py
 
@@ -50,4 +75,10 @@ jobs:
         env:
           GH_TOKEN: ${{ github.token }}
         run: |
-          gh pr comment ${{ github.event.pull_request.number }} --body-file review_comment.md
+          $pr = "${{ github.event.pull_request.number }}"
+          # Try to edit last comment; if it fails, create a new one.
+          gh pr comment $pr --edit-last --body-file review_comment.md
+          if ($LASTEXITCODE -ne 0) {
+            Write-Host "Edit-last failed, creating a new comment."
+            gh pr comment $pr --body-file review_comment.md
+          }

From 4ed668217a147fad7da3a8e74ffb2cb2f6fbf184 Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 20:35:15 +0100
Subject: [PATCH 09/17] incremental review

---
 .github/scripts/multi_agent_review.py | 2 +-
 .github/workflows/review.yml          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index 514c6ee..e2d19dc 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -279,7 +279,7 @@ def main() -> None:
 
     # Marker for next run to pick up
     md += "\n---\n\n"
-    md += f"Reviewed-Head-SHA: {head_sha}\n"
+    md += f"<!-- AI_REVIEW:HEAD_SHA={head_sha} -->"
 
     OUT_MD.write_text(md, encoding="utf-8")
 
diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml
index f888539..995ba6c 100644
--- a/.github/workflows/review.yml
+++ b/.github/workflows/review.yml
@@ -31,7 +31,7 @@ jobs:
           $comments = gh api repos/${{ github.repository }}/issues/$pr/comments | ConvertFrom-Json
           $ai = $comments | Where-Object { $_.body -match "Reviewed-Head-SHA:" } | Select-Object -Last 1
           if ($null -ne $ai) {
-            $m = [regex]::Match($ai.body, "Reviewed-Head-SHA:\s*([0-9a-fA-F]+)")
+            $m = [regex]::Match($ai.body, "AI_REVIEW:HEAD_SHA=([0-9a-fA-F]{7,40})")
             if ($m.Success) {
               "PREV_SHA=$($m.Groups[1].Value)" | Out-File -FilePath $env:GITHUB_ENV -Append
               Write-Host "Found previous review SHA: $($m.Groups[1].Value)"

From cce9b1676d8f60f4814dee63c9610b5aedf6f6da Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Wed, 21 Jan 2026 21:18:28 +0100
Subject: [PATCH 10/17] curator introduced

---
 .github/scripts/multi_agent_review.py | 210 +++++++++++++++++++++-----
 1 file changed, 171 insertions(+), 39 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index e2d19dc..cd5c742 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -11,12 +11,15 @@
 OUT_MD = Path("review_comment.md")
 
 MODEL = os.environ.get("MODEL", "qwen2.5:14b-instruct")
+# Optional: use a cheaper/faster model for the curator step
+CURATOR_MODEL = os.environ.get("CURATOR_MODEL", MODEL)
+
 MAX_DIFF_CHARS = int(os.environ.get("MAX_DIFF_CHARS", "80000"))
-OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434")
+MAX_PREV_REVIEW_CHARS = int(os.environ.get("MAX_PREV_REVIEW_CHARS", "12000"))
 
-# Set in workflow (optional) by parsing previous AI comment:
-# PREV_SHA=<sha>
+OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434")
 PREV_SHA = (os.environ.get("PREV_SHA") or "").strip()
+PREV_REVIEW_PATH = Path(os.environ.get("PREV_REVIEW_PATH", "prev_review.txt"))  # optional
 
 
 # --- Utilities ---
@@ -41,11 +44,21 @@ def git(*args: str) -> str:
 
 
 def get_head_sha() -> str:
-    # Works in GHA runner after checkout
     return git("rev-parse", "HEAD")
 
 
-def run_ollama(model: str, prompt: str) -> str:
+def truncate(s: str, limit: int) -> str:
+    if len(s) <= limit:
+        return s
+    return s[:limit] + "\n\n[TRUNCATED]\n"
+
+
+def md_escape(s: str) -> str:
+    # Minimal escaping to avoid accidental markdown formatting explosions
+    return s.replace("\r", "").strip()
+
+
+def run_ollama(model: str, prompt: str, timeout_s: int = 300) -> str:
     payload = {
         "model": model,
         "prompt": prompt,
@@ -62,7 +75,7 @@ def run_ollama(model: str, prompt: str) -> str:
     )
 
     try:
-        with urllib.request.urlopen(req, timeout=300) as resp:
+        with urllib.request.urlopen(req, timeout=timeout_s) as resp:
             body = resp.read().decode("utf-8", errors="ignore")
             data = json.loads(body)
             return data.get("response", "").strip()
@@ -77,15 +90,20 @@ def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
         return None, str(e)
 
 
-def truncate(s: str, limit: int) -> str:
-    if len(s) <= limit:
-        return s
-    return s[:limit] + "\n\n[TRUNCATED]\n"
+def cap_list(x: Any, max_items: int) -> List[Any]:
+    if not isinstance(x, list):
+        return []
+    return x[:max_items]
 
 
-def md_escape(s: str) -> str:
-    # Minimal escaping to avoid accidental markdown formatting explosions
-    return s.replace("\r", "").strip()
+def cap_issues(x: Any, max_items: int) -> List[Dict[str, Any]]:
+    if not isinstance(x, list):
+        return []
+    out = []
+    for it in x[:max_items]:
+        if isinstance(it, dict):
+            out.append(it)
+    return out
 
 
 # --- Agent definitions ---
@@ -127,20 +145,26 @@ class Agent:
     ),
 ]
 
-# JSON schema agent must output
+
+# JSON schema for each agent (strict + short)
 JSON_SCHEMA = """
-Return ONLY valid JSON. No markdown. No explanations. No extra keys. No trailing comments.
-Any output outside JSON is considered a failure.
+Return ONLY valid JSON. No markdown. No explanations. No extra keys.
 
 Schema:
 {
-  "summary": "string (1-3 sentences)",
+  "summary": "string (max 2 sentences)",
   "blocking": [{"issue":"string","evidence":"string","fix":"string"}],
   "non_blocking": [{"issue":"string","evidence":"string","fix":"string"}],
   "tests_to_add": ["string"],
   "questions": ["string"]
 }
 
+Hard limits:
+- blocking: max 2 items
+- non_blocking: max 3 items
+- tests_to_add: max 6 items
+- questions: max 5 items
+
 Rules:
 - Use only the provided DIFF and GUIDELINES.
 - Do not guess. If uncertain, put it into "questions" and set evidence to "unknown".
@@ -148,6 +172,27 @@ class Agent:
 """.strip()
 
 
+CURATOR_SCHEMA = """
+Return ONLY valid JSON. No markdown. No explanations. No extra keys.
+
+Schema:
+{
+  "short_summary": ["string (max 3 bullets)"],
+  "top_actions": ["string (max 5 items)"],
+  "resolved": ["string (max 5 items)"],
+  "still_open": ["string (max 5 items)"],
+  "new_risks": ["string (max 5 items)"]
+}
+
+Rules:
+- Deduplicate and prioritize.
+- Prefer concrete, actionable items.
+- If evidence is weak/unknown, downgrade or omit it.
+- If mode is INITIAL: resolved/still_open can be empty.
+- If mode is FOLLOW_UP: classify items as resolved/still_open/new_risks when possible.
+""".strip()
+
+
 def build_prompt(agent: Agent, guidelines: str, diff: str, mode: str, prev_sha: str, head_sha: str) -> str:
     followup_hint = ""
     if mode == "FOLLOW_UP":
@@ -157,7 +202,7 @@ def build_prompt(agent: Agent, guidelines: str, diff: str, mode: str, prev_sha:
 - Previous reviewed HEAD SHA was: {prev_sha}
 - Current HEAD SHA is: {head_sha}
 - The DIFF is expected to contain ONLY changes since the previous review.
-- Focus on verifying whether prior likely issues are addressed, and whether new issues were introduced.
+- Focus on what changed, what got resolved, and what new issues were introduced.
 """.strip()
 
     return f"""
@@ -181,7 +226,34 @@ def build_prompt(agent: Agent, guidelines: str, diff: str, mode: str, prev_sha:
 """.strip()
 
 
-# --- Markdown report building ---
+def build_curator_prompt(
+        mode: str,
+        prev_sha: str,
+        head_sha: str,
+        agent_json: List[Dict[str, Any]],
+        prev_review_text: str,
+) -> str:
+    payload = {
+        "mode": mode,
+        "previous_reviewed_sha": prev_sha or None,
+        "current_sha": head_sha,
+        "agent_reviews": agent_json,
+        "previous_review_text": truncate(prev_review_text, MAX_PREV_REVIEW_CHARS) if prev_review_text else "",
+    }
+
+    return f"""
+SYSTEM:
+You are a lead reviewer (staff+). Merge multiple agent reviews into a short, actionable PR comment.
+
+INPUT (JSON):
+{json.dumps(payload, ensure_ascii=False)}
+
+OUTPUT INSTRUCTIONS:
+{CURATOR_SCHEMA}
+""".strip()
+
+
+# --- Markdown building ---
 
 def fmt_issue_list(title: str, issues: List[Dict[str, Any]]) -> str:
     if not issues:
@@ -204,9 +276,16 @@ def fmt_list(title: str, items: List[str]) -> str:
     return out + "\n"
 
 
+def fmt_bullets(items: List[str]) -> str:
+    if not items:
+        return "- None\n"
+    return "".join([f"- {md_escape(str(x))}\n" for x in items])
+
+
 def main() -> None:
     diff = read_text(DIFF_PATH)
     guidelines = read_text(GUIDELINES_PATH)
+    prev_review_text = read_text(PREV_REVIEW_PATH)
 
     if not diff.strip():
         OUT_MD.write_text("### Local Multi-Agent AI Review\n\nNo diff found.\n", encoding="utf-8")
@@ -217,31 +296,60 @@ def main() -> None:
     head_sha = get_head_sha()
     mode = "FOLLOW_UP" if PREV_SHA else "INITIAL"
 
+    # Run specialist agents
     results: List[Tuple[Agent, Optional[Dict[str, Any]], Optional[str], str]] = []
-    # tuple: (agent, json, json_error, raw)
+    agent_payloads: List[Dict[str, Any]] = []
 
     for agent in AGENTS:
-        prompt = build_prompt(
-            agent=agent,
-            guidelines=guidelines,
-            diff=diff,
-            mode=mode,
-            prev_sha=PREV_SHA,
-            head_sha=head_sha,
-        )
+        prompt = build_prompt(agent, guidelines, diff, mode, PREV_SHA, head_sha)
         raw = run_ollama(MODEL, prompt)
         data, err = safe_json_loads(raw)
+
+        # Normalize & cap to keep things tight even if model violates limits
+        if data:
+            data["summary"] = str(data.get("summary", ""))[:500]
+            data["blocking"] = cap_issues(data.get("blocking"), 2)
+            data["non_blocking"] = cap_issues(data.get("non_blocking"), 3)
+            data["tests_to_add"] = cap_list(data.get("tests_to_add"), 6)
+            data["questions"] = cap_list(data.get("questions"), 5)
+
+            agent_payloads.append({
+                "agent": agent.name,
+                "summary": data.get("summary", ""),
+                "blocking": data.get("blocking", []),
+                "non_blocking": data.get("non_blocking", []),
+                "tests_to_add": data.get("tests_to_add", []),
+                "questions": data.get("questions", []),
+            })
+
         results.append((agent, data, err, raw))
 
-    # Build consolidated markdown
-    title = "Local Multi-Agent AI Review"
-    if mode == "FOLLOW_UP":
-        title += " (Follow-up)"
+    # Curator step
+    curator_prompt = build_curator_prompt(mode, PREV_SHA, head_sha, agent_payloads, prev_review_text)
+    curator_raw = run_ollama(CURATOR_MODEL, curator_prompt)
+    curator, curator_err = safe_json_loads(curator_raw)
+
+    if curator:
+        curator["short_summary"] = cap_list(curator.get("short_summary"), 3)
+        curator["top_actions"] = cap_list(curator.get("top_actions"), 5)
+        curator["resolved"] = cap_list(curator.get("resolved"), 5)
+        curator["still_open"] = cap_list(curator.get("still_open"), 5)
+        curator["new_risks"] = cap_list(curator.get("new_risks"), 5)
     else:
-        title += " (Initial)"
+        curator = {
+            "short_summary": [f"⚠️ Curator returned invalid JSON: {curator_err}"],
+            "top_actions": [],
+            "resolved": [],
+            "still_open": [],
+            "new_risks": [],
+        }
+
+    # Build markdown
+    title = "Local Multi-Agent AI Review"
+    title += " (Follow-up)" if mode == "FOLLOW_UP" else " (Initial)"
 
     md = f"### {title}\n\n"
-    md += f"Model: `{MODEL}`\n\n"
+    md += f"Models: specialists=`{MODEL}`, curator=`{CURATOR_MODEL}`\n\n"
     md += f"Current HEAD: `{head_sha}`\n"
     if PREV_SHA:
         md += f"Previous reviewed HEAD: `{PREV_SHA}`\n"
@@ -250,7 +358,25 @@ def main() -> None:
     if guidelines.strip():
         md += "_Project guidelines were provided._\n\n"
 
-    # High-level rollup
+    md += "## Curated summary\n\n"
+    md += fmt_bullets(curator.get("short_summary", [])) + "\n"
+
+    md += "### Top actions\n\n"
+    md += fmt_bullets(curator.get("top_actions", [])) + "\n"
+
+    if mode == "FOLLOW_UP":
+        md += "### Resolved\n\n"
+        md += fmt_bullets(curator.get("resolved", [])) + "\n"
+
+        md += "### Still open\n\n"
+        md += fmt_bullets(curator.get("still_open", [])) + "\n"
+
+        md += "### New risks\n\n"
+        md += fmt_bullets(curator.get("new_risks", [])) + "\n"
+
+    # Keep details collapsible to reduce noise in PR
+    md += "<details>\n<summary>Agent details</summary>\n\n"
+
     md += "## Rollup\n\n"
     rollup_lines = []
     for agent, data, err, raw in results:
@@ -261,7 +387,6 @@ def main() -> None:
             rollup_lines.append(f"- **{agent.name}:** ⚠️ invalid JSON output ({err})")
     md += "\n".join(rollup_lines) + "\n\n"
 
-    # Detailed sections
     for agent, data, err, raw in results:
         md += f"---\n\n## {agent.name}\n\n"
         if not data:
@@ -277,9 +402,16 @@ def main() -> None:
         md += fmt_list("Tests to add", data.get("tests_to_add", []) or [])
         md += fmt_list("Questions", data.get("questions", []) or [])
 
-    # Marker for next run to pick up
-    md += "\n---\n\n"
-    md += f"<!-- AI_REVIEW:HEAD_SHA={head_sha} -->"
+    # Curator raw (optional debug)
+    md += "\n---\n\n## Curator (debug)\n\n"
+    if curator_raw:
+        md += "```json\n" + truncate(curator_raw, 3000) + "\n```\n"
+
+    md += "\n</details>\n\n"
+
+    # Stable marker for next run to pick up
+    md += "---\n\n"
+    md += f"<!-- AI_REVIEW:HEAD_SHA={head_sha} -->\n"
 
     OUT_MD.write_text(md, encoding="utf-8")
 

From 41f6f3532beeba13c3f45874e10b38cb9cc049af Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Fri, 23 Jan 2026 14:50:26 +0100
Subject: [PATCH 11/17] review adjustments

---
 .github/scripts/multi_agent_review.py | 64 ++++++++++++++++-----------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index cd5c742..0b7afae 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -11,7 +11,6 @@
 OUT_MD = Path("review_comment.md")
 
 MODEL = os.environ.get("MODEL", "qwen2.5:14b-instruct")
-# Optional: use a cheaper/faster model for the curator step
 CURATOR_MODEL = os.environ.get("CURATOR_MODEL", MODEL)
 
 MAX_DIFF_CHARS = int(os.environ.get("MAX_DIFF_CHARS", "80000"))
@@ -20,6 +19,7 @@
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434")
 PREV_SHA = (os.environ.get("PREV_SHA") or "").strip()
 PREV_REVIEW_PATH = Path(os.environ.get("PREV_REVIEW_PATH", "prev_review.txt"))  # optional
+SHOW_DEBUG = (os.environ.get("SHOW_DEBUG") or "").lower() in ("1", "true", "yes")
 
 
 # --- Utilities ---
@@ -54,7 +54,6 @@ def truncate(s: str, limit: int) -> str:
 
 
 def md_escape(s: str) -> str:
-    # Minimal escaping to avoid accidental markdown formatting explosions
     return s.replace("\r", "").strip()
 
 
@@ -63,8 +62,7 @@ def run_ollama(model: str, prompt: str, timeout_s: int = 300) -> str:
         "model": model,
         "prompt": prompt,
         "stream": False,
-        # If you want to aggressively unload after each call:
-        # "keep_alive": 0,
+        # "keep_alive": 0,  # uncomment if you want to unload model after each call
     }
 
     req = urllib.request.Request(
@@ -99,13 +97,26 @@ def cap_list(x: Any, max_items: int) -> List[Any]:
 def cap_issues(x: Any, max_items: int) -> List[Dict[str, Any]]:
     if not isinstance(x, list):
         return []
-    out = []
+    out: List[Dict[str, Any]] = []
     for it in x[:max_items]:
         if isinstance(it, dict):
             out.append(it)
     return out
 
 
+def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Remove issues with weak evidence. This is critical to reduce hallucinations.
+    """
+    out: List[Dict[str, Any]] = []
+    for it in issues:
+        ev = str(it.get("evidence", "")).strip().lower()
+        if not ev or ev == "unknown":
+            continue
+        out.append(it)
+    return out
+
+
 # --- Agent definitions ---
 
 @dataclass
@@ -146,7 +157,6 @@ class Agent:
 ]
 
 
-# JSON schema for each agent (strict + short)
 JSON_SCHEMA = """
 Return ONLY valid JSON. No markdown. No explanations. No extra keys.
 
@@ -169,6 +179,7 @@ class Agent:
 - Use only the provided DIFF and GUIDELINES.
 - Do not guess. If uncertain, put it into "questions" and set evidence to "unknown".
 - Keep evidence concrete (file/line hints if visible in diff).
+- If you cannot cite evidence from the DIFF, set evidence to "unknown" AND prefer putting it into "questions" instead of issues.
 """.strip()
 
 
@@ -177,8 +188,8 @@ class Agent:
 
 Schema:
 {
-  "short_summary": ["string (max 3 bullets)"],
-  "top_actions": ["string (max 5 items)"],
+  "short_summary": ["string (max 2 bullets)"],
+  "top_actions": ["string (max 3 items)"],
   "resolved": ["string (max 5 items)"],
   "still_open": ["string (max 5 items)"],
   "new_risks": ["string (max 5 items)"]
@@ -198,7 +209,6 @@ def build_prompt(agent: Agent, guidelines: str, diff: str, mode: str, prev_sha:
     if mode == "FOLLOW_UP":
         followup_hint = f"""
 FOLLOW-UP CONTEXT:
-- This is a follow-up review after previous AI feedback.
 - Previous reviewed HEAD SHA was: {prev_sha}
 - Current HEAD SHA is: {head_sha}
 - The DIFF is expected to contain ONLY changes since the previous review.
@@ -243,7 +253,7 @@ def build_curator_prompt(
 
     return f"""
 SYSTEM:
-You are a lead reviewer (staff+). Merge multiple agent reviews into a short, actionable PR comment.
+You are a lead reviewer (staff+). Merge multiple agent reviews into a SHORT, actionable PR comment.
 
 INPUT (JSON):
 {json.dumps(payload, ensure_ascii=False)}
@@ -305,19 +315,22 @@ def main() -> None:
         raw = run_ollama(MODEL, prompt)
         data, err = safe_json_loads(raw)
 
-        # Normalize & cap to keep things tight even if model violates limits
         if data:
             data["summary"] = str(data.get("summary", ""))[:500]
-            data["blocking"] = cap_issues(data.get("blocking"), 2)
-            data["non_blocking"] = cap_issues(data.get("non_blocking"), 3)
+
+            blocking = drop_weak_issues(cap_issues(data.get("blocking"), 2))
+            non_blocking = drop_weak_issues(cap_issues(data.get("non_blocking"), 3))
+
+            data["blocking"] = blocking
+            data["non_blocking"] = non_blocking
             data["tests_to_add"] = cap_list(data.get("tests_to_add"), 6)
             data["questions"] = cap_list(data.get("questions"), 5)
 
             agent_payloads.append({
                 "agent": agent.name,
                 "summary": data.get("summary", ""),
-                "blocking": data.get("blocking", []),
-                "non_blocking": data.get("non_blocking", []),
+                "blocking": blocking,
+                "non_blocking": non_blocking,
                 "tests_to_add": data.get("tests_to_add", []),
                 "questions": data.get("questions", []),
             })
@@ -330,8 +343,8 @@ def main() -> None:
     curator, curator_err = safe_json_loads(curator_raw)
 
     if curator:
-        curator["short_summary"] = cap_list(curator.get("short_summary"), 3)
-        curator["top_actions"] = cap_list(curator.get("top_actions"), 5)
+        curator["short_summary"] = cap_list(curator.get("short_summary"), 2)
+        curator["top_actions"] = cap_list(curator.get("top_actions"), 3)
         curator["resolved"] = cap_list(curator.get("resolved"), 5)
         curator["still_open"] = cap_list(curator.get("still_open"), 5)
         curator["new_risks"] = cap_list(curator.get("new_risks"), 5)
@@ -361,20 +374,20 @@ def main() -> None:
     md += "## Curated summary\n\n"
     md += fmt_bullets(curator.get("short_summary", [])) + "\n"
 
-    md += "### Top actions\n\n"
+    md += "## Top actions\n\n"
     md += fmt_bullets(curator.get("top_actions", [])) + "\n"
 
     if mode == "FOLLOW_UP":
-        md += "### Resolved\n\n"
+        md += "## Resolved\n\n"
         md += fmt_bullets(curator.get("resolved", [])) + "\n"
 
-        md += "### Still open\n\n"
+        md += "## Still open\n\n"
         md += fmt_bullets(curator.get("still_open", [])) + "\n"
 
-        md += "### New risks\n\n"
+        md += "## New risks\n\n"
         md += fmt_bullets(curator.get("new_risks", [])) + "\n"
 
-    # Keep details collapsible to reduce noise in PR
+    # Details
     md += "<details>\n<summary>Agent details</summary>\n\n"
 
     md += "## Rollup\n\n"
@@ -402,14 +415,13 @@ def main() -> None:
         md += fmt_list("Tests to add", data.get("tests_to_add", []) or [])
         md += fmt_list("Questions", data.get("questions", []) or [])
 
-    # Curator raw (optional debug)
-    md += "\n---\n\n## Curator (debug)\n\n"
-    if curator_raw:
+    if SHOW_DEBUG:
+        md += "\n---\n\n## Curator (debug)\n\n"
         md += "```json\n" + truncate(curator_raw, 3000) + "\n```\n"
 
     md += "\n</details>\n\n"
 
-    # Stable marker for next run to pick up
+    # Stable marker for next run
     md += "---\n\n"
     md += f"<!-- AI_REVIEW:HEAD_SHA={head_sha} -->\n"
 

From 0d6d5786837f0eca990dbd97940a8356c10b3ecb Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Fri, 23 Jan 2026 15:02:49 +0100
Subject: [PATCH 12/17] review adjustments

---
 .github/scripts/multi_agent_review.py | 88 +++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 6 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index 0b7afae..6220ba5 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -62,7 +62,7 @@ def run_ollama(model: str, prompt: str, timeout_s: int = 300) -> str:
         "model": model,
         "prompt": prompt,
         "stream": False,
-        # "keep_alive": 0,  # uncomment if you want to unload model after each call
+        # "keep_alive": 0,  # uncomment to unload model after each call
     }
 
     req = urllib.request.Request(
@@ -104,6 +104,31 @@ def cap_issues(x: Any, max_items: int) -> List[Dict[str, Any]]:
     return out
 
 
+def extract_changed_files(diff: str) -> List[str]:
+    """
+    Parse `git diff` output and extract changed file paths. We use these paths to:
+    - constrain model references
+    - filter hallucinated evidence
+    """
+    files: List[str] = []
+    for line in diff.splitlines():
+        if line.startswith("diff --git "):
+            parts = line.split()
+            if len(parts) >= 4:
+                b = parts[3]  # b/...
+                if b.startswith("b/"):
+                    files.append(b[2:])
+
+    # dedupe, keep order
+    seen = set()
+    out: List[str] = []
+    for f in files:
+        if f not in seen:
+            out.append(f)
+            seen.add(f)
+    return out
+
+
 def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     """
     Remove issues with weak evidence. This is critical to reduce hallucinations.
@@ -117,6 +142,23 @@ def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     return out
 
 
+def drop_issues_not_in_files(issues: List[Dict[str, Any]], allowed_files: List[str]) -> List[Dict[str, Any]]:
+    """
+    If evidence doesn't mention any changed file, the model likely hallucinated the reference.
+    Drop it.
+    """
+    if not allowed_files:
+        return issues
+
+    allowed_lower = [f.lower() for f in allowed_files]
+    out: List[Dict[str, Any]] = []
+    for it in issues:
+        ev = str(it.get("evidence", "")).lower()
+        if any(f in ev for f in allowed_lower):
+            out.append(it)
+    return out
+
+
 # --- Agent definitions ---
 
 @dataclass
@@ -198,13 +240,22 @@ class Agent:
 Rules:
 - Deduplicate and prioritize.
 - Prefer concrete, actionable items.
+- Each bullet/action should cite at least one file from `changed_files` OR be framed as a question.
 - If evidence is weak/unknown, downgrade or omit it.
 - If mode is INITIAL: resolved/still_open can be empty.
 - If mode is FOLLOW_UP: classify items as resolved/still_open/new_risks when possible.
 """.strip()
 
 
-def build_prompt(agent: Agent, guidelines: str, diff: str, mode: str, prev_sha: str, head_sha: str) -> str:
+def build_prompt(
+        agent: Agent,
+        guidelines: str,
+        diff: str,
+        mode: str,
+        prev_sha: str,
+        head_sha: str,
+        allowed_files: List[str],
+) -> str:
     followup_hint = ""
     if mode == "FOLLOW_UP":
         followup_hint = f"""
@@ -215,6 +266,8 @@ def build_prompt(agent: Agent, guidelines: str, diff: str, mode: str, prev_sha:
 - Focus on what changed, what got resolved, and what new issues were introduced.
 """.strip()
 
+    allowed_files_text = "\n".join(allowed_files) if allowed_files else "(none)"
+
     return f"""
 SYSTEM:
 You are a strict senior/staff+ software engineer acting as a pull request reviewer.
@@ -222,6 +275,13 @@ def build_prompt(agent: Agent, guidelines: str, diff: str, mode: str, prev_sha:
 MODE: {mode}
 {followup_hint}
 
+ALLOWED_FILES (critical):
+{allowed_files_text}
+
+Critical rules:
+- You may ONLY reference file paths from ALLOWED_FILES.
+- If you cannot point to evidence in the DIFF, DO NOT create an issue. Put it into "questions".
+
 TASK:
 {agent.focus}
 
@@ -242,11 +302,13 @@ def build_curator_prompt(
         head_sha: str,
         agent_json: List[Dict[str, Any]],
         prev_review_text: str,
+        changed_files: List[str],
 ) -> str:
     payload = {
         "mode": mode,
         "previous_reviewed_sha": prev_sha or None,
         "current_sha": head_sha,
+        "changed_files": changed_files,
         "agent_reviews": agent_json,
         "previous_review_text": truncate(prev_review_text, MAX_PREV_REVIEW_CHARS) if prev_review_text else "",
     }
@@ -302,6 +364,7 @@ def main() -> None:
         return
 
     diff = truncate(diff, MAX_DIFF_CHARS)
+    changed_files = extract_changed_files(diff)
 
     head_sha = get_head_sha()
     mode = "FOLLOW_UP" if PREV_SHA else "INITIAL"
@@ -311,15 +374,21 @@ def main() -> None:
     agent_payloads: List[Dict[str, Any]] = []
 
     for agent in AGENTS:
-        prompt = build_prompt(agent, guidelines, diff, mode, PREV_SHA, head_sha)
+        prompt = build_prompt(agent, guidelines, diff, mode, PREV_SHA, head_sha, changed_files)
         raw = run_ollama(MODEL, prompt)
         data, err = safe_json_loads(raw)
 
         if data:
             data["summary"] = str(data.get("summary", ""))[:500]
 
-            blocking = drop_weak_issues(cap_issues(data.get("blocking"), 2))
-            non_blocking = drop_weak_issues(cap_issues(data.get("non_blocking"), 3))
+            blocking = drop_issues_not_in_files(
+                drop_weak_issues(cap_issues(data.get("blocking"), 2)),
+                changed_files
+            )
+            non_blocking = drop_issues_not_in_files(
+                drop_weak_issues(cap_issues(data.get("non_blocking"), 3)),
+                changed_files
+            )
 
             data["blocking"] = blocking
             data["non_blocking"] = non_blocking
@@ -338,7 +407,14 @@ def main() -> None:
         results.append((agent, data, err, raw))
 
     # Curator step
-    curator_prompt = build_curator_prompt(mode, PREV_SHA, head_sha, agent_payloads, prev_review_text)
+    curator_prompt = build_curator_prompt(
+        mode=mode,
+        prev_sha=PREV_SHA,
+        head_sha=head_sha,
+        agent_json=agent_payloads,
+        prev_review_text=prev_review_text,
+        changed_files=changed_files,
+    )
     curator_raw = run_ollama(CURATOR_MODEL, curator_prompt)
     curator, curator_err = safe_json_loads(curator_raw)
 

From db135c2fa6d963fcb76c2eb6bf25c3cdf313e7b9 Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Fri, 23 Jan 2026 15:08:40 +0100
Subject: [PATCH 13/17] review adjustments

---
 .github/scripts/multi_agent_review.py | 80 +++++++++++++++++++--------
 1 file changed, 56 insertions(+), 24 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index 6220ba5..d320dc0 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -19,7 +19,9 @@
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://127.0.0.1:11434")
 PREV_SHA = (os.environ.get("PREV_SHA") or "").strip()
 PREV_REVIEW_PATH = Path(os.environ.get("PREV_REVIEW_PATH", "prev_review.txt"))  # optional
+
 SHOW_DEBUG = (os.environ.get("SHOW_DEBUG") or "").lower() in ("1", "true", "yes")
+SHOW_AGENT_TESTS_QUESTIONS = (os.environ.get("SHOW_AGENT_TESTS_QUESTIONS") or "").lower() in ("1", "true", "yes")
 
 
 # --- Utilities ---
@@ -106,9 +108,7 @@ def cap_issues(x: Any, max_items: int) -> List[Dict[str, Any]]:
 
 def extract_changed_files(diff: str) -> List[str]:
     """
-    Parse `git diff` output and extract changed file paths. We use these paths to:
-    - constrain model references
-    - filter hallucinated evidence
+    Parse `git diff` output and extract changed file paths.
     """
     files: List[str] = []
     for line in diff.splitlines():
@@ -129,9 +129,19 @@ def extract_changed_files(diff: str) -> List[str]:
     return out
 
 
+def filter_allowed_files(files: List[str]) -> List[str]:
+    """
+    To prevent models from "reviewing guidelines" instead of code,
+    drop docs/ by default. If PR changes ONLY docs, allow them.
+    """
+    dropped_prefixes = ("docs/",)
+    filtered = [f for f in files if not f.startswith(dropped_prefixes)]
+    return filtered if filtered else files
+
+
 def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     """
-    Remove issues with weak evidence. This is critical to reduce hallucinations.
+    Remove issues with weak evidence. Critical to reduce hallucinations.
     """
     out: List[Dict[str, Any]] = []
     for it in issues:
@@ -144,7 +154,7 @@ def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 
 def drop_issues_not_in_files(issues: List[Dict[str, Any]], allowed_files: List[str]) -> List[Dict[str, Any]]:
     """
-    If evidence doesn't mention any changed file, the model likely hallucinated the reference.
+    If evidence doesn't mention any allowed file, the model likely hallucinated.
     Drop it.
     """
     if not allowed_files:
@@ -159,6 +169,15 @@ def drop_issues_not_in_files(issues: List[Dict[str, Any]], allowed_files: List[s
     return out
 
 
+def must_mention_file(line: str, allowed_files: List[str]) -> bool:
+    """
+    Helper for debugging curator output quality:
+    check whether a bullet mentions any allowed file.
+    """
+    s = (line or "").lower()
+    return any(f.lower() in s for f in allowed_files)
+
+
 # --- Agent definitions ---
 
 @dataclass
@@ -172,28 +191,32 @@ class Agent:
         name="Correctness & Reliability",
         focus=(
             "Find correctness risks: idempotency, retries/backoff, timeouts, transactional boundaries, "
-            "race conditions, partial failures, error handling, and data consistency."
+            "race conditions, partial failures, error handling, and data consistency. "
+            "Focus ONLY on code/workflow changes in ALLOWED_FILES."
         ),
     ),
     Agent(
         name="Architecture & Boundaries",
         focus=(
             "Review architecture: module boundaries, coupling, layering, dependency direction, "
-            "API contracts, naming of abstractions, and maintainability trade-offs."
+            "API contracts, naming of abstractions, and maintainability trade-offs. "
+            "Focus ONLY on code/workflow changes in ALLOWED_FILES."
         ),
     ),
     Agent(
         name="Tests & Observability",
         focus=(
             "Suggest tests and observability: unit/integration tests worth adding, edge cases, "
-            "logging/metrics/tracing, and how to reproduce failures."
+            "logging/metrics/tracing, and how to reproduce failures. "
+            "Do NOT review the guideline document itself; review only ALLOWED_FILES changes."
         ),
     ),
     Agent(
         name="Cost & LLM Discipline",
         focus=(
             "Look for cost/perf traps: unnecessary LLM calls, large payloads, missing caching, "
-            "missing state+delta pattern, and risk of GPU contention."
+            "missing state+delta pattern, and risk of GPU contention. "
+            "Focus ONLY on the actual automation code/workflow changes in ALLOWED_FILES."
         ),
     ),
 ]
@@ -219,8 +242,7 @@ class Agent:
 
 Rules:
 - Use only the provided DIFF and GUIDELINES.
-- Do not guess. If uncertain, put it into "questions" and set evidence to "unknown".
-- Keep evidence concrete (file/line hints if visible in diff).
+- You may ONLY reference file paths from ALLOWED_FILES.
 - If you cannot cite evidence from the DIFF, set evidence to "unknown" AND prefer putting it into "questions" instead of issues.
 """.strip()
 
@@ -237,11 +259,11 @@ class Agent:
   "new_risks": ["string (max 5 items)"]
 }
 
-Rules:
+Hard rules:
 - Deduplicate and prioritize.
-- Prefer concrete, actionable items.
-- Each bullet/action should cite at least one file from `changed_files` OR be framed as a question.
-- If evidence is weak/unknown, downgrade or omit it.
+- Each short_summary/top_actions item MUST mention at least one file from `allowed_files` by name.
+  If you cannot tie it to a file, write it as a question (still mention a file that it relates to).
+- If evidence is weak/unknown, omit it.
 - If mode is INITIAL: resolved/still_open can be empty.
 - If mode is FOLLOW_UP: classify items as resolved/still_open/new_risks when possible.
 """.strip()
@@ -302,13 +324,13 @@ def build_curator_prompt(
         head_sha: str,
         agent_json: List[Dict[str, Any]],
         prev_review_text: str,
-        changed_files: List[str],
+        allowed_files: List[str],
 ) -> str:
     payload = {
         "mode": mode,
         "previous_reviewed_sha": prev_sha or None,
         "current_sha": head_sha,
-        "changed_files": changed_files,
+        "allowed_files": allowed_files,
         "agent_reviews": agent_json,
         "previous_review_text": truncate(prev_review_text, MAX_PREV_REVIEW_CHARS) if prev_review_text else "",
     }
@@ -364,7 +386,9 @@ def main() -> None:
         return
 
     diff = truncate(diff, MAX_DIFF_CHARS)
+
     changed_files = extract_changed_files(diff)
+    allowed_files = filter_allowed_files(changed_files)
 
     head_sha = get_head_sha()
     mode = "FOLLOW_UP" if PREV_SHA else "INITIAL"
@@ -374,7 +398,7 @@ def main() -> None:
     agent_payloads: List[Dict[str, Any]] = []
 
     for agent in AGENTS:
-        prompt = build_prompt(agent, guidelines, diff, mode, PREV_SHA, head_sha, changed_files)
+        prompt = build_prompt(agent, guidelines, diff, mode, PREV_SHA, head_sha, allowed_files)
         raw = run_ollama(MODEL, prompt)
         data, err = safe_json_loads(raw)
 
@@ -383,11 +407,11 @@ def main() -> None:
 
             blocking = drop_issues_not_in_files(
                 drop_weak_issues(cap_issues(data.get("blocking"), 2)),
-                changed_files
+                allowed_files,
             )
             non_blocking = drop_issues_not_in_files(
                 drop_weak_issues(cap_issues(data.get("non_blocking"), 3)),
-                changed_files
+                allowed_files,
             )
 
             data["blocking"] = blocking
@@ -413,7 +437,7 @@ def main() -> None:
         head_sha=head_sha,
         agent_json=agent_payloads,
         prev_review_text=prev_review_text,
-        changed_files=changed_files,
+        allowed_files=allowed_files,
     )
     curator_raw = run_ollama(CURATOR_MODEL, curator_prompt)
     curator, curator_err = safe_json_loads(curator_raw)
@@ -433,6 +457,13 @@ def main() -> None:
             "new_risks": [],
         }
 
+    # Optional: basic sanity check for curator outputs (debug only)
+    if SHOW_DEBUG and allowed_files:
+        for section in ("short_summary", "top_actions"):
+            for item in curator.get(section, []) or []:
+                if not must_mention_file(str(item), allowed_files):
+                    print(f"[DEBUG] Curator {section} item missing file mention: {item}")
+
     # Build markdown
     title = "Local Multi-Agent AI Review"
     title += " (Follow-up)" if mode == "FOLLOW_UP" else " (Initial)"
@@ -463,7 +494,6 @@ def main() -> None:
         md += "## New risks\n\n"
         md += fmt_bullets(curator.get("new_risks", [])) + "\n"
 
-    # Details
     md += "<details>\n<summary>Agent details</summary>\n\n"
 
     md += "## Rollup\n\n"
@@ -488,8 +518,10 @@ def main() -> None:
         md += f"**Summary:** {md_escape(str(data.get('summary', '')))}\n\n"
         md += fmt_issue_list("Blocking", data.get("blocking", []) or [])
         md += fmt_issue_list("Non-blocking", data.get("non_blocking", []) or [])
-        md += fmt_list("Tests to add", data.get("tests_to_add", []) or [])
-        md += fmt_list("Questions", data.get("questions", []) or [])
+
+        if SHOW_AGENT_TESTS_QUESTIONS:
+            md += fmt_list("Tests to add", data.get("tests_to_add", []) or [])
+            md += fmt_list("Questions", data.get("questions", []) or [])
 
     if SHOW_DEBUG:
         md += "\n---\n\n## Curator (debug)\n\n"

From 2008e73b2aea1c2b0dfcc1911d9346ec421b8d1c Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Fri, 23 Jan 2026 15:15:29 +0100
Subject: [PATCH 14/17] review adjustments

---
 .github/scripts/multi_agent_review.py | 84 +++++++++++++++++++++++----
 1 file changed, 72 insertions(+), 12 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index d320dc0..f970ffc 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -23,6 +23,9 @@
 SHOW_DEBUG = (os.environ.get("SHOW_DEBUG") or "").lower() in ("1", "true", "yes")
 SHOW_AGENT_TESTS_QUESTIONS = (os.environ.get("SHOW_AGENT_TESTS_QUESTIONS") or "").lower() in ("1", "true", "yes")
 
+# Retry agent call once if we fail to parse JSON (common with local models)
+RETRY_ON_BAD_JSON = (os.environ.get("RETRY_ON_BAD_JSON") or "").lower() in ("1", "true", "yes")
+
 
 # --- Utilities ---
 
@@ -83,9 +86,49 @@ def run_ollama(model: str, prompt: str, timeout_s: int = 300) -> str:
         raise RuntimeError(f"Ollama HTTP call failed: {e}")
 
 
+def extract_first_json_object(s: str) -> Optional[str]:
+    """
+    Extract the first top-level JSON object from a string.
+    Handles cases where the model wraps JSON in markdown fences or appends extra text.
+    """
+    if not s:
+        return None
+
+    stripped = s.strip()
+
+    # Strip a leading markdown fence if present
+    if stripped.startswith("```"):
+        lines = stripped.splitlines()
+        # drop first fence line (``` or ```json)
+        lines = lines[1:]
+        # drop last fence line if present
+        if lines and lines[-1].strip().startswith("```"):
+            lines = lines[:-1]
+        stripped = "\n".join(lines).strip()
+
+    start = stripped.find("{")
+    if start == -1:
+        return None
+
+    depth = 0
+    for i in range(start, len(stripped)):
+        ch = stripped[i]
+        if ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                return stripped[start:i + 1]
+
+    return None
+
+
 def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
     try:
-        return json.loads(s), None
+        js = extract_first_json_object(s)
+        if not js:
+            return None, "No JSON object found"
+        return json.loads(js), None
     except Exception as e:
         return None, str(e)
 
@@ -170,10 +213,6 @@ def drop_issues_not_in_files(issues: List[Dict[str, Any]], allowed_files: List[s
 
 
 def must_mention_file(line: str, allowed_files: List[str]) -> bool:
-    """
-    Helper for debugging curator output quality:
-    check whether a bullet mentions any allowed file.
-    """
     s = (line or "").lower()
     return any(f.lower() in s for f in allowed_files)
 
@@ -263,6 +302,7 @@ class Agent:
 - Deduplicate and prioritize.
 - Each short_summary/top_actions item MUST mention at least one file from `allowed_files` by name.
   If you cannot tie it to a file, write it as a question (still mention a file that it relates to).
+- Do NOT recommend changing guidelines unless `docs/review_guidelines.md` is in allowed_files.
 - If evidence is weak/unknown, omit it.
 - If mode is INITIAL: resolved/still_open can be empty.
 - If mode is FOLLOW_UP: classify items as resolved/still_open/new_risks when possible.
@@ -376,6 +416,28 @@ def fmt_bullets(items: List[str]) -> str:
     return "".join([f"- {md_escape(str(x))}\n" for x in items])
 
 
+def call_agent_with_optional_retry(model: str, prompt: str) -> Tuple[str, Optional[Dict[str, Any]], Optional[str]]:
+    """
+    Returns: (raw, parsed_json_or_none, err_or_none)
+    If RETRY_ON_BAD_JSON is enabled, retries once with a stricter reminder.
+    """
+    raw = run_ollama(model, prompt)
+    data, err = safe_json_loads(raw)
+    if data is not None:
+        return raw, data, None
+
+    if not RETRY_ON_BAD_JSON:
+        return raw, None, err
+
+    # retry once with a strict reminder (helps local models a lot)
+    retry_prompt = prompt + "\n\nREMINDER: OUTPUT ONLY VALID JSON. NO MARKDOWN. NO EXTRA TEXT."
+    raw2 = run_ollama(model, retry_prompt)
+    data2, err2 = safe_json_loads(raw2)
+    if data2 is not None:
+        return raw2, data2, None
+    return raw2, None, err2
+
+
 def main() -> None:
     diff = read_text(DIFF_PATH)
     guidelines = read_text(GUIDELINES_PATH)
@@ -399,8 +461,7 @@ def main() -> None:
 
     for agent in AGENTS:
         prompt = build_prompt(agent, guidelines, diff, mode, PREV_SHA, head_sha, allowed_files)
-        raw = run_ollama(MODEL, prompt)
-        data, err = safe_json_loads(raw)
+        raw, data, err = call_agent_with_optional_retry(MODEL, prompt)
 
         if data:
             data["summary"] = str(data.get("summary", ""))[:500]
@@ -439,8 +500,7 @@ def main() -> None:
         prev_review_text=prev_review_text,
         allowed_files=allowed_files,
     )
-    curator_raw = run_ollama(CURATOR_MODEL, curator_prompt)
-    curator, curator_err = safe_json_loads(curator_raw)
+    curator_raw, curator, curator_err = call_agent_with_optional_retry(CURATOR_MODEL, curator_prompt)
 
     if curator:
         curator["short_summary"] = cap_list(curator.get("short_summary"), 2)
@@ -457,8 +517,8 @@ def main() -> None:
             "new_risks": [],
         }
 
-    # Optional: basic sanity check for curator outputs (debug only)
-    if SHOW_DEBUG and allowed_files:
+    # Optional sanity check (debug only)
+    if SHOW_DEBUG and allowed_files and curator and isinstance(curator, dict):
         for section in ("short_summary", "top_actions"):
             for item in curator.get(section, []) or []:
                 if not must_mention_file(str(item), allowed_files):
@@ -525,7 +585,7 @@ def main() -> None:
 
     if SHOW_DEBUG:
         md += "\n---\n\n## Curator (debug)\n\n"
-        md += "```json\n" + truncate(curator_raw, 3000) + "\n```\n"
+        md += "```text\n" + truncate(curator_raw, 3000) + "\n```\n"
 
     md += "\n</details>\n\n"
 

From 11abec81539ed2d0d151facb2f1f1ef499406774 Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Fri, 23 Jan 2026 15:24:27 +0100
Subject: [PATCH 15/17] review adjustments

---
 .github/scripts/multi_agent_review.py | 96 ++++++++++++++-------------
 1 file changed, 51 insertions(+), 45 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index f970ffc..abd2ff5 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -22,8 +22,6 @@
 
 SHOW_DEBUG = (os.environ.get("SHOW_DEBUG") or "").lower() in ("1", "true", "yes")
 SHOW_AGENT_TESTS_QUESTIONS = (os.environ.get("SHOW_AGENT_TESTS_QUESTIONS") or "").lower() in ("1", "true", "yes")
-
-# Retry agent call once if we fail to parse JSON (common with local models)
 RETRY_ON_BAD_JSON = (os.environ.get("RETRY_ON_BAD_JSON") or "").lower() in ("1", "true", "yes")
 
 
@@ -99,9 +97,7 @@ def extract_first_json_object(s: str) -> Optional[str]:
     # Strip a leading markdown fence if present
     if stripped.startswith("```"):
         lines = stripped.splitlines()
-        # drop first fence line (``` or ```json)
-        lines = lines[1:]
-        # drop last fence line if present
+        lines = lines[1:]  # drop first fence line
         if lines and lines[-1].strip().startswith("```"):
             lines = lines[:-1]
         stripped = "\n".join(lines).strip()
@@ -184,7 +180,7 @@ def filter_allowed_files(files: List[str]) -> List[str]:
 
 def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     """
-    Remove issues with weak evidence. Critical to reduce hallucinations.
+    Remove issues with weak evidence.
     """
     out: List[Dict[str, Any]] = []
     for it in issues:
@@ -198,7 +194,6 @@ def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 def drop_issues_not_in_files(issues: List[Dict[str, Any]], allowed_files: List[str]) -> List[Dict[str, Any]]:
     """
     If evidence doesn't mention any allowed file, the model likely hallucinated.
-    Drop it.
     """
     if not allowed_files:
         return issues
@@ -217,6 +212,17 @@ def must_mention_file(line: str, allowed_files: List[str]) -> bool:
     return any(f.lower() in s for f in allowed_files)
 
 
+def diff_facts(diff: str, allowed_files: List[str], max_lines: int = 60) -> str:
+    """
+    Provide a minimal "facts" header to reduce model drift:
+    - list changed files
+    - show first N lines of diff
+    """
+    files = "\n".join(allowed_files) if allowed_files else "(none)"
+    head = "\n".join(diff.splitlines()[:max_lines])
+    return f"CHANGED_FILES:\n{files}\n\nDIFF_HEAD (first {max_lines} lines):\n{head}"
+
+
 # --- Agent definitions ---
 
 @dataclass
@@ -279,10 +285,12 @@ class Agent:
 - tests_to_add: max 6 items
 - questions: max 5 items
 
-Rules:
-- Use only the provided DIFF and GUIDELINES.
+Hard rules (facts):
+- You MUST NOT mention technologies, files, functions, or problems that are not present in DIFF.
+- If DIFF does not show Kotlin/coroutines/external calls/etc, do NOT mention them.
+- GUIDELINES are for evaluation only; DIFF is the ONLY source of facts.
 - You may ONLY reference file paths from ALLOWED_FILES.
-- If you cannot cite evidence from the DIFF, set evidence to "unknown" AND prefer putting it into "questions" instead of issues.
+- If you cannot cite evidence from the DIFF, set evidence to "unknown" AND prefer putting it into "questions".
 """.strip()
 
 
@@ -301,11 +309,10 @@ class Agent:
 Hard rules:
 - Deduplicate and prioritize.
 - Each short_summary/top_actions item MUST mention at least one file from `allowed_files` by name.
-  If you cannot tie it to a file, write it as a question (still mention a file that it relates to).
+  If you cannot tie it to a file, write it as a question (still mention a related file).
 - Do NOT recommend changing guidelines unless `docs/review_guidelines.md` is in allowed_files.
-- If evidence is weak/unknown, omit it.
-- If mode is INITIAL: resolved/still_open can be empty.
-- If mode is FOLLOW_UP: classify items as resolved/still_open/new_risks when possible.
+- Do NOT output "mega lists" of test categories. Each action should be a single concrete thing.
+- Use DIFF as the only source of facts; do not invent Kotlin/coroutines/etc.
 """.strip()
 
 
@@ -330,6 +337,8 @@ def build_prompt(
 
     allowed_files_text = "\n".join(allowed_files) if allowed_files else "(none)"
 
+    facts = diff_facts(diff, allowed_files, max_lines=60)
+
     return f"""
 SYSTEM:
 You are a strict senior/staff+ software engineer acting as a pull request reviewer.
@@ -340,14 +349,18 @@ def build_prompt(
 ALLOWED_FILES (critical):
 {allowed_files_text}
 
+FACTS_FROM_DIFF (use for factual claims):
+{facts}
+
 Critical rules:
+- DIFF is the ONLY source of facts. GUIDELINES are only for judging what you see in DIFF.
 - You may ONLY reference file paths from ALLOWED_FILES.
-- If you cannot point to evidence in the DIFF, DO NOT create an issue. Put it into "questions".
+- You MUST NOT mention technologies/files not present in DIFF. If not shown, don't say it.
 
 TASK:
 {agent.focus}
 
-GUIDELINES:
+GUIDELINES (evaluation only):
 {guidelines if guidelines else "(none provided)"}
 
 DIFF:
@@ -365,6 +378,7 @@ def build_curator_prompt(
         agent_json: List[Dict[str, Any]],
         prev_review_text: str,
         allowed_files: List[str],
+        diff: str,
 ) -> str:
     payload = {
         "mode": mode,
@@ -373,6 +387,7 @@ def build_curator_prompt(
         "allowed_files": allowed_files,
         "agent_reviews": agent_json,
         "previous_review_text": truncate(prev_review_text, MAX_PREV_REVIEW_CHARS) if prev_review_text else "",
+        "facts_from_diff": diff_facts(diff, allowed_files, max_lines=60),
     }
 
     return f"""
@@ -387,6 +402,23 @@ def build_curator_prompt(
 """.strip()
 
 
+def call_with_optional_retry(model: str, prompt: str) -> Tuple[str, Optional[Dict[str, Any]], Optional[str]]:
+    raw = run_ollama(model, prompt)
+    data, err = safe_json_loads(raw)
+    if data is not None:
+        return raw, data, None
+
+    if not RETRY_ON_BAD_JSON:
+        return raw, None, err
+
+    retry_prompt = prompt + "\n\nREMINDER: OUTPUT ONLY VALID JSON. NO MARKDOWN. NO EXTRA TEXT."
+    raw2 = run_ollama(model, retry_prompt)
+    data2, err2 = safe_json_loads(raw2)
+    if data2 is not None:
+        return raw2, data2, None
+    return raw2, None, err2
+
+
 # --- Markdown building ---
 
 def fmt_issue_list(title: str, issues: List[Dict[str, Any]]) -> str:
@@ -416,28 +448,6 @@ def fmt_bullets(items: List[str]) -> str:
     return "".join([f"- {md_escape(str(x))}\n" for x in items])
 
 
-def call_agent_with_optional_retry(model: str, prompt: str) -> Tuple[str, Optional[Dict[str, Any]], Optional[str]]:
-    """
-    Returns: (raw, parsed_json_or_none, err_or_none)
-    If RETRY_ON_BAD_JSON is enabled, retries once with a stricter reminder.
-    """
-    raw = run_ollama(model, prompt)
-    data, err = safe_json_loads(raw)
-    if data is not None:
-        return raw, data, None
-
-    if not RETRY_ON_BAD_JSON:
-        return raw, None, err
-
-    # retry once with a strict reminder (helps local models a lot)
-    retry_prompt = prompt + "\n\nREMINDER: OUTPUT ONLY VALID JSON. NO MARKDOWN. NO EXTRA TEXT."
-    raw2 = run_ollama(model, retry_prompt)
-    data2, err2 = safe_json_loads(raw2)
-    if data2 is not None:
-        return raw2, data2, None
-    return raw2, None, err2
-
-
 def main() -> None:
     diff = read_text(DIFF_PATH)
     guidelines = read_text(GUIDELINES_PATH)
@@ -455,13 +465,12 @@ def main() -> None:
     head_sha = get_head_sha()
     mode = "FOLLOW_UP" if PREV_SHA else "INITIAL"
 
-    # Run specialist agents
     results: List[Tuple[Agent, Optional[Dict[str, Any]], Optional[str], str]] = []
     agent_payloads: List[Dict[str, Any]] = []
 
     for agent in AGENTS:
         prompt = build_prompt(agent, guidelines, diff, mode, PREV_SHA, head_sha, allowed_files)
-        raw, data, err = call_agent_with_optional_retry(MODEL, prompt)
+        raw, data, err = call_with_optional_retry(MODEL, prompt)
 
         if data:
             data["summary"] = str(data.get("summary", ""))[:500]
@@ -491,7 +500,6 @@ def main() -> None:
 
         results.append((agent, data, err, raw))
 
-    # Curator step
     curator_prompt = build_curator_prompt(
         mode=mode,
         prev_sha=PREV_SHA,
@@ -499,8 +507,9 @@ def main() -> None:
         agent_json=agent_payloads,
         prev_review_text=prev_review_text,
         allowed_files=allowed_files,
+        diff=diff,
     )
-    curator_raw, curator, curator_err = call_agent_with_optional_retry(CURATOR_MODEL, curator_prompt)
+    curator_raw, curator, curator_err = call_with_optional_retry(CURATOR_MODEL, curator_prompt)
 
     if curator:
         curator["short_summary"] = cap_list(curator.get("short_summary"), 2)
@@ -517,14 +526,12 @@ def main() -> None:
             "new_risks": [],
         }
 
-    # Optional sanity check (debug only)
     if SHOW_DEBUG and allowed_files and curator and isinstance(curator, dict):
         for section in ("short_summary", "top_actions"):
             for item in curator.get(section, []) or []:
                 if not must_mention_file(str(item), allowed_files):
                     print(f"[DEBUG] Curator {section} item missing file mention: {item}")
 
-    # Build markdown
     title = "Local Multi-Agent AI Review"
     title += " (Follow-up)" if mode == "FOLLOW_UP" else " (Initial)"
 
@@ -589,7 +596,6 @@ def main() -> None:
 
     md += "\n</details>\n\n"
 
-    # Stable marker for next run
     md += "---\n\n"
     md += f"<!-- AI_REVIEW:HEAD_SHA={head_sha} -->\n"
 

From d2954f0b07de2dd43fafd1399b6f21c37dbbf5f7 Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Fri, 23 Jan 2026 15:32:36 +0100
Subject: [PATCH 16/17] review adjustments

---
 .github/scripts/multi_agent_review.py | 138 +++++++++++++++++++-------
 1 file changed, 104 insertions(+), 34 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index abd2ff5..25985ec 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -24,6 +24,9 @@
 SHOW_AGENT_TESTS_QUESTIONS = (os.environ.get("SHOW_AGENT_TESTS_QUESTIONS") or "").lower() in ("1", "true", "yes")
 RETRY_ON_BAD_JSON = (os.environ.get("RETRY_ON_BAD_JSON") or "").lower() in ("1", "true", "yes")
 
+# Optional: aggressively prune vague summaries/top_actions
+STRICT_FACT_GATING = (os.environ.get("STRICT_FACT_GATING") or "").lower() in ("1", "true", "yes")
+
 
 # --- Utilities ---
 
@@ -85,10 +88,6 @@ def run_ollama(model: str, prompt: str, timeout_s: int = 300) -> str:
 
 
 def extract_first_json_object(s: str) -> Optional[str]:
-    """
-    Extract the first top-level JSON object from a string.
-    Handles cases where the model wraps JSON in markdown fences or appends extra text.
-    """
     if not s:
         return None
 
@@ -146,9 +145,6 @@ def cap_issues(x: Any, max_items: int) -> List[Dict[str, Any]]:
 
 
 def extract_changed_files(diff: str) -> List[str]:
-    """
-    Parse `git diff` output and extract changed file paths.
-    """
     files: List[str] = []
     for line in diff.splitlines():
         if line.startswith("diff --git "):
@@ -158,7 +154,6 @@ def extract_changed_files(diff: str) -> List[str]:
                 if b.startswith("b/"):
                     files.append(b[2:])
 
-    # dedupe, keep order
     seen = set()
     out: List[str] = []
     for f in files:
@@ -169,19 +164,12 @@ def extract_changed_files(diff: str) -> List[str]:
 
 
 def filter_allowed_files(files: List[str]) -> List[str]:
-    """
-    To prevent models from "reviewing guidelines" instead of code,
-    drop docs/ by default. If PR changes ONLY docs, allow them.
-    """
     dropped_prefixes = ("docs/",)
     filtered = [f for f in files if not f.startswith(dropped_prefixes)]
     return filtered if filtered else files
 
 
 def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """
-    Remove issues with weak evidence.
-    """
     out: List[Dict[str, Any]] = []
     for it in issues:
         ev = str(it.get("evidence", "")).strip().lower()
@@ -192,9 +180,6 @@ def drop_weak_issues(issues: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 
 
 def drop_issues_not_in_files(issues: List[Dict[str, Any]], allowed_files: List[str]) -> List[Dict[str, Any]]:
-    """
-    If evidence doesn't mention any allowed file, the model likely hallucinated.
-    """
     if not allowed_files:
         return issues
 
@@ -212,17 +197,76 @@ def must_mention_file(line: str, allowed_files: List[str]) -> bool:
     return any(f.lower() in s for f in allowed_files)
 
 
-def diff_facts(diff: str, allowed_files: List[str], max_lines: int = 60) -> str:
+def mentions_only_allowed_files(line: str, allowed_files: List[str]) -> bool:
     """
-    Provide a minimal "facts" header to reduce model drift:
-    - list changed files
-    - show first N lines of diff
+    Conservative: if line mentions a '.py' / '.yml' / '.yaml' / '.kt' / '.java' / '.md' path,
+    ensure every mentioned path is within allowed_files.
     """
+    import re
+    paths = re.findall(r"[\w./-]+\.(?:py|yml|yaml|kt|java|md)", line or "")
+    if not paths:
+        return True
+    allowed_set = set(allowed_files)
+    return all(p in allowed_set for p in paths)
+
+
+def diff_facts(diff: str, allowed_files: List[str], max_lines: int = 60) -> str:
     files = "\n".join(allowed_files) if allowed_files else "(none)"
     head = "\n".join(diff.splitlines()[:max_lines])
     return f"CHANGED_FILES:\n{files}\n\nDIFF_HEAD (first {max_lines} lines):\n{head}"
 
 
+# --- Fact gating (reduce "smart generic" talk) ---
+
+KEYWORD_GATES: Dict[str, List[str]] = {
+    "idempot": ["idempot", "dedup", "at-least-once", "exactly-once"],
+    "retry": ["retry", "backoff", "exponential"],
+    "timeout": ["timeout", "time out", "deadline"],
+    "logging": ["logger", "log.", "logging", "trace", "span", "metric"],
+    "external": ["http", "request", "client", "api", "url", "fetch"],
+    "concurrency": ["mutex", "lock", "synchronized", "race", "concurrent", "coroutine", "thread"],
+}
+
+def diff_contains_any(diff_lower: str, needles: List[str]) -> bool:
+    return any(n in diff_lower for n in needles)
+
+def gate_claims_to_diff(text: str, diff_lower: str) -> bool:
+    """
+    If the text talks about a gated topic, require the diff to contain related lexemes.
+    This is intentionally strict to avoid hallucinated "idempotence/retry/etc" claims.
+    """
+    t = (text or "").lower()
+    for topic, needles in KEYWORD_GATES.items():
+        if topic in t or any(n in t for n in needles):
+            if not diff_contains_any(diff_lower, needles):
+                return False
+    return True
+
+
+def filter_bullets_by_fact_gate(items: List[str], diff_lower: str, allowed_files: List[str]) -> List[str]:
+    out: List[str] = []
+    for x in items:
+        s = str(x)
+        if not must_mention_file(s, allowed_files):
+            continue
+        if not mentions_only_allowed_files(s, allowed_files):
+            continue
+        if STRICT_FACT_GATING and not gate_claims_to_diff(s, diff_lower):
+            continue
+        out.append(s)
+    return out
+
+
+def rewrite_agent_summary_if_vague(summary: str, diff_lower: str) -> str:
+    s = (summary or "").strip()
+    if not s:
+        return s
+    if STRICT_FACT_GATING and not gate_claims_to_diff(s, diff_lower):
+        # neutral fallback
+        return "No concrete issues were identified from the DIFF."
+    return s
+
+
 # --- Agent definitions ---
 
 @dataclass
@@ -286,9 +330,8 @@ class Agent:
 - questions: max 5 items
 
 Hard rules (facts):
-- You MUST NOT mention technologies, files, functions, or problems that are not present in DIFF.
-- If DIFF does not show Kotlin/coroutines/external calls/etc, do NOT mention them.
-- GUIDELINES are for evaluation only; DIFF is the ONLY source of facts.
+- DIFF is the ONLY source of facts. GUIDELINES are for evaluation only.
+- You MUST NOT mention technologies/files/functions/problems not present in DIFF.
 - You may ONLY reference file paths from ALLOWED_FILES.
 - If you cannot cite evidence from the DIFF, set evidence to "unknown" AND prefer putting it into "questions".
 """.strip()
@@ -307,12 +350,12 @@ class Agent:
 }
 
 Hard rules:
-- Deduplicate and prioritize.
+- DIFF is the ONLY source of facts.
+- You MUST NOT mention any file not present in `allowed_files`.
 - Each short_summary/top_actions item MUST mention at least one file from `allowed_files` by name.
-  If you cannot tie it to a file, write it as a question (still mention a related file).
 - Do NOT recommend changing guidelines unless `docs/review_guidelines.md` is in allowed_files.
-- Do NOT output "mega lists" of test categories. Each action should be a single concrete thing.
-- Use DIFF as the only source of facts; do not invent Kotlin/coroutines/etc.
+- Do NOT output mega lists of test categories; each action must be one concrete thing.
+- Prefer "Changed: <files> ..." style over generic claims.
 """.strip()
 
 
@@ -336,7 +379,6 @@ def build_prompt(
 """.strip()
 
     allowed_files_text = "\n".join(allowed_files) if allowed_files else "(none)"
-
     facts = diff_facts(diff, allowed_files, max_lines=60)
 
     return f"""
@@ -353,9 +395,9 @@ def build_prompt(
 {facts}
 
 Critical rules:
-- DIFF is the ONLY source of facts. GUIDELINES are only for judging what you see in DIFF.
+- DIFF is the ONLY source of facts. GUIDELINES are for judging what you see in DIFF.
 - You may ONLY reference file paths from ALLOWED_FILES.
-- You MUST NOT mention technologies/files not present in DIFF. If not shown, don't say it.
+- You MUST NOT mention technologies/files not present in DIFF.
 
 TASK:
 {agent.focus}
@@ -458,6 +500,7 @@ def main() -> None:
         return
 
     diff = truncate(diff, MAX_DIFF_CHARS)
+    diff_lower = diff.lower()
 
     changed_files = extract_changed_files(diff)
     allowed_files = filter_allowed_files(changed_files)
@@ -473,7 +516,8 @@ def main() -> None:
         raw, data, err = call_with_optional_retry(MODEL, prompt)
 
         if data:
-            data["summary"] = str(data.get("summary", ""))[:500]
+            # Summary gating to reduce generic claims
+            data["summary"] = rewrite_agent_summary_if_vague(str(data.get("summary", ""))[:500], diff_lower)
 
             blocking = drop_issues_not_in_files(
                 drop_weak_issues(cap_issues(data.get("blocking"), 2)),
@@ -517,6 +561,30 @@ def main() -> None:
         curator["resolved"] = cap_list(curator.get("resolved"), 5)
         curator["still_open"] = cap_list(curator.get("still_open"), 5)
         curator["new_risks"] = cap_list(curator.get("new_risks"), 5)
+
+        # Fact-gate curator bullets/actions
+        curator["short_summary"] = filter_bullets_by_fact_gate(curator["short_summary"], diff_lower, allowed_files)[:2]
+        curator["top_actions"] = filter_bullets_by_fact_gate(curator["top_actions"], diff_lower, allowed_files)[:3]
+
+        if mode == "FOLLOW_UP":
+            curator["resolved"] = filter_bullets_by_fact_gate(curator["resolved"], diff_lower, allowed_files)[:5]
+            curator["still_open"] = filter_bullets_by_fact_gate(curator["still_open"], diff_lower, allowed_files)[:5]
+            curator["new_risks"] = filter_bullets_by_fact_gate(curator["new_risks"], diff_lower, allowed_files)[:5]
+        else:
+            curator["resolved"] = cap_list(curator.get("resolved"), 5)
+            curator["still_open"] = cap_list(curator.get("still_open"), 5)
+            curator["new_risks"] = cap_list(curator.get("new_risks"), 5)
+
+        # If gating nuked everything, keep a minimal, honest summary.
+        if not curator["short_summary"]:
+            if allowed_files:
+                curator["short_summary"] = [f"Changed: {', '.join(allowed_files[:3])}" + (" ..." if len(allowed_files) > 3 else "")]
+            else:
+                curator["short_summary"] = ["No concrete issues were identified from the DIFF."]
+
+        if not curator["top_actions"]:
+            curator["top_actions"] = ["No high-priority actions identified from the DIFF."]
+
     else:
         curator = {
             "short_summary": [f"⚠️ Curator returned invalid JSON: {curator_err}"],
@@ -530,7 +598,9 @@ def main() -> None:
         for section in ("short_summary", "top_actions"):
             for item in curator.get(section, []) or []:
                 if not must_mention_file(str(item), allowed_files):
-                    print(f"[DEBUG] Curator {section} item missing file mention: {item}")
+                    print(f"[DEBUG] Curator {section} item missing allowed file mention: {item}")
+                if not mentions_only_allowed_files(str(item), allowed_files):
+                    print(f"[DEBUG] Curator {section} item mentions non-allowed file(s): {item}")
 
     title = "Local Multi-Agent AI Review"
     title += " (Follow-up)" if mode == "FOLLOW_UP" else " (Initial)"

From e4336b0e25ff78be74cdb4707de67a9253f344a3 Mon Sep 17 00:00:00 2001
From: valerii <chrisburrrma@gmail.com>
Date: Fri, 23 Jan 2026 15:45:03 +0100
Subject: [PATCH 17/17] review adjustments

---
 .github/scripts/multi_agent_review.py | 84 ++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 16 deletions(-)

diff --git a/.github/scripts/multi_agent_review.py b/.github/scripts/multi_agent_review.py
index 25985ec..4d062a3 100644
--- a/.github/scripts/multi_agent_review.py
+++ b/.github/scripts/multi_agent_review.py
@@ -87,6 +87,19 @@ def run_ollama(model: str, prompt: str, timeout_s: int = 300) -> str:
         raise RuntimeError(f"Ollama HTTP call failed: {e}")
 
 
+def sanitize_jsonish(s: str) -> str:
+    """
+    Normalize common model output issues:
+    - smart quotes -> ascii quotes
+    - odd apostrophes -> ascii apostrophes
+    """
+    if not s:
+        return s
+    s = s.replace("“", '"').replace("”", '"').replace("„", '"')
+    s = s.replace("’", "'").replace("‘", "'")
+    return s
+
+
 def extract_first_json_object(s: str) -> Optional[str]:
     if not s:
         return None
@@ -120,9 +133,11 @@ def extract_first_json_object(s: str) -> Optional[str]:
 
 def safe_json_loads(s: str) -> Tuple[Optional[Dict[str, Any]], Optional[str]]:
     try:
+        s = sanitize_jsonish(s)
         js = extract_first_json_object(s)
         if not js:
             return None, "No JSON object found"
+        js = sanitize_jsonish(js)
         return json.loads(js), None
     except Exception as e:
         return None, str(e)
@@ -199,7 +214,7 @@ def must_mention_file(line: str, allowed_files: List[str]) -> bool:
 
 def mentions_only_allowed_files(line: str, allowed_files: List[str]) -> bool:
     """
-    Conservative: if line mentions a '.py' / '.yml' / '.yaml' / '.kt' / '.java' / '.md' path,
+    Conservative: if line mentions a path with a known extension,
     ensure every mentioned path is within allowed_files.
     """
     import re
@@ -233,7 +248,6 @@ def diff_contains_any(diff_lower: str, needles: List[str]) -> bool:
 def gate_claims_to_diff(text: str, diff_lower: str) -> bool:
     """
     If the text talks about a gated topic, require the diff to contain related lexemes.
-    This is intentionally strict to avoid hallucinated "idempotence/retry/etc" claims.
     """
     t = (text or "").lower()
     for topic, needles in KEYWORD_GATES.items():
@@ -247,7 +261,7 @@ def filter_bullets_by_fact_gate(items: List[str], diff_lower: str, allowed_files
     out: List[str] = []
     for x in items:
         s = str(x)
-        if not must_mention_file(s, allowed_files):
+        if allowed_files and not must_mention_file(s, allowed_files):
             continue
         if not mentions_only_allowed_files(s, allowed_files):
             continue
@@ -257,16 +271,60 @@ def filter_bullets_by_fact_gate(items: List[str], diff_lower: str, allowed_files
     return out
 
 
-def rewrite_agent_summary_if_vague(summary: str, diff_lower: str) -> str:
+def agent_summary_ok(summary: str, diff_lower: str, allowed_files: List[str]) -> bool:
     s = (summary or "").strip()
     if not s:
-        return s
+        return False
     if STRICT_FACT_GATING and not gate_claims_to_diff(s, diff_lower):
-        # neutral fallback
+        return False
+    if allowed_files and not must_mention_file(s, allowed_files):
+        return False
+    if not mentions_only_allowed_files(s, allowed_files):
+        return False
+    return True
+
+
+def rewrite_agent_summary_if_vague(summary: str, diff_lower: str, allowed_files: List[str]) -> str:
+    s = (summary or "").strip()
+    if not s:
+        return s
+    if not agent_summary_ok(s, diff_lower, allowed_files):
         return "No concrete issues were identified from the DIFF."
     return s
 
 
+def extract_relevant_guidelines(guidelines: str, diff: str, max_lines: int = 80) -> str:
+    """
+    Reduce guideline "prompt bleed" by only including relevant snippets.
+    - If diff doesn't trigger any keywords, include only a tiny header (first ~20 lines).
+    - Otherwise, include up to max_lines lines that match triggered keywords.
+    """
+    if not guidelines.strip():
+        return ""
+    diff_lower = diff.lower()
+    triggers: List[str] = []
+    for needles in KEYWORD_GATES.values():
+        for n in needles:
+            if n in diff_lower:
+                triggers.append(n)
+    triggers = list(dict.fromkeys(triggers))
+
+    lines = guidelines.splitlines()
+
+    if not triggers:
+        return "\n".join(lines[:min(len(lines), 20)])
+
+    picked: List[str] = []
+    for ln in lines:
+        l = ln.lower()
+        if any(k in l for k in triggers):
+            picked.append(ln)
+            if len(picked) >= max_lines:
+                break
+
+    return "\n".join(picked) if picked else "\n".join(lines[:min(len(lines), 20)])
+
+
 # --- Agent definitions ---
 
 @dataclass
@@ -380,6 +438,7 @@ def build_prompt(
 
     allowed_files_text = "\n".join(allowed_files) if allowed_files else "(none)"
     facts = diff_facts(diff, allowed_files, max_lines=60)
+    relevant_guidelines = extract_relevant_guidelines(guidelines, diff)
 
     return f"""
 SYSTEM:
@@ -402,8 +461,8 @@ def build_prompt(
 TASK:
 {agent.focus}
 
-GUIDELINES (evaluation only):
-{guidelines if guidelines else "(none provided)"}
+GUIDELINES (evaluation only, trimmed):
+{relevant_guidelines if relevant_guidelines else "(none provided)"}
 
 DIFF:
 {diff}
@@ -516,8 +575,7 @@ def main() -> None:
         raw, data, err = call_with_optional_retry(MODEL, prompt)
 
         if data:
-            # Summary gating to reduce generic claims
-            data["summary"] = rewrite_agent_summary_if_vague(str(data.get("summary", ""))[:500], diff_lower)
+            data["summary"] = rewrite_agent_summary_if_vague(str(data.get("summary", ""))[:500], diff_lower, allowed_files)
 
             blocking = drop_issues_not_in_files(
                 drop_weak_issues(cap_issues(data.get("blocking"), 2)),
@@ -562,7 +620,6 @@ def main() -> None:
         curator["still_open"] = cap_list(curator.get("still_open"), 5)
         curator["new_risks"] = cap_list(curator.get("new_risks"), 5)
 
-        # Fact-gate curator bullets/actions
         curator["short_summary"] = filter_bullets_by_fact_gate(curator["short_summary"], diff_lower, allowed_files)[:2]
         curator["top_actions"] = filter_bullets_by_fact_gate(curator["top_actions"], diff_lower, allowed_files)[:3]
 
@@ -570,12 +627,7 @@ def main() -> None:
             curator["resolved"] = filter_bullets_by_fact_gate(curator["resolved"], diff_lower, allowed_files)[:5]
             curator["still_open"] = filter_bullets_by_fact_gate(curator["still_open"], diff_lower, allowed_files)[:5]
             curator["new_risks"] = filter_bullets_by_fact_gate(curator["new_risks"], diff_lower, allowed_files)[:5]
-        else:
-            curator["resolved"] = cap_list(curator.get("resolved"), 5)
-            curator["still_open"] = cap_list(curator.get("still_open"), 5)
-            curator["new_risks"] = cap_list(curator.get("new_risks"), 5)
 
-        # If gating nuked everything, keep a minimal, honest summary.
         if not curator["short_summary"]:
             if allowed_files:
                 curator["short_summary"] = [f"Changed: {', '.join(allowed_files[:3])}" + (" ..." if len(allowed_files) > 3 else "")]