From 2e7cd45aa972f9f4fd3e78df022c1f170c421228 Mon Sep 17 00:00:00 2001
From: Jin Choi <jinchoi@u.northwestern.edu>
Date: Sun, 26 Apr 2026 14:10:26 -0700
Subject: [PATCH 1/3] =?UTF-8?q?feat:=20Option=20B=20PR=20workflow=20?=
 =?UTF-8?q?=E2=80=94=20Codex=20review=20for=20code,=20direct-commit=20for?=
 =?UTF-8?q?=20data?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Establishes the Option B architecture:
  - Pure data updates (knowledge-base/) continue to flow through
    post-ingest's auto-git step directly to main.
  - Code, briefs, docs, schemas, hooks, and GitHub workflows now go
    through branches + PRs and get reviewed by Codex before merge.

Three pieces:

1. .github/workflows/codex-review.yml
   Triggers on pull_request for code-touching paths only. Uses the
   openai/codex-action (action ref is a placeholder — adjust to match
   the install path: GitHub App vs action). Skips data paths to avoid
   wasting review cycles on regenerable insight files.

2. .github/pull_request_template.md
   Scaffold for PR descriptions: change summary, scope checklist, risk
   surface, tests, out-of-scope notes, verification. Helps Codex
   review against intent rather than just diff.

3. scripts/post-ingest.ts
   Adds isDataPath() helper and a guard in the auto-git step. If any
   non-data file is staged when post-ingest runs (because the user
   staged it manually), the auto-commit aborts with a clear message
   directing the user to either unstage or open a PR. Preserves the
   data-only invariant on auto-commits to main.

Setup follow-up (manual, in repo Settings):
  - Add OPENAI_API_KEY secret for the Codex action.
  - Enable branch protection on main: require PR + status checks.
  - Confirm openai/codex-action ref or install the GitHub App.

Tests: all 464 existing tests pass. No new tests added — the guard's
behavior is exercised by the smoke test of running post-ingest with
nothing staged outside knowledge-base/ (the common path).
---
 .github/pull_request_template.md   |  49 ++++++++++++++
 .github/workflows/codex-review.yml | 101 +++++++++++++++++++++++++++++
 scripts/post-ingest.ts             |  89 +++++++++++++++++++------
 3 files changed, 219 insertions(+), 20 deletions(-)
 create mode 100644 .github/pull_request_template.md
 create mode 100644 .github/workflows/codex-review.yml

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 0000000000..305f8393d7
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,49 @@
+<!--
+PR template for Zuhn. Codex reviews PRs against this scaffold and the
+project's CLAUDE.md / AGENTS.md conventions.
+
+Pure data updates (insights, principles, sources, views) flow through
+post-ingest's auto-git step directly to main and do NOT use PRs.
+This template is for code, schema, hooks, briefs, or docs.
+-->
+
+## What this changes
+
+<!-- 1-3 sentences naming the user-facing or behavior-facing change. -->
+
+## Why
+
+<!-- The problem being solved or the capability being added. -->
+
+## Scope
+
+- [ ] Code (scripts, schemas, agents, hooks)
+- [ ] CI / GitHub workflows
+- [ ] Tests
+- [ ] Briefs / docs / README
+- [ ] Skills
+- [ ] Other: ____
+
+## Risk surface
+
+<!-- What could break if this is wrong? Which parts of the pipeline
+     does this touch? Add nothing if risk is low; add specifics if not. -->
+
+## Tests
+
+<!-- New tests added, or rationale for not adding any. Reference test
+     files by path. -->
+
+## Out of scope
+
+<!-- Adjacent things you noticed but did not fix here. Helps reviewers
+     calibrate scope and prevents scope-creep ratchet. -->
+
+## Verification
+
+<!-- How you confirmed this works. Commands run, manual checks, etc. -->
+
+---
+
+🤖 If Codex review surfaces non-trivial findings, address or explicitly
+acknowledge each one before merging.
diff --git a/.github/workflows/codex-review.yml b/.github/workflows/codex-review.yml
new file mode 100644
index 0000000000..ce85b30f11
--- /dev/null
+++ b/.github/workflows/codex-review.yml
@@ -0,0 +1,101 @@
+name: Codex Review
+
+# Runs OpenAI Codex review on every PR that touches code (not data).
+# Pairs with the Option B architecture: data changes commit directly
+# to main via post-ingest's auto-git step; code changes flow through
+# branches + PRs and are reviewed here before merge.
+#
+# Setup required:
+#   1. Install the OpenAI Codex GitHub App in repo settings, OR
+#      use the openai/codex-action (whichever your account has).
+#   2. Add OPENAI_API_KEY to repo secrets:
+#        Settings → Secrets and variables → Actions → New repo secret
+#   3. (Optional) Enable branch protection on main:
+#        Settings → Branches → Add rule for main:
+#          - Require pull request before merging
+#          - Require status checks (CI + Codex Review)
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths:
+      # Code surface — review when these change
+      - "scripts/**"
+      - ".github/**"
+      - "tests/**"
+      - "templates/**"
+      - "skills/**"
+      - "package.json"
+      - "package-lock.json"
+      - "tsconfig.json"
+      - "vitest.config.ts"
+      # Briefs are user-authored content; review for craft + voice
+      - "briefs/**"
+      - "docs/**"
+      - "README.md"
+      - "CLAUDE.md"
+      - "AGENTS.md"
+    paths-ignore:
+      # Data surface — never review (regenerable from sources)
+      - "knowledge-base/**"
+      - "**/views/**"
+      - "**/MASTER_INDEX.md"
+      - "**/_summary.md"
+      - "**/_overview.md"
+
+# Cancel in-progress reviews when a new commit lands on the same PR
+concurrency:
+  group: codex-review-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  codex-review:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+
+    steps:
+      - name: Checkout PR
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          # Check out the PR's HEAD, not the merge commit, so Codex
+          # reviews the actual proposed code.
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Run Codex review
+        # NOTE: confirm the action ref against what your OpenAI Codex
+        # subscription provides. Common forms:
+        #   - openai/codex-action@v1
+        #   - openai/codex@v1
+        # If your install uses the GitHub App rather than an action,
+        # you can delete this step entirely; the App posts comments
+        # automatically when the workflow trigger fires.
+        uses: openai/codex-action@v1
+        with:
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+          # Review focus — tells Codex what to prioritize
+          review-instructions: |
+            Review for: correctness, simplicity, type-safety, hidden bugs,
+            unnecessary abstractions, and adherence to the project's
+            CLAUDE.md and AGENTS.md conventions. Skip nitpicks on
+            formatting handled by linters. Prefer concrete suggestions
+            with file:line references.
+
+      - name: Summarize review status
+        if: always()
+        # Move PR-controlled values into env vars rather than expanding
+        # them directly in the run script. PR head ref / branch name
+        # are user-controlled and should never be substituted into a
+        # shell command. See:
+        # https://github.blog/security/vulnerability-research/how-to-catch-github-actions-workflow-injections-before-attackers-do/
+        env:
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+          PR_BRANCH: ${{ github.event.pull_request.head.ref }}
+          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          echo "Codex review completed for PR #${PR_NUMBER}"
+          echo "Branch: ${PR_BRANCH}"
+          echo "Head SHA: ${PR_HEAD_SHA}"
diff --git a/scripts/post-ingest.ts b/scripts/post-ingest.ts
index 3165c8e441..6a51f1b744 100644
--- a/scripts/post-ingest.ts
+++ b/scripts/post-ingest.ts
@@ -32,6 +32,24 @@ async function appendToActivityLog(entry: string): Promise<void> {
   await writeFile(ACTIVITY_LOG, existing + line + "\n", "utf-8");
 }
 
+/**
+ * Classify a staged path as data (auto-committable to main) or not.
+ *
+ * Option B architecture: pure data updates flow through post-ingest's
+ * auto-git step directly to main. Anything else — scripts, configs,
+ * briefs, docs, schemas, hooks, GitHub workflows — must go through a
+ * branch + PR + Codex review. The guard in the auto-git step uses
+ * this predicate to refuse commits that mix code into a data run.
+ *
+ * Data scope is anything under `knowledge-base/`. The MASTER_INDEX,
+ * views, _summary files, and meta/ all live there.
+ */
+function isDataPath(path: string): boolean {
+  // Normalize POSIX-style separators (git output) regardless of host
+  const normalized = path.replace(/\\/g, "/");
+  return normalized.startsWith("knowledge-base/");
+}
+
 function buildAutoCommitMessage(): string {
   try {
     const diffStat = execFileSync(
@@ -207,32 +225,63 @@ async function main(): Promise<void> {
   let gitStatus = "SKIPPED";
   console.log("\n>> Auto-git");
   try {
-    // Stage knowledge-base/
+    // Stage knowledge-base/ — auto-git is intentionally narrow-scoped
+    // to data only. Code, briefs, and config changes go through PRs
+    // (see .github/workflows/codex-review.yml for the review surface).
     execFileSync("git", ["add", "knowledge-base/"], {
       stdio: "pipe",
       cwd: PROJECT_ROOT,
     });
 
-    // Check if there are staged changes
-    try {
-      execFileSync("git", ["diff", "--cached", "--quiet"], {
-        stdio: "pipe",
-        cwd: PROJECT_ROOT,
-      });
-      // Exit 0 means no changes
-      console.log("   No changes to commit.");
-      gitStatus = "NO_CHANGES";
-    } catch {
-      // Exit non-zero means there ARE staged changes — commit them
-      // Build a descriptive commit message from what actually changed
-      const commitMsg = buildAutoCommitMessage();
-      execFileSync(
-        "git",
-        ["commit", "-m", commitMsg],
-        { stdio: "pipe", cwd: PROJECT_ROOT }
+    // Guard: if any non-data file is staged (because the user staged
+    // it manually before running post-ingest), abort the auto-commit
+    // rather than mixing code into a data commit. This preserves the
+    // Option B invariant that auto-commits to main are data-only.
+    const stagedFiles = execFileSync(
+      "git",
+      ["diff", "--cached", "--name-only"],
+      { encoding: "utf-8", cwd: PROJECT_ROOT },
+    )
+      .split("\n")
+      .map((s) => s.trim())
+      .filter(Boolean);
+
+    const nonDataFiles = stagedFiles.filter((f) => !isDataPath(f));
+
+    if (nonDataFiles.length > 0) {
+      console.error(
+        "   Auto-git aborted: non-data files are staged. Auto-commits must be data-only.",
+      );
+      console.error("   Staged non-data files:");
+      for (const f of nonDataFiles) console.error(`     - ${f}`);
+      console.error(
+        "   Resolve by either unstaging these (`git restore --staged <file>`) or",
+      );
+      console.error(
+        "   committing them manually on a feature branch + PR (Option B).",
       );
-      console.log(`   Committed: ${commitMsg}`);
-      gitStatus = "COMMITTED";
+      gitStatus = "ABORTED_NON_DATA_STAGED";
+    } else {
+      // Check if there are staged changes
+      try {
+        execFileSync("git", ["diff", "--cached", "--quiet"], {
+          stdio: "pipe",
+          cwd: PROJECT_ROOT,
+        });
+        // Exit 0 means no changes
+        console.log("   No changes to commit.");
+        gitStatus = "NO_CHANGES";
+      } catch {
+        // Exit non-zero means there ARE staged changes — commit them.
+        // Build a descriptive commit message from what actually changed.
+        const commitMsg = buildAutoCommitMessage();
+        execFileSync("git", ["commit", "-m", commitMsg], {
+          stdio: "pipe",
+          cwd: PROJECT_ROOT,
+        });
+        console.log(`   Committed: ${commitMsg}`);
+        gitStatus = "COMMITTED";
+      }
     }
   } catch (err: unknown) {
     const msg = err instanceof Error ? err.message : String(err);

From e6ab051e484b3ec5eca49ad24b0064308dfdbc7f Mon Sep 17 00:00:00 2001
From: Jin Choi <jinchoi@u.northwestern.edu>
Date: Sun, 26 Apr 2026 14:15:58 -0700
Subject: [PATCH 2/3] =?UTF-8?q?fix:=20drop=20codex-review.yml=20=E2=80=94?=
 =?UTF-8?q?=20Codex=20review=20uses=20GitHub=20App,=20not=20workflow?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two issues with the original workflow:

1. Invalid YAML — GitHub rejects `paths` + `paths-ignore` on the same
   trigger (Line 38, Col 5: "you may only define one of paths and
   paths-ignore for a single event").

2. The workflow was unnecessary in the first place. The user's other
   repos use the chatgpt-codex-connector GitHub App, which listens
   to pull_request events directly and posts reviews as a bot account.
   No workflow file is needed; the openai/codex-action@v1 reference
   was speculative and not a real published action.

Fix: delete the workflow. Update the auto-git comment in post-ingest
to point at the App instead. PR template stays as-is since its Codex
references work for either delivery mechanism.

Setup follow-up (manual, in repo Settings):
  - Install chatgpt-codex-connector GitHub App on gorajing/zuhn
    (probably already authorized at the org level if installed on
    other gorajing repos like rezn).
  - Branch protection on main still recommended.
---
 .github/workflows/codex-review.yml | 101 -----------------------------
 scripts/post-ingest.ts             |   4 +-
 2 files changed, 3 insertions(+), 102 deletions(-)
 delete mode 100644 .github/workflows/codex-review.yml

diff --git a/.github/workflows/codex-review.yml b/.github/workflows/codex-review.yml
deleted file mode 100644
index ce85b30f11..0000000000
--- a/.github/workflows/codex-review.yml
+++ /dev/null
@@ -1,101 +0,0 @@
-name: Codex Review
-
-# Runs OpenAI Codex review on every PR that touches code (not data).
-# Pairs with the Option B architecture: data changes commit directly
-# to main via post-ingest's auto-git step; code changes flow through
-# branches + PRs and are reviewed here before merge.
-#
-# Setup required:
-#   1. Install the OpenAI Codex GitHub App in repo settings, OR
-#      use the openai/codex-action (whichever your account has).
-#   2. Add OPENAI_API_KEY to repo secrets:
-#        Settings → Secrets and variables → Actions → New repo secret
-#   3. (Optional) Enable branch protection on main:
-#        Settings → Branches → Add rule for main:
-#          - Require pull request before merging
-#          - Require status checks (CI + Codex Review)
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened, ready_for_review]
-    paths:
-      # Code surface — review when these change
-      - "scripts/**"
-      - ".github/**"
-      - "tests/**"
-      - "templates/**"
-      - "skills/**"
-      - "package.json"
-      - "package-lock.json"
-      - "tsconfig.json"
-      - "vitest.config.ts"
-      # Briefs are user-authored content; review for craft + voice
-      - "briefs/**"
-      - "docs/**"
-      - "README.md"
-      - "CLAUDE.md"
-      - "AGENTS.md"
-    paths-ignore:
-      # Data surface — never review (regenerable from sources)
-      - "knowledge-base/**"
-      - "**/views/**"
-      - "**/MASTER_INDEX.md"
-      - "**/_summary.md"
-      - "**/_overview.md"
-
-# Cancel in-progress reviews when a new commit lands on the same PR
-concurrency:
-  group: codex-review-${{ github.event.pull_request.number }}
-  cancel-in-progress: true
-
-jobs:
-  codex-review:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      pull-requests: write
-      issues: write
-
-    steps:
-      - name: Checkout PR
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          # Check out the PR's HEAD, not the merge commit, so Codex
-          # reviews the actual proposed code.
-          ref: ${{ github.event.pull_request.head.sha }}
-
-      - name: Run Codex review
-        # NOTE: confirm the action ref against what your OpenAI Codex
-        # subscription provides. Common forms:
-        #   - openai/codex-action@v1
-        #   - openai/codex@v1
-        # If your install uses the GitHub App rather than an action,
-        # you can delete this step entirely; the App posts comments
-        # automatically when the workflow trigger fires.
-        uses: openai/codex-action@v1
-        with:
-          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
-          # Review focus — tells Codex what to prioritize
-          review-instructions: |
-            Review for: correctness, simplicity, type-safety, hidden bugs,
-            unnecessary abstractions, and adherence to the project's
-            CLAUDE.md and AGENTS.md conventions. Skip nitpicks on
-            formatting handled by linters. Prefer concrete suggestions
-            with file:line references.
-
-      - name: Summarize review status
-        if: always()
-        # Move PR-controlled values into env vars rather than expanding
-        # them directly in the run script. PR head ref / branch name
-        # are user-controlled and should never be substituted into a
-        # shell command. See:
-        # https://github.blog/security/vulnerability-research/how-to-catch-github-actions-workflow-injections-before-attackers-do/
-        env:
-          PR_NUMBER: ${{ github.event.pull_request.number }}
-          PR_BRANCH: ${{ github.event.pull_request.head.ref }}
-          PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
-        run: |
-          echo "Codex review completed for PR #${PR_NUMBER}"
-          echo "Branch: ${PR_BRANCH}"
-          echo "Head SHA: ${PR_HEAD_SHA}"
diff --git a/scripts/post-ingest.ts b/scripts/post-ingest.ts
index 6a51f1b744..af6d6977ef 100644
--- a/scripts/post-ingest.ts
+++ b/scripts/post-ingest.ts
@@ -227,7 +227,9 @@ async function main(): Promise<void> {
   try {
     // Stage knowledge-base/ — auto-git is intentionally narrow-scoped
     // to data only. Code, briefs, and config changes go through PRs
-    // (see .github/workflows/codex-review.yml for the review surface).
+    // and are reviewed by the chatgpt-codex-connector GitHub App,
+    // which triggers automatically on pull_request events. No workflow
+    // file is needed; the App listens via webhooks.
     execFileSync("git", ["add", "knowledge-base/"], {
       stdio: "pipe",
       cwd: PROJECT_ROOT,

From c6d2c70e8ead49dbe5e31dbcba3b34a70499083a Mon Sep 17 00:00:00 2001
From: Jin Choi <jinchoi@u.northwestern.edu>
Date: Sun, 26 Apr 2026 14:31:32 -0700
Subject: [PATCH 3/3] fix(auto-git): use -z for null-terminated git output to
 handle quoted paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Codex review on PR #1 caught a regression: `git diff --cached --name-only`
C-quotes non-ASCII filenames by default (core.quotePath=true), turning
`knowledge-base/foo-ñ.md` into `"knowledge-base/foo-\303\261.md"`. The
leading quote defeats `isDataPath`'s `startsWith("knowledge-base/")`
check, so legitimate Unicode-named data files would be misclassified
as non-data and abort the auto-git step on pure data runs.

Fix: pass `-z` to git diff for null-terminated raw paths, then split
on `\0`. No quoting, no C-escapes. Removes the `.trim()` call too
since -z output has no surrounding whitespace.

Tests: 464 / 464 passing. The fix is internal to auto-git and does
not change the predicate's contract.
---
 scripts/post-ingest.ts | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/scripts/post-ingest.ts b/scripts/post-ingest.ts
index af6d6977ef..5c94f32bfc 100644
--- a/scripts/post-ingest.ts
+++ b/scripts/post-ingest.ts
@@ -239,13 +239,18 @@ async function main(): Promise<void> {
     // it manually before running post-ingest), abort the auto-commit
     // rather than mixing code into a data commit. This preserves the
     // Option B invariant that auto-commits to main are data-only.
+    // Use -z for null-terminated output. Without it, git C-quotes
+    // non-ASCII filenames (default core.quotePath=true), turning paths
+    // like `knowledge-base/foo-ñ.md` into `"knowledge-base/foo-\303\261.md"`.
+    // The leading quote would defeat isDataPath's prefix check and
+    // misclassify legitimate Unicode-named data files as non-data,
+    // aborting auto-git on pure data runs.
     const stagedFiles = execFileSync(
       "git",
-      ["diff", "--cached", "--name-only"],
+      ["diff", "--cached", "--name-only", "-z"],
       { encoding: "utf-8", cwd: PROJECT_ROOT },
     )
-      .split("\n")
-      .map((s) => s.trim())
+      .split("\0")
       .filter(Boolean);
 
     const nonDataFiles = stagedFiles.filter((f) => !isDataPath(f));