From 2e7cd45aa972f9f4fd3e78df022c1f170c421228 Mon Sep 17 00:00:00 2001 From: Jin Choi Date: Sun, 26 Apr 2026 14:10:26 -0700 Subject: [PATCH 1/3] =?UTF-8?q?feat:=20Option=20B=20PR=20workflow=20?= =?UTF-8?q?=E2=80=94=20Codex=20review=20for=20code,=20direct-commit=20for?= =?UTF-8?q?=20data?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Establishes the Option B architecture: - Pure data updates (knowledge-base/) continue to flow through post-ingest's auto-git step directly to main. - Code, briefs, docs, schemas, hooks, and GitHub workflows now go through branches + PRs and get reviewed by Codex before merge. Three pieces: 1. .github/workflows/codex-review.yml Triggers on pull_request for code-touching paths only. Uses the openai/codex-action (action ref is a placeholder — adjust to match the install path: GitHub App vs action). Skips data paths to avoid wasting review cycles on regenerable insight files. 2. .github/pull_request_template.md Scaffold for PR descriptions: change summary, scope checklist, risk surface, tests, out-of-scope notes, verification. Helps Codex review against intent rather than just diff. 3. scripts/post-ingest.ts Adds isDataPath() helper and a guard in the auto-git step. If any non-data file is staged when post-ingest runs (because the user staged it manually), the auto-commit aborts with a clear message directing the user to either unstage or open a PR. Preserves the data-only invariant on auto-commits to main. Setup follow-up (manual, in repo Settings): - Add OPENAI_API_KEY secret for the Codex action. - Enable branch protection on main: require PR + status checks. - Confirm openai/codex-action ref or install the GitHub App. Tests: all 464 existing tests pass. No new tests added — the guard's behavior is exercised by the smoke test of running post-ingest with nothing staged outside knowledge-base/ (the common path). --- .github/pull_request_template.md | 49 ++++++++++++++ .github/workflows/codex-review.yml | 101 +++++++++++++++++++++++++++++ scripts/post-ingest.ts | 89 +++++++++++++++++++------ 3 files changed, 219 insertions(+), 20 deletions(-) create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/codex-review.yml diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..305f8393d7 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,49 @@ + + +## What this changes + + + +## Why + + + +## Scope + +- [ ] Code (scripts, schemas, agents, hooks) +- [ ] CI / GitHub workflows +- [ ] Tests +- [ ] Briefs / docs / README +- [ ] Skills +- [ ] Other: ____ + +## Risk surface + + + +## Tests + + + +## Out of scope + + + +## Verification + + + +--- + +🤖 If Codex review surfaces non-trivial findings, address or explicitly +acknowledge each one before merging. diff --git a/.github/workflows/codex-review.yml b/.github/workflows/codex-review.yml new file mode 100644 index 0000000000..ce85b30f11 --- /dev/null +++ b/.github/workflows/codex-review.yml @@ -0,0 +1,101 @@ +name: Codex Review + +# Runs OpenAI Codex review on every PR that touches code (not data). +# Pairs with the Option B architecture: data changes commit directly +# to main via post-ingest's auto-git step; code changes flow through +# branches + PRs and are reviewed here before merge. +# +# Setup required: +# 1. Install the OpenAI Codex GitHub App in repo settings, OR +# use the openai/codex-action (whichever your account has). +# 2. Add OPENAI_API_KEY to repo secrets: +# Settings → Secrets and variables → Actions → New repo secret +# 3. (Optional) Enable branch protection on main: +# Settings → Branches → Add rule for main: +# - Require pull request before merging +# - Require status checks (CI + Codex Review) + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + paths: + # Code surface — review when these change + - "scripts/**" + - ".github/**" + - "tests/**" + - "templates/**" + - "skills/**" + - "package.json" + - "package-lock.json" + - "tsconfig.json" + - "vitest.config.ts" + # Briefs are user-authored content; review for craft + voice + - "briefs/**" + - "docs/**" + - "README.md" + - "CLAUDE.md" + - "AGENTS.md" + paths-ignore: + # Data surface — never review (regenerable from sources) + - "knowledge-base/**" + - "**/views/**" + - "**/MASTER_INDEX.md" + - "**/_summary.md" + - "**/_overview.md" + +# Cancel in-progress reviews when a new commit lands on the same PR +concurrency: + group: codex-review-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + codex-review: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + issues: write + + steps: + - name: Checkout PR + uses: actions/checkout@v4 + with: + fetch-depth: 0 + # Check out the PR's HEAD, not the merge commit, so Codex + # reviews the actual proposed code. + ref: ${{ github.event.pull_request.head.sha }} + + - name: Run Codex review + # NOTE: confirm the action ref against what your OpenAI Codex + # subscription provides. Common forms: + # - openai/codex-action@v1 + # - openai/codex@v1 + # If your install uses the GitHub App rather than an action, + # you can delete this step entirely; the App posts comments + # automatically when the workflow trigger fires. + uses: openai/codex-action@v1 + with: + openai-api-key: ${{ secrets.OPENAI_API_KEY }} + # Review focus — tells Codex what to prioritize + review-instructions: | + Review for: correctness, simplicity, type-safety, hidden bugs, + unnecessary abstractions, and adherence to the project's + CLAUDE.md and AGENTS.md conventions. Skip nitpicks on + formatting handled by linters. Prefer concrete suggestions + with file:line references. + + - name: Summarize review status + if: always() + # Move PR-controlled values into env vars rather than expanding + # them directly in the run script. PR head ref / branch name + # are user-controlled and should never be substituted into a + # shell command. See: + # https://github.blog/security/vulnerability-research/how-to-catch-github-actions-workflow-injections-before-attackers-do/ + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + PR_BRANCH: ${{ github.event.pull_request.head.ref }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + echo "Codex review completed for PR #${PR_NUMBER}" + echo "Branch: ${PR_BRANCH}" + echo "Head SHA: ${PR_HEAD_SHA}" diff --git a/scripts/post-ingest.ts b/scripts/post-ingest.ts index 3165c8e441..6a51f1b744 100644 --- a/scripts/post-ingest.ts +++ b/scripts/post-ingest.ts @@ -32,6 +32,24 @@ async function appendToActivityLog(entry: string): Promise { await writeFile(ACTIVITY_LOG, existing + line + "\n", "utf-8"); } +/** + * Classify a staged path as data (auto-committable to main) or not. + * + * Option B architecture: pure data updates flow through post-ingest's + * auto-git step directly to main. Anything else — scripts, configs, + * briefs, docs, schemas, hooks, GitHub workflows — must go through a + * branch + PR + Codex review. The guard in the auto-git step uses + * this predicate to refuse commits that mix code into a data run. + * + * Data scope is anything under `knowledge-base/`. The MASTER_INDEX, + * views, _summary files, and meta/ all live there. + */ +function isDataPath(path: string): boolean { + // Normalize POSIX-style separators (git output) regardless of host + const normalized = path.replace(/\\/g, "/"); + return normalized.startsWith("knowledge-base/"); +} + function buildAutoCommitMessage(): string { try { const diffStat = execFileSync( @@ -207,32 +225,63 @@ async function main(): Promise { let gitStatus = "SKIPPED"; console.log("\n>> Auto-git"); try { - // Stage knowledge-base/ + // Stage knowledge-base/ — auto-git is intentionally narrow-scoped + // to data only. Code, briefs, and config changes go through PRs + // (see .github/workflows/codex-review.yml for the review surface). execFileSync("git", ["add", "knowledge-base/"], { stdio: "pipe", cwd: PROJECT_ROOT, }); - // Check if there are staged changes - try { - execFileSync("git", ["diff", "--cached", "--quiet"], { - stdio: "pipe", - cwd: PROJECT_ROOT, - }); - // Exit 0 means no changes - console.log(" No changes to commit."); - gitStatus = "NO_CHANGES"; - } catch { - // Exit non-zero means there ARE staged changes — commit them - // Build a descriptive commit message from what actually changed - const commitMsg = buildAutoCommitMessage(); - execFileSync( - "git", - ["commit", "-m", commitMsg], - { stdio: "pipe", cwd: PROJECT_ROOT } + // Guard: if any non-data file is staged (because the user staged + // it manually before running post-ingest), abort the auto-commit + // rather than mixing code into a data commit. This preserves the + // Option B invariant that auto-commits to main are data-only. + const stagedFiles = execFileSync( + "git", + ["diff", "--cached", "--name-only"], + { encoding: "utf-8", cwd: PROJECT_ROOT }, + ) + .split("\n") + .map((s) => s.trim()) + .filter(Boolean); + + const nonDataFiles = stagedFiles.filter((f) => !isDataPath(f)); + + if (nonDataFiles.length > 0) { + console.error( + " Auto-git aborted: non-data files are staged. Auto-commits must be data-only.", + ); + console.error(" Staged non-data files:"); + for (const f of nonDataFiles) console.error(` - ${f}`); + console.error( + " Resolve by either unstaging these (`git restore --staged `) or", + ); + console.error( + " committing them manually on a feature branch + PR (Option B).", ); - console.log(` Committed: ${commitMsg}`); - gitStatus = "COMMITTED"; + gitStatus = "ABORTED_NON_DATA_STAGED"; + } else { + // Check if there are staged changes + try { + execFileSync("git", ["diff", "--cached", "--quiet"], { + stdio: "pipe", + cwd: PROJECT_ROOT, + }); + // Exit 0 means no changes + console.log(" No changes to commit."); + gitStatus = "NO_CHANGES"; + } catch { + // Exit non-zero means there ARE staged changes — commit them. + // Build a descriptive commit message from what actually changed. + const commitMsg = buildAutoCommitMessage(); + execFileSync("git", ["commit", "-m", commitMsg], { + stdio: "pipe", + cwd: PROJECT_ROOT, + }); + console.log(` Committed: ${commitMsg}`); + gitStatus = "COMMITTED"; + } } } catch (err: unknown) { const msg = err instanceof Error ? err.message : String(err); From e6ab051e484b3ec5eca49ad24b0064308dfdbc7f Mon Sep 17 00:00:00 2001 From: Jin Choi Date: Sun, 26 Apr 2026 14:15:58 -0700 Subject: [PATCH 2/3] =?UTF-8?q?fix:=20drop=20codex-review.yml=20=E2=80=94?= =?UTF-8?q?=20Codex=20review=20uses=20GitHub=20App,=20not=20workflow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues with the original workflow: 1. Invalid YAML — GitHub rejects `paths` + `paths-ignore` on the same trigger (Line 38, Col 5: "you may only define one of paths and paths-ignore for a single event"). 2. The workflow was unnecessary in the first place. The user's other repos use the chatgpt-codex-connector GitHub App, which listens to pull_request events directly and posts reviews as a bot account. No workflow file is needed; the openai/codex-action@v1 reference was speculative and not a real published action. Fix: delete the workflow. Update the auto-git comment in post-ingest to point at the App instead. PR template stays as-is since its Codex references work for either delivery mechanism. Setup follow-up (manual, in repo Settings): - Install chatgpt-codex-connector GitHub App on gorajing/zuhn (probably already authorized at the org level if installed on other gorajing repos like rezn). - Branch protection on main still recommended. --- .github/workflows/codex-review.yml | 101 ----------------------------- scripts/post-ingest.ts | 4 +- 2 files changed, 3 insertions(+), 102 deletions(-) delete mode 100644 .github/workflows/codex-review.yml diff --git a/.github/workflows/codex-review.yml b/.github/workflows/codex-review.yml deleted file mode 100644 index ce85b30f11..0000000000 --- a/.github/workflows/codex-review.yml +++ /dev/null @@ -1,101 +0,0 @@ -name: Codex Review - -# Runs OpenAI Codex review on every PR that touches code (not data). -# Pairs with the Option B architecture: data changes commit directly -# to main via post-ingest's auto-git step; code changes flow through -# branches + PRs and are reviewed here before merge. -# -# Setup required: -# 1. Install the OpenAI Codex GitHub App in repo settings, OR -# use the openai/codex-action (whichever your account has). -# 2. Add OPENAI_API_KEY to repo secrets: -# Settings → Secrets and variables → Actions → New repo secret -# 3. (Optional) Enable branch protection on main: -# Settings → Branches → Add rule for main: -# - Require pull request before merging -# - Require status checks (CI + Codex Review) - -on: - pull_request: - types: [opened, synchronize, reopened, ready_for_review] - paths: - # Code surface — review when these change - - "scripts/**" - - ".github/**" - - "tests/**" - - "templates/**" - - "skills/**" - - "package.json" - - "package-lock.json" - - "tsconfig.json" - - "vitest.config.ts" - # Briefs are user-authored content; review for craft + voice - - "briefs/**" - - "docs/**" - - "README.md" - - "CLAUDE.md" - - "AGENTS.md" - paths-ignore: - # Data surface — never review (regenerable from sources) - - "knowledge-base/**" - - "**/views/**" - - "**/MASTER_INDEX.md" - - "**/_summary.md" - - "**/_overview.md" - -# Cancel in-progress reviews when a new commit lands on the same PR -concurrency: - group: codex-review-${{ github.event.pull_request.number }} - cancel-in-progress: true - -jobs: - codex-review: - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: write - issues: write - - steps: - - name: Checkout PR - uses: actions/checkout@v4 - with: - fetch-depth: 0 - # Check out the PR's HEAD, not the merge commit, so Codex - # reviews the actual proposed code. - ref: ${{ github.event.pull_request.head.sha }} - - - name: Run Codex review - # NOTE: confirm the action ref against what your OpenAI Codex - # subscription provides. Common forms: - # - openai/codex-action@v1 - # - openai/codex@v1 - # If your install uses the GitHub App rather than an action, - # you can delete this step entirely; the App posts comments - # automatically when the workflow trigger fires. - uses: openai/codex-action@v1 - with: - openai-api-key: ${{ secrets.OPENAI_API_KEY }} - # Review focus — tells Codex what to prioritize - review-instructions: | - Review for: correctness, simplicity, type-safety, hidden bugs, - unnecessary abstractions, and adherence to the project's - CLAUDE.md and AGENTS.md conventions. Skip nitpicks on - formatting handled by linters. Prefer concrete suggestions - with file:line references. - - - name: Summarize review status - if: always() - # Move PR-controlled values into env vars rather than expanding - # them directly in the run script. PR head ref / branch name - # are user-controlled and should never be substituted into a - # shell command. See: - # https://github.blog/security/vulnerability-research/how-to-catch-github-actions-workflow-injections-before-attackers-do/ - env: - PR_NUMBER: ${{ github.event.pull_request.number }} - PR_BRANCH: ${{ github.event.pull_request.head.ref }} - PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} - run: | - echo "Codex review completed for PR #${PR_NUMBER}" - echo "Branch: ${PR_BRANCH}" - echo "Head SHA: ${PR_HEAD_SHA}" diff --git a/scripts/post-ingest.ts b/scripts/post-ingest.ts index 6a51f1b744..af6d6977ef 100644 --- a/scripts/post-ingest.ts +++ b/scripts/post-ingest.ts @@ -227,7 +227,9 @@ async function main(): Promise { try { // Stage knowledge-base/ — auto-git is intentionally narrow-scoped // to data only. Code, briefs, and config changes go through PRs - // (see .github/workflows/codex-review.yml for the review surface). + // and are reviewed by the chatgpt-codex-connector GitHub App, + // which triggers automatically on pull_request events. No workflow + // file is needed; the App listens via webhooks. execFileSync("git", ["add", "knowledge-base/"], { stdio: "pipe", cwd: PROJECT_ROOT, From c6d2c70e8ead49dbe5e31dbcba3b34a70499083a Mon Sep 17 00:00:00 2001 From: Jin Choi Date: Sun, 26 Apr 2026 14:31:32 -0700 Subject: [PATCH 3/3] fix(auto-git): use -z for null-terminated git output to handle quoted paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review on PR #1 caught a regression: `git diff --cached --name-only` C-quotes non-ASCII filenames by default (core.quotePath=true), turning `knowledge-base/foo-ñ.md` into `"knowledge-base/foo-\303\261.md"`. The leading quote defeats `isDataPath`'s `startsWith("knowledge-base/")` check, so legitimate Unicode-named data files would be misclassified as non-data and abort the auto-git step on pure data runs. Fix: pass `-z` to git diff for null-terminated raw paths, then split on `\0`. No quoting, no C-escapes. Removes the `.trim()` call too since -z output has no surrounding whitespace. Tests: 464 / 464 passing. The fix is internal to auto-git and does not change the predicate's contract. --- scripts/post-ingest.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/post-ingest.ts b/scripts/post-ingest.ts index af6d6977ef..5c94f32bfc 100644 --- a/scripts/post-ingest.ts +++ b/scripts/post-ingest.ts @@ -239,13 +239,18 @@ async function main(): Promise { // it manually before running post-ingest), abort the auto-commit // rather than mixing code into a data commit. This preserves the // Option B invariant that auto-commits to main are data-only. + // Use -z for null-terminated output. Without it, git C-quotes + // non-ASCII filenames (default core.quotePath=true), turning paths + // like `knowledge-base/foo-ñ.md` into `"knowledge-base/foo-\303\261.md"`. + // The leading quote would defeat isDataPath's prefix check and + // misclassify legitimate Unicode-named data files as non-data, + // aborting auto-git on pure data runs. const stagedFiles = execFileSync( "git", - ["diff", "--cached", "--name-only"], + ["diff", "--cached", "--name-only", "-z"], { encoding: "utf-8", cwd: PROJECT_ROOT }, ) - .split("\n") - .map((s) => s.trim()) + .split("\0") .filter(Boolean); const nonDataFiles = stagedFiles.filter((f) => !isDataPath(f));