diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000000..305f8393d7 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,49 @@ + + +## What this changes + + + +## Why + + + +## Scope + +- [ ] Code (scripts, schemas, agents, hooks) +- [ ] CI / GitHub workflows +- [ ] Tests +- [ ] Briefs / docs / README +- [ ] Skills +- [ ] Other: ____ + +## Risk surface + + + +## Tests + + + +## Out of scope + + + +## Verification + + + +--- + +🤖 If Codex review surfaces non-trivial findings, address or explicitly +acknowledge each one before merging. diff --git a/scripts/post-ingest.ts b/scripts/post-ingest.ts index 3165c8e441..5c94f32bfc 100644 --- a/scripts/post-ingest.ts +++ b/scripts/post-ingest.ts @@ -32,6 +32,24 @@ async function appendToActivityLog(entry: string): Promise { await writeFile(ACTIVITY_LOG, existing + line + "\n", "utf-8"); } +/** + * Classify a staged path as data (auto-committable to main) or not. + * + * Option B architecture: pure data updates flow through post-ingest's + * auto-git step directly to main. Anything else — scripts, configs, + * briefs, docs, schemas, hooks, GitHub workflows — must go through a + * branch + PR + Codex review. The guard in the auto-git step uses + * this predicate to refuse commits that mix code into a data run. + * + * Data scope is anything under `knowledge-base/`. The MASTER_INDEX, + * views, _summary files, and meta/ all live there. + */ +function isDataPath(path: string): boolean { + // Normalize POSIX-style separators (git output) regardless of host + const normalized = path.replace(/\\/g, "/"); + return normalized.startsWith("knowledge-base/"); +} + function buildAutoCommitMessage(): string { try { const diffStat = execFileSync( @@ -207,32 +225,70 @@ async function main(): Promise { let gitStatus = "SKIPPED"; console.log("\n>> Auto-git"); try { - // Stage knowledge-base/ + // Stage knowledge-base/ — auto-git is intentionally narrow-scoped + // to data only. Code, briefs, and config changes go through PRs + // and are reviewed by the chatgpt-codex-connector GitHub App, + // which triggers automatically on pull_request events. No workflow + // file is needed; the App listens via webhooks. execFileSync("git", ["add", "knowledge-base/"], { stdio: "pipe", cwd: PROJECT_ROOT, }); - // Check if there are staged changes - try { - execFileSync("git", ["diff", "--cached", "--quiet"], { - stdio: "pipe", - cwd: PROJECT_ROOT, - }); - // Exit 0 means no changes - console.log(" No changes to commit."); - gitStatus = "NO_CHANGES"; - } catch { - // Exit non-zero means there ARE staged changes — commit them - // Build a descriptive commit message from what actually changed - const commitMsg = buildAutoCommitMessage(); - execFileSync( - "git", - ["commit", "-m", commitMsg], - { stdio: "pipe", cwd: PROJECT_ROOT } + // Guard: if any non-data file is staged (because the user staged + // it manually before running post-ingest), abort the auto-commit + // rather than mixing code into a data commit. This preserves the + // Option B invariant that auto-commits to main are data-only. + // Use -z for null-terminated output. Without it, git C-quotes + // non-ASCII filenames (default core.quotePath=true), turning paths + // like `knowledge-base/foo-ñ.md` into `"knowledge-base/foo-\303\261.md"`. + // The leading quote would defeat isDataPath's prefix check and + // misclassify legitimate Unicode-named data files as non-data, + // aborting auto-git on pure data runs. + const stagedFiles = execFileSync( + "git", + ["diff", "--cached", "--name-only", "-z"], + { encoding: "utf-8", cwd: PROJECT_ROOT }, + ) + .split("\0") + .filter(Boolean); + + const nonDataFiles = stagedFiles.filter((f) => !isDataPath(f)); + + if (nonDataFiles.length > 0) { + console.error( + " Auto-git aborted: non-data files are staged. Auto-commits must be data-only.", + ); + console.error(" Staged non-data files:"); + for (const f of nonDataFiles) console.error(` - ${f}`); + console.error( + " Resolve by either unstaging these (`git restore --staged `) or", + ); + console.error( + " committing them manually on a feature branch + PR (Option B).", ); - console.log(` Committed: ${commitMsg}`); - gitStatus = "COMMITTED"; + gitStatus = "ABORTED_NON_DATA_STAGED"; + } else { + // Check if there are staged changes + try { + execFileSync("git", ["diff", "--cached", "--quiet"], { + stdio: "pipe", + cwd: PROJECT_ROOT, + }); + // Exit 0 means no changes + console.log(" No changes to commit."); + gitStatus = "NO_CHANGES"; + } catch { + // Exit non-zero means there ARE staged changes — commit them. + // Build a descriptive commit message from what actually changed. + const commitMsg = buildAutoCommitMessage(); + execFileSync("git", ["commit", "-m", commitMsg], { + stdio: "pipe", + cwd: PROJECT_ROOT, + }); + console.log(` Committed: ${commitMsg}`); + gitStatus = "COMMITTED"; + } } } catch (err: unknown) { const msg = err instanceof Error ? err.message : String(err);