diff --git a/.agents/skills/gstack-autoplan/agents/openai.yaml b/.agents/skills/gstack-autoplan/agents/openai.yaml index 28794c1a3..361031f6b 100644 --- a/.agents/skills/gstack-autoplan/agents/openai.yaml +++ b/.agents/skills/gstack-autoplan/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-autoplan" - short_description: "Auto-review pipeline — reads the full CEO, design, and eng review skills from disk and runs them sequentially with..." + short_description: "" default_prompt: "Use gstack-autoplan for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-benchmark/agents/openai.yaml b/.agents/skills/gstack-benchmark/agents/openai.yaml index 4df54f31f..1c461de17 100644 --- a/.agents/skills/gstack-benchmark/agents/openai.yaml +++ b/.agents/skills/gstack-benchmark/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-benchmark" - short_description: "Performance regression detection using the browse daemon. Establishes baselines for page load times, Core Web..." + short_description: "" default_prompt: "Use gstack-benchmark for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-browse/agents/openai.yaml b/.agents/skills/gstack-browse/agents/openai.yaml index 851f80838..b612fd2c2 100644 --- a/.agents/skills/gstack-browse/agents/openai.yaml +++ b/.agents/skills/gstack-browse/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-browse" - short_description: "Fast headless browser for QA testing and site dogfooding. Navigate any URL, interact with elements, verify page..." + short_description: "" default_prompt: "Use gstack-browse for this task." 
policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-canary/agents/openai.yaml b/.agents/skills/gstack-canary/agents/openai.yaml index e51e42311..1c9239d7e 100644 --- a/.agents/skills/gstack-canary/agents/openai.yaml +++ b/.agents/skills/gstack-canary/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-canary" - short_description: "Post-deploy canary monitoring. Watches the live app for console errors, performance regressions, and page failures..." + short_description: "" default_prompt: "Use gstack-canary for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-careful/agents/openai.yaml b/.agents/skills/gstack-careful/agents/openai.yaml index f470fcaa7..9c7e17664 100644 --- a/.agents/skills/gstack-careful/agents/openai.yaml +++ b/.agents/skills/gstack-careful/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-careful" - short_description: "Safety guardrails for destructive commands. Warns before rm -rf, DROP TABLE, force-push, git reset --hard, kubectl..." + short_description: "" default_prompt: "Use gstack-careful for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-connect-chrome/SKILL.md b/.agents/skills/gstack-connect-chrome/SKILL.md index f19989231..03cf40864 100644 --- a/.agents/skills/gstack-connect-chrome/SKILL.md +++ b/.agents/skills/gstack-connect-chrome/SKILL.md @@ -1,11 +1,17 @@ --- name: connect-chrome +version: 0.1.0 description: | Launch real Chrome controlled by gstack with the Side Panel extension auto-loaded. One command: connects Claude to a visible Chrome window where you can watch every action in real time. The extension shows a live activity feed in the Side Panel. Use when asked to "connect chrome", "open chrome", "real browser", "launch chrome", "side panel", or "control my browser". 
+allowed-tools: + - Bash + - Read + - AskUserQuestion + --- diff --git a/.agents/skills/gstack-cso/agents/openai.yaml b/.agents/skills/gstack-cso/agents/openai.yaml index dd5e7bde8..69d5263e5 100644 --- a/.agents/skills/gstack-cso/agents/openai.yaml +++ b/.agents/skills/gstack-cso/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-cso" - short_description: "Chief Security Officer mode. Infrastructure-first security audit: secrets archaeology, dependency supply chain,..." + short_description: "" default_prompt: "Use gstack-cso for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-design-consultation/agents/openai.yaml b/.agents/skills/gstack-design-consultation/agents/openai.yaml index 3af30a8a2..af892c498 100644 --- a/.agents/skills/gstack-design-consultation/agents/openai.yaml +++ b/.agents/skills/gstack-design-consultation/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-design-consultation" - short_description: "Design consultation: understands your product, researches the landscape, proposes a complete design system..." + short_description: "" default_prompt: "Use gstack-design-consultation for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-design-review/agents/openai.yaml b/.agents/skills/gstack-design-review/agents/openai.yaml index 473554d34..98f30c6df 100644 --- a/.agents/skills/gstack-design-review/agents/openai.yaml +++ b/.agents/skills/gstack-design-review/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-design-review" - short_description: "Designer's eye QA: finds visual inconsistency, spacing issues, hierarchy problems, AI slop patterns, and slow..." + short_description: "" default_prompt: "Use gstack-design-review for this task." 
policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-document-release/agents/openai.yaml b/.agents/skills/gstack-document-release/agents/openai.yaml index 453bf5bd1..43e5b912f 100644 --- a/.agents/skills/gstack-document-release/agents/openai.yaml +++ b/.agents/skills/gstack-document-release/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-document-release" - short_description: "Post-ship documentation update. Reads all project docs, cross-references the diff, updates..." + short_description: "" default_prompt: "Use gstack-document-release for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-freeze/agents/openai.yaml b/.agents/skills/gstack-freeze/agents/openai.yaml index 0b643f68a..fa86feebf 100644 --- a/.agents/skills/gstack-freeze/agents/openai.yaml +++ b/.agents/skills/gstack-freeze/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-freeze" - short_description: "Restrict file edits to a specific directory for the session. Blocks Edit and Write outside the allowed path. Use..." + short_description: "" default_prompt: "Use gstack-freeze for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-guard/agents/openai.yaml b/.agents/skills/gstack-guard/agents/openai.yaml index c7fe7902e..9736ddfd0 100644 --- a/.agents/skills/gstack-guard/agents/openai.yaml +++ b/.agents/skills/gstack-guard/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-guard" - short_description: "Full safety mode: destructive command warnings + directory-scoped edits. Combines /careful (warns before rm -rf,..." + short_description: "" default_prompt: "Use gstack-guard for this task." 
policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-investigate/agents/openai.yaml b/.agents/skills/gstack-investigate/agents/openai.yaml index 3c778414f..ce13e6241 100644 --- a/.agents/skills/gstack-investigate/agents/openai.yaml +++ b/.agents/skills/gstack-investigate/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-investigate" - short_description: "Systematic debugging with root cause investigation. Four phases: investigate, analyze, hypothesize, implement. Iron..." + short_description: "" default_prompt: "Use gstack-investigate for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-land-and-deploy/agents/openai.yaml b/.agents/skills/gstack-land-and-deploy/agents/openai.yaml index 73a9d7069..025accde5 100644 --- a/.agents/skills/gstack-land-and-deploy/agents/openai.yaml +++ b/.agents/skills/gstack-land-and-deploy/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-land-and-deploy" - short_description: "Land and deploy workflow. Merges the PR, waits for CI and deploy, verifies production health via canary checks...." + short_description: "" default_prompt: "Use gstack-land-and-deploy for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-office-hours/agents/openai.yaml b/.agents/skills/gstack-office-hours/agents/openai.yaml index 51ac282dd..3ce30970d 100644 --- a/.agents/skills/gstack-office-hours/agents/openai.yaml +++ b/.agents/skills/gstack-office-hours/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-office-hours" - short_description: "YC Office Hours — two modes. Startup mode: six forcing questions that expose demand reality, status quo, desperate..." + short_description: "" default_prompt: "Use gstack-office-hours for this task." 
policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-plan-ceo-review/agents/openai.yaml b/.agents/skills/gstack-plan-ceo-review/agents/openai.yaml index 6927e353f..621607b07 100644 --- a/.agents/skills/gstack-plan-ceo-review/agents/openai.yaml +++ b/.agents/skills/gstack-plan-ceo-review/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-plan-ceo-review" - short_description: "CEO/founder-mode plan review. Rethink the problem, find the 10-star product, challenge premises, expand scope when..." + short_description: "" default_prompt: "Use gstack-plan-ceo-review for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-plan-design-review/agents/openai.yaml b/.agents/skills/gstack-plan-design-review/agents/openai.yaml index d39482125..4fd1d487c 100644 --- a/.agents/skills/gstack-plan-design-review/agents/openai.yaml +++ b/.agents/skills/gstack-plan-design-review/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-plan-design-review" - short_description: "Designer's eye plan review — interactive, like CEO and Eng review. Rates each design dimension 0-10, explains what..." + short_description: "" default_prompt: "Use gstack-plan-design-review for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-plan-eng-review/agents/openai.yaml b/.agents/skills/gstack-plan-eng-review/agents/openai.yaml index 96eefa75a..de62c81ba 100644 --- a/.agents/skills/gstack-plan-eng-review/agents/openai.yaml +++ b/.agents/skills/gstack-plan-eng-review/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-plan-eng-review" - short_description: "Eng manager-mode plan review. Lock in the execution plan — architecture, data flow, diagrams, edge cases, test..." + short_description: "" default_prompt: "Use gstack-plan-eng-review for this task." 
policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-qa-only/agents/openai.yaml b/.agents/skills/gstack-qa-only/agents/openai.yaml index afbd1ee34..b4b3c2542 100644 --- a/.agents/skills/gstack-qa-only/agents/openai.yaml +++ b/.agents/skills/gstack-qa-only/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-qa-only" - short_description: "Report-only QA testing. Systematically tests a web application and produces a structured report with health score,..." + short_description: "" default_prompt: "Use gstack-qa-only for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-qa/agents/openai.yaml b/.agents/skills/gstack-qa/agents/openai.yaml index 6d940241d..16fdbb7f9 100644 --- a/.agents/skills/gstack-qa/agents/openai.yaml +++ b/.agents/skills/gstack-qa/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-qa" - short_description: "Systematically QA test a web application and fix bugs found. Runs QA testing, then iteratively fixes bugs in source..." + short_description: "" default_prompt: "Use gstack-qa for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-retro/agents/openai.yaml b/.agents/skills/gstack-retro/agents/openai.yaml index dbf45f2d9..80e286e23 100644 --- a/.agents/skills/gstack-retro/agents/openai.yaml +++ b/.agents/skills/gstack-retro/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-retro" - short_description: "Weekly engineering retrospective. Analyzes commit history, work patterns, and code quality metrics with persistent..." + short_description: "" default_prompt: "Use gstack-retro for this task." 
policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-review/agents/openai.yaml b/.agents/skills/gstack-review/agents/openai.yaml index ba44751c5..1fa3a63ba 100644 --- a/.agents/skills/gstack-review/agents/openai.yaml +++ b/.agents/skills/gstack-review/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-review" - short_description: "Pre-landing PR review. Analyzes diff against the base branch for SQL safety, LLM trust boundary violations,..." + short_description: "" default_prompt: "Use gstack-review for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-setup-browser-cookies/agents/openai.yaml b/.agents/skills/gstack-setup-browser-cookies/agents/openai.yaml index 9f51dcbfb..5734edf0e 100644 --- a/.agents/skills/gstack-setup-browser-cookies/agents/openai.yaml +++ b/.agents/skills/gstack-setup-browser-cookies/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-setup-browser-cookies" - short_description: "Import cookies from your real Chromium browser into the headless browse session. Opens an interactive picker UI..." + short_description: "" default_prompt: "Use gstack-setup-browser-cookies for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-setup-deploy/agents/openai.yaml b/.agents/skills/gstack-setup-deploy/agents/openai.yaml index b666712ef..bc4ef80c5 100644 --- a/.agents/skills/gstack-setup-deploy/agents/openai.yaml +++ b/.agents/skills/gstack-setup-deploy/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-setup-deploy" - short_description: "Configure deployment settings for /land-and-deploy. Detects your deploy platform (Fly.io, Render, Vercel, Netlify,..." + short_description: "" default_prompt: "Use gstack-setup-deploy for this task." 
policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-ship/agents/openai.yaml b/.agents/skills/gstack-ship/agents/openai.yaml index 537ab1558..07630dde8 100644 --- a/.agents/skills/gstack-ship/agents/openai.yaml +++ b/.agents/skills/gstack-ship/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-ship" - short_description: "Ship workflow: detect + merge base branch, run tests, review diff, bump VERSION, update CHANGELOG, commit, push,..." + short_description: "" default_prompt: "Use gstack-ship for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-unfreeze/agents/openai.yaml b/.agents/skills/gstack-unfreeze/agents/openai.yaml index 93de8da67..627969872 100644 --- a/.agents/skills/gstack-unfreeze/agents/openai.yaml +++ b/.agents/skills/gstack-unfreeze/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-unfreeze" - short_description: "Clear the freeze boundary set by /freeze, allowing edits to all directories again. Use when you want to widen edit..." + short_description: "" default_prompt: "Use gstack-unfreeze for this task." policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack-upgrade/agents/openai.yaml b/.agents/skills/gstack-upgrade/agents/openai.yaml index ca055a017..c3007f7a6 100644 --- a/.agents/skills/gstack-upgrade/agents/openai.yaml +++ b/.agents/skills/gstack-upgrade/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack-upgrade" - short_description: "Upgrade gstack to the latest version. Detects global vs vendored install, runs the upgrade, and shows what's new...." + short_description: "" default_prompt: "Use gstack-upgrade for this task." 
policy: allow_implicit_invocation: true diff --git a/.agents/skills/gstack/agents/openai.yaml b/.agents/skills/gstack/agents/openai.yaml index fe13e8ed7..373b5086e 100644 --- a/.agents/skills/gstack/agents/openai.yaml +++ b/.agents/skills/gstack/agents/openai.yaml @@ -1,6 +1,6 @@ interface: display_name: "gstack" - short_description: "Fast headless browser for QA testing and site dogfooding. Navigate pages, interact with elements, verify state, diff..." + short_description: "" default_prompt: "Use gstack for this task." policy: allow_implicit_invocation: true diff --git a/SKILL.md b/SKILL.md index a98ce915d..57044c2c3 100644 --- a/SKILL.md +++ b/SKILL.md @@ -44,7 +44,7 @@ _SESSION_ID="$$-$(date +%s)" echo "TELEMETRY: ${_TEL:-off}" echo "TEL_PROMPTED: $_TEL_PROMPTED" mkdir -p ~/.gstack/analytics -echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true +echo '{"skill":"gstack-contrib","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename "$(git rev-parse --show-toplevel 2>/dev/null)" 2>/dev/null || echo "unknown")'"}' >> ~/.gstack/analytics/skill-usage.jsonl 2>/dev/null || true # zsh-compatible: use find instead of glob to avoid NOMATCH error for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` diff --git a/browse/test/fixtures/test-cookies-linux.db b/browse/test/fixtures/test-cookies-linux.db new file mode 100644 index 000000000..34e92702d Binary files /dev/null and b/browse/test/fixtures/test-cookies-linux.db differ diff --git a/browse/test/fixtures/test-cookies.db b/browse/test/fixtures/test-cookies.db new file mode 100644 index 000000000..06d9743df Binary files 
/dev/null and b/browse/test/fixtures/test-cookies.db differ diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index 32394b37d..6de9af3fe 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -463,14 +463,25 @@ Use AskUserQuestion: If user chooses B, skip this step and continue. -**Check Codex availability:** +**Detect available second-opinion provider:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` -**If Codex is available**, launch both voices simultaneously: +**If a provider is available**, launch both voices simultaneously: + +1. **Cross-model design voice** (via Bash): -1. **Codex design voice** (via Bash): +**If Codex:** ```bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } @@ -484,6 +495,21 @@ codex exec "Given this product context, propose a complete design direction: Be opinionated. Be specific. Do not hedge. This is YOUR design direction — own it." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_DESIGN" ``` + +**If Gemini:** +```bash +TMPERR_DESIGN=$(mktemp /tmp/gemini-design-XXXXXXXX) +gemini --prompt "Given this product context, propose a complete design direction: +- Visual thesis: one sentence describing mood, material, and energy +- Typography: specific font names (not defaults — no Inter/Roboto/Arial/system) + hex colors +- Color system: CSS variables for background, surface, primary text, muted text, accent +- Layout: composition-first, not component-first. 
First viewport as poster, not document +- Differentiation: 2 deliberate departures from category norms +- Anti-slop: no purple gradients, no 3-column icon grids, no centered everything, no decorative blobs + +Be opinionated. Be specific. Do not hedge. This is YOUR design direction — own it." 2>"$TMPERR_DESIGN" +``` + Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash cat "$TMPERR_DESIGN" && rm -f "$TMPERR_DESIGN" @@ -499,13 +525,13 @@ Dispatch a subagent with this prompt: Be bold. Be specific. No hedging." **Error handling (all non-blocking):** -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run `codex login` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response." -- On any Codex error: proceed with Claude subagent output only, tagged `[single-model]`. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run `codex login` or `gemini` to authenticate." +- **Timeout:** "[Provider] timed out after 5 minutes." +- **Empty response:** "[Provider] returned no response." +- On any provider error: proceed with Claude subagent output only, tagged `[single-model]`. - If Claude subagent also fails: "Outside voices unavailable — continuing with primary review." -Present Codex output under a `CODEX SAYS (design direction):` header. +Present cross-model output under a `{PROVIDER} SAYS (design direction):` header (substituting Codex or Gemini). Present subagent output under a `CLAUDE SUBAGENT (design direction):` header. **Synthesis:** Claude main references both Codex and subagent proposals in the Phase 3 proposal. Present: @@ -517,7 +543,7 @@ Present subagent output under a `CLAUDE SUBAGENT (design direction):` header. 
```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"design-outside-voices","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "codex-only", "subagent-only", or "unavailable". +Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "gemini+subagent", "codex-only", "gemini-only", "subagent-only", or "unavailable". ## Phase 3: The Complete Proposal diff --git a/design-review/SKILL.md b/design-review/SKILL.md index 55674c3b3..1fdc1509e 100644 --- a/design-review/SKILL.md +++ b/design-review/SKILL.md @@ -988,14 +988,25 @@ Record baseline design score and AI slop score at end of Phase 6. **Automatic:** Outside voices run automatically when Codex is available. No opt-in needed. -**Check Codex availability:** +**Detect available second-opinion provider:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` -**If Codex is available**, launch both voices simultaneously: +**If a provider is available**, launch both voices simultaneously: + +1. **Cross-model design voice** (via Bash): -1. **Codex design voice** (via Bash): +**If Codex:** ```bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } @@ -1030,6 +1041,42 @@ HARD REJECTION — flag if ANY apply: Be specific. Reference file:line for every finding." 
-C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DESIGN" ``` + +**If Gemini:** +```bash +TMPERR_DESIGN=$(mktemp /tmp/gemini-design-XXXXXXXX) +gemini --prompt "Review the frontend source code in this repo. Evaluate against these design hard rules: +- Spacing: systematic (design tokens / CSS variables) or magic numbers? +- Typography: expressive purposeful fonts or default stacks? +- Color: CSS variables with defined system, or hardcoded hex scattered? +- Responsive: breakpoints defined? calc(100svh - header) for heroes? Mobile tested? +- A11y: ARIA landmarks, alt text, contrast ratios, 44px touch targets? +- Motion: 2-3 intentional animations, or zero / ornamental only? +- Cards: used only when card IS the interaction? No decorative card grids? + +First classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, then apply matching rules. + +LITMUS CHECKS — answer YES/NO: +1. Brand/product unmistakable in first screen? +2. One strong visual anchor present? +3. Page understandable by scanning headlines only? +4. Each section has one job? +5. Are cards actually necessary? +6. Does motion improve hierarchy or atmosphere? +7. Would design feel premium with all decorative shadows removed? + +HARD REJECTION — flag if ANY apply: +1. Generic SaaS card grid as first impression +2. Beautiful image with weak brand +3. Strong headline with no clear action +4. Busy imagery behind text +5. Sections repeating same mood statement +6. Carousel with no narrative purpose +7. App UI made of stacked cards instead of layout + +Be specific. Reference file:line for every finding." 2>"$TMPERR_DESIGN" +``` + Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash cat "$TMPERR_DESIGN" && rm -f "$TMPERR_DESIGN" @@ -1046,13 +1093,13 @@ Dispatch a subagent with this prompt: For each finding: what's wrong, severity (critical/high/medium), and the file:line." 
**Error handling (all non-blocking):** -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run `codex login` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response." -- On any Codex error: proceed with Claude subagent output only, tagged `[single-model]`. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run `codex login` or `gemini` to authenticate." +- **Timeout:** "[Provider] timed out after 5 minutes." +- **Empty response:** "[Provider] returned no response." +- On any provider error: proceed with Claude subagent output only, tagged `[single-model]`. - If Claude subagent also fails: "Outside voices unavailable — continuing with primary review." -Present Codex output under a `CODEX SAYS (design source audit):` header. +Present cross-model output under a `{PROVIDER} SAYS (design source audit):` header (substituting Codex or Gemini). Present subagent output under a `CLAUDE SUBAGENT (design consistency):` header. **Synthesis — Litmus scorecard:** @@ -1064,7 +1111,7 @@ Merge findings into the triage with `[codex]` / `[subagent]` / `[cross-model]` t ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"design-outside-voices","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "codex-only", "subagent-only", or "unavailable". +Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "gemini+subagent", "codex-only", "gemini-only", "subagent-only", or "unavailable". 
## Phase 7: Triage diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md index 345f4c007..b5723ce9b 100644 --- a/office-hours/SKILL.md +++ b/office-hours/SKILL.md @@ -674,13 +674,22 @@ Use AskUserQuestion to confirm. If the user disagrees with a premise, revise und ## Phase 3.5: Cross-Model Second Opinion (optional) -**Binary check first:** +**Detect available second-opinion provider:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` -Use AskUserQuestion (regardless of codex availability): +Use AskUserQuestion (regardless of provider availability): > Want a second opinion from an independent AI perspective? It will review your problem statement, key answers, premises, and any landscape findings from this session without having seen this conversation — it gets a structured summary. Usually takes 2-5 minutes. > A) Yes, get a second opinion @@ -688,7 +697,7 @@ Use AskUserQuestion (regardless of codex availability): If B: skip Phase 3.5 entirely. Remember that the second opinion did NOT run (affects design doc, founder signals, and Phase 4 below). -**If A: Run the Codex cold read.** +**If A: Run the cross-model cold read.** 1. Assemble a structured context block from Phases 1-3: - Mode (Startup or Builder) @@ -701,7 +710,7 @@ If B: skip Phase 3.5 entirely. Remember that the second opinion did NOT run (aff 2. **Write the assembled prompt to a temp file** (prevents shell injection from user-derived content): ```bash -CODEX_PROMPT_FILE=$(mktemp /tmp/gstack-codex-oh-XXXXXXXX.txt) +SECOND_OPINION_PROMPT_FILE=$(mktemp /tmp/gstack-secondop-XXXXXXXX.txt) ``` Write the full prompt (context block + instructions) to this file. 
Use the mode-appropriate variant: @@ -710,28 +719,37 @@ Write the full prompt (context block + instructions) to this file. Use the mode- **Builder mode instructions:** "You are an independent technical advisor reading a transcript of a builder brainstorming session. [CONTEXT BLOCK HERE]. Your job: 1) What is the COOLEST version of this they haven't considered? 2) What's the ONE thing from their answers that reveals what excites them most? Quote it. 3) What existing open source project or tool gets them 50% of the way there — and what's the 50% they'd need to build? 4) If you had a weekend to build this, what would you build first? Be specific. Be direct. No preamble." -3. Run Codex: +3. **Run the second-opinion provider:** + +**If Codex is the provider:** ```bash TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_OH" +codex exec "$(cat "$SECOND_OPINION_PROMPT_FILE")" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_OH" +``` + +**If Gemini is the provider:** + +```bash +TMPERR_OH=$(mktemp /tmp/gemini-oh-err-XXXXXXXX) +gemini --prompt "$(cat "$SECOND_OPINION_PROMPT_FILE")" 2>"$TMPERR_OH" ``` Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash cat "$TMPERR_OH" -rm -f "$TMPERR_OH" "$CODEX_PROMPT_FILE" +rm -f "$TMPERR_OH" "$SECOND_OPINION_PROMPT_FILE" ``` **Error handling:** All errors are non-blocking — second opinion is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." Fall back to Claude subagent. -- **Timeout:** "Codex timed out after 5 minutes." Fall back to Claude subagent. 
-- **Empty response:** "Codex returned no response." Fall back to Claude subagent. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run \`codex login\` or \`gemini\` to authenticate." Fall back to Claude subagent. +- **Timeout:** "[Provider] timed out after 5 minutes." Fall back to Claude subagent. +- **Empty response:** "[Provider] returned no response." Fall back to Claude subagent. -On any Codex error, fall back to the Claude subagent below. +On any provider error, fall back to the Claude subagent below. -**If CODEX_NOT_AVAILABLE (or Codex errored):** +**If no provider available (or provider errored):** Dispatch via the Agent tool. The subagent has fresh context — genuine independence. @@ -743,19 +761,11 @@ If the subagent fails or times out: "Second opinion unavailable. Continuing to P 4. **Presentation:** -If Codex ran: +Use the provider name in the header: ``` -SECOND OPINION (Codex): +SECOND OPINION ({Provider name — Codex, Gemini, or Claude subagent}): ════════════════════════════════════════════════════════════ - -════════════════════════════════════════════════════════════ -``` - -If Claude subagent ran: -``` -SECOND OPINION (Claude subagent): -════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ ``` @@ -764,10 +774,10 @@ SECOND OPINION (Claude subagent): - Where Claude disagrees and why - Whether the challenged premise changes Claude's recommendation -6. **Premise revision check:** If Codex challenged an agreed premise, use AskUserQuestion: +6. **Premise revision check:** If the second opinion challenged an agreed premise, use AskUserQuestion: -> Codex challenged premise #{N}: "{premise text}". Their argument: "{reasoning}". -> A) Revise this premise based on Codex's input +> [Provider] challenged premise #{N}: "{premise text}". Their argument: "{reasoning}". 
+> A) Revise this premise based on the outside input > B) Keep the original premise — proceed to alternatives If A: revise the premise and note the revision. If B: proceed (and note that the user defended this premise with reasoning — this is a founder signal if they articulate WHY they disagree, not just dismiss). @@ -871,29 +881,47 @@ The screenshot file at `/tmp/gstack-sketch.png` can be referenced by downstream After the wireframe is approved, offer outside design perspectives: ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` -If Codex is available, use AskUserQuestion: -> "Want outside design perspectives on the chosen approach? Codex proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction." +If a provider is available, use AskUserQuestion: +> "Want outside design perspectives on the chosen approach? A cross-model voice proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction." > > A) Yes — get outside design voices > B) No — proceed without If user chooses A, launch both voices simultaneously: -1. **Codex** (via Bash, `model_reasoning_effort="medium"`): +1. **Cross-model voice** (via Bash): + +**If Codex:** ```bash TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. 
Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" ``` + +**If Gemini:** +```bash +TMPERR_SKETCH=$(mktemp /tmp/gemini-sketch-XXXXXXXX) +gemini --prompt "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." 2>"$TMPERR_SKETCH" +``` + Use a 5-minute timeout (`timeout: 300000`). After completion: `cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"` 2. **Claude subagent** (via Agent tool): "For this product approach, what design direction would you recommend? What aesthetic, typography, and interaction patterns fit? What would make this approach feel inevitable to the user? Be specific — font names, hex colors, spacing values." -Present Codex output under `CODEX SAYS (design sketch):` and subagent output under `CLAUDE SUBAGENT (design direction):`. +Present cross-model output under `{PROVIDER} SAYS (design sketch):` (substituting Codex or Gemini) and subagent output under `CLAUDE SUBAGENT (design direction):`. Error handling: all non-blocking. On failure, skip and continue. --- diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index 604411582..a64183ee3 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -1048,10 +1048,19 @@ After all review sections are complete, offer an independent second opinion from different AI system. Two models agreeing on a plan is stronger signal than one model's thorough review. 
-**Check tool availability:** +**Detect available second-opinion provider:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` Use AskUserQuestion: @@ -1090,7 +1099,7 @@ compliments. Just the problems. THE PLAN: " -**If CODEX_AVAILABLE:** +**If Codex is the provider:** ```bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) @@ -1098,6 +1107,13 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" ``` +**If Gemini is the provider:** + +```bash +TMPERR_PV=$(mktemp /tmp/gemini-planreview-XXXXXXXX) +gemini --prompt "" 2>"$TMPERR_PV" +``` + Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash cat "$TMPERR_PV" @@ -1106,20 +1122,22 @@ cat "$TMPERR_PV" Present the full output verbatim: ``` -CODEX SAYS (plan review — outside voice): +{PROVIDER} SAYS (plan review — outside voice): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ ``` +Substitute {PROVIDER} with "CODEX" or "GEMINI" based on which ran. + **Error handling:** All errors are non-blocking — the outside voice is informational. -- Auth failure (stderr contains "auth", "login", "unauthorized"): "Codex auth failed. Run \`codex login\` to authenticate." -- Timeout: "Codex timed out after 5 minutes." -- Empty response: "Codex returned no response." +- Auth failure (stderr contains "auth", "login", "unauthorized"): "[Provider] auth failed. Run \`codex login\` or \`gemini\` to authenticate." 
+- Timeout: "[Provider] timed out after 5 minutes." +- Empty response: "[Provider] returned no response." -On any Codex error, fall back to the Claude adversarial subagent. +On any provider error, fall back to the Claude adversarial subagent. -**If CODEX_NOT_AVAILABLE (or Codex errored):** +**If no provider available (or provider errored):** Dispatch via the Agent tool. The subagent has fresh context — genuine independence. @@ -1156,7 +1174,7 @@ If no tension points exist, note: "No cross-model tension — both reviewers agr ``` Substitute: STATUS = "clean" if no findings, "issues_found" if findings exist. -SOURCE = "codex" if Codex ran, "claude" if subagent ran. +SOURCE = "codex" if Codex ran, "gemini" if Gemini ran, "claude" if subagent ran. **Cleanup:** Run `rm -f "$TMPERR_PV"` after processing (if Codex was used). diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index 31389bbc4..4ea4806a0 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -479,14 +479,25 @@ Use AskUserQuestion: If user chooses B, skip this step and continue. -**Check Codex availability:** +**Detect available second-opinion provider:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` -**If Codex is available**, launch both voices simultaneously: +**If a provider is available**, launch both voices simultaneously: -1. **Codex design voice** (via Bash): +1. 
**Cross-model design voice** (via Bash): + +**If Codex:** ```bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } @@ -517,6 +528,38 @@ HARD RULES — first classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, the For each finding: what's wrong, what will happen if it ships unresolved, and the specific fix. Be opinionated. No hedging." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DESIGN" ``` + +**If Gemini:** +```bash +TMPERR_DESIGN=$(mktemp /tmp/gemini-design-XXXXXXXX) +gemini --prompt "Read the plan file at [plan-file-path]. Evaluate this plan's UI/UX design against these criteria. + +HARD REJECTION — flag if ANY apply: +1. Generic SaaS card grid as first impression +2. Beautiful image with weak brand +3. Strong headline with no clear action +4. Busy imagery behind text +5. Sections repeating same mood statement +6. Carousel with no narrative purpose +7. App UI made of stacked cards instead of layout + +LITMUS CHECKS — answer YES or NO for each: +1. Brand/product unmistakable in first screen? +2. One strong visual anchor present? +3. Page understandable by scanning headlines only? +4. Each section has one job? +5. Are cards actually necessary? +6. Does motion improve hierarchy or atmosphere? +7. Would design feel premium with all decorative shadows removed? + +HARD RULES — first classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, then flag violations of the matching rule set: +- MARKETING: First viewport as one composition, brand-first hierarchy, full-bleed hero, 2-3 intentional motions, composition-first layout +- APP UI: Calm surface hierarchy, dense but readable, utility language, minimal chrome +- UNIVERSAL: CSS variables for colors, no default font stacks, one job per section, cards earn existence + +For each finding: what's wrong, what will happen if it ships unresolved, and the specific fix. 
Be opinionated. No hedging." 2>"$TMPERR_DESIGN" +``` + Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash cat "$TMPERR_DESIGN" && rm -f "$TMPERR_DESIGN" @@ -535,13 +578,13 @@ Dispatch a subagent with this prompt: For each finding: what's wrong, severity (critical/high/medium), and the fix." **Error handling (all non-blocking):** -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run `codex login` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response." -- On any Codex error: proceed with Claude subagent output only, tagged `[single-model]`. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run \`codex login\` or \`gemini\` to authenticate." +- **Timeout:** "[Provider] timed out after 5 minutes." +- **Empty response:** "[Provider] returned no response." +- On any provider error: proceed with Claude subagent output only, tagged `[single-model]`. - If Claude subagent also fails: "Outside voices unavailable — continuing with primary review." -Present Codex output under a `CODEX SAYS (design critique):` header. +Present cross-model output under a `{PROVIDER} SAYS (design critique):` header (substituting Codex or Gemini). Present subagent output under a `CLAUDE SUBAGENT (design completeness):` header. **Synthesis — Litmus scorecard:** @@ -575,7 +618,7 @@ Fill in each cell from the Codex and subagent outputs. CONFIRMED = both agree. D ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"design-outside-voices","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "codex-only", "subagent-only", or "unavailable". 
+Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "gemini+subagent", "codex-only", "gemini-only", "subagent-only", or "unavailable". ## The 0-10 Rating Method diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index e9997d842..9895cb07b 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -706,10 +706,19 @@ After all review sections are complete, offer an independent second opinion from different AI system. Two models agreeing on a plan is stronger signal than one model's thorough review. -**Check tool availability:** +**Detect available second-opinion provider:** ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` Use AskUserQuestion: @@ -748,7 +757,7 @@ compliments. Just the problems. THE PLAN: " -**If CODEX_AVAILABLE:** +**If Codex is the provider:** ```bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) @@ -756,6 +765,13 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" ``` +**If Gemini is the provider:** + +```bash +TMPERR_PV=$(mktemp /tmp/gemini-planreview-XXXXXXXX) +gemini --prompt "" 2>"$TMPERR_PV" +``` + Use a 5-minute timeout (`timeout: 300000`). 
After the command completes, read stderr: ```bash cat "$TMPERR_PV" @@ -764,20 +780,22 @@ cat "$TMPERR_PV" Present the full output verbatim: ``` -CODEX SAYS (plan review — outside voice): +{PROVIDER} SAYS (plan review — outside voice): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ ``` +Substitute {PROVIDER} with "CODEX" or "GEMINI" based on which ran. + **Error handling:** All errors are non-blocking — the outside voice is informational. -- Auth failure (stderr contains "auth", "login", "unauthorized"): "Codex auth failed. Run \`codex login\` to authenticate." -- Timeout: "Codex timed out after 5 minutes." -- Empty response: "Codex returned no response." +- Auth failure (stderr contains "auth", "login", "unauthorized"): "[Provider] auth failed. Run \`codex login\` or \`gemini\` to authenticate." +- Timeout: "[Provider] timed out after 5 minutes." +- Empty response: "[Provider] returned no response." -On any Codex error, fall back to the Claude adversarial subagent. +On any provider error, fall back to the Claude adversarial subagent. -**If CODEX_NOT_AVAILABLE (or Codex errored):** +**If no provider available (or provider errored):** Dispatch via the Agent tool. The subagent has fresh context — genuine independence. @@ -814,7 +832,7 @@ If no tension points exist, note: "No cross-model tension — both reviewers agr ``` Substitute: STATUS = "clean" if no findings, "issues_found" if findings exist. -SOURCE = "codex" if Codex ran, "claude" if subagent ran. +SOURCE = "codex" if Codex ran, "gemini" if Gemini ran, "claude" if subagent ran. **Cleanup:** Run `rm -f "$TMPERR_PV"` after processing (if Codex was used). 
diff --git a/review/SKILL.md b/review/SKILL.md index b06e38e25..1cb77d8c5 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -612,20 +612,37 @@ source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null) Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count, COMMIT = output of `git rev-parse --short HEAD`. -7. **Codex design voice** (optional, automatic if available): +7. **Cross-model design voice** (optional, automatic if available): ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` -If Codex is available, run a lightweight design check on the diff: +If a provider is available, run a lightweight design check on the diff: +**If Codex:** ```bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." 
-C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" ``` +**If Gemini:** +```bash +TMPERR_DRL=$(mktemp /tmp/gemini-drl-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff for design issues. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR_DRL" +``` + Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" @@ -633,7 +650,7 @@ cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" **Error handling:** All errors are non-blocking. On auth failure, timeout, or empty response — skip with a brief note and continue. -Present Codex output under a `CODEX (design):` header, merged with the checklist findings above. +Present output under a `{PROVIDER} (design):` header (substituting Codex or Gemini), merged with the checklist findings above. Include any design findings alongside the findings from Step 4. They follow the same Fix-First flow in Step 5 — AUTO-FIX for mechanical CSS fixes, ASK for everything else. @@ -954,7 +971,16 @@ Adversarial review thoroughness scales automatically based on diff size. 
No conf DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") DIFF_TOTAL=$((DIFF_INS + DIFF_DEL)) -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" # Respect old opt-out OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true) echo "DIFF_SIZE: $DIFF_TOTAL" @@ -976,9 +1002,9 @@ If `OLD_CFG` is `disabled`: skip this step silently. Continue to the next step. Claude's structured review already ran. Now add a **cross-model adversarial challenge**. -**If Codex is available:** run the Codex adversarial challenge. **If Codex is NOT available:** fall back to the Claude adversarial subagent instead. +**If a second-opinion provider is available (Codex or Gemini):** run the adversarial challenge with that provider. **If neither is available:** fall back to the Claude adversarial subagent instead. -**Codex adversarial:** +**Adversarial challenge (Codex):** ```bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) @@ -986,6 +1012,14 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." 
-C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" ``` +**Adversarial challenge (Gemini):** + +```bash +TMPERR_ADV=$(mktemp /tmp/gemini-adv-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR_ADV" +``` + Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. After the command completes, read stderr: ```bash cat "$TMPERR_ADV" @@ -994,11 +1028,11 @@ cat "$TMPERR_ADV" Present the full output verbatim. This is informational — it never blocks shipping. **Error handling:** All errors are non-blocking — adversarial review is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response. Stderr: ." +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run \`codex login\` or \`gemini\` to authenticate." +- **Timeout:** "[Provider] timed out after 5 minutes." +- **Empty response:** "[Provider] returned no response. Stderr: ." -On any Codex error, fall back to the Claude adversarial subagent automatically. +On any provider error, fall back to the Claude adversarial subagent automatically. **Claude adversarial subagent** (fallback when Codex unavailable or errored): @@ -1015,7 +1049,7 @@ If the subagent fails or times out: "Claude adversarial subagent unavailable. 
Co ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"medium","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "claude" if subagent ran. If both failed, do NOT persist. +Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "gemini" if Gemini ran, "claude" if subagent ran. If all failed, do NOT persist. **Cleanup:** Run `rm -f "$TMPERR_ADV"` after processing (if Codex was used). @@ -1025,7 +1059,9 @@ Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOU Claude's structured review already ran. Now run **all three remaining passes** for maximum coverage: -**1. Codex structured review (if available):** +**1. Cross-model structured review (if Codex or Gemini available):** + +**If Codex:** ```bash TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } @@ -1033,6 +1069,13 @@ cd "$_REPO_ROOT" codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` +**If Gemini (no native `review` subcommand — construct equivalent):** +```bash +TMPERR=$(mktemp /tmp/gemini-review-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff for a pre-landing code review. For each issue found, classify as [P1] (must fix before merge — security, data loss, correctness) or [P2] (should fix — performance, maintainability, edge cases). Format: [P1] or [P2] tag, file:line, problem, suggested fix. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR" +``` + Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. 
Check for `[P1]` markers: found → `GATE: FAIL`, not found → `GATE: PASS`. @@ -1054,13 +1097,13 @@ After stderr: `rm -f "$TMPERR"` **3. Codex adversarial challenge (if available):** Run `codex exec` with the adversarial prompt (same as medium tier). -If Codex is not available for steps 1 and 3, note to the user: "Codex CLI not found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex for full 4-pass coverage: `npm install -g @openai/codex`" +If no second-opinion provider is available for steps 1 and 3, note to the user: "No cross-model CLI found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex (`npm install -g @openai/codex`) or Gemini CLI (`npm install -g @google/gemini-cli`) for full 4-pass coverage." **Persist the review result AFTER all passes complete** (not after each sub-step): ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"large","gate":"GATE","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if Codex ran, "claude" if only Claude subagent ran. GATE = the Codex structured review gate result ("pass"/"fail"), or "informational" if Codex was unavailable. If all passes failed, do NOT persist. +Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if a cross-model provider (Codex or Gemini) ran, "claude" if only Claude subagent ran. GATE = the structured review gate result ("pass"/"fail"), or "informational" if no provider was available. If all passes failed, do NOT persist. 
--- @@ -1074,8 +1117,8 @@ ADVERSARIAL REVIEW SYNTHESIS (auto: TIER, N lines): High confidence (found by multiple sources): [findings agreed on by >1 pass] Unique to Claude structured review: [from earlier step] Unique to Claude adversarial: [from subagent, if ran] - Unique to Codex: [from codex adversarial or code review, if ran] - Models used: Claude structured ✓ Claude adversarial ✓/✗ Codex ✓/✗ + Unique to cross-model provider: [from Codex/Gemini adversarial or code review, if ran] + Models used: Claude structured ✓ Claude adversarial ✓/✗ Cross-model (Codex/Gemini) ✓/✗ ════════════════════════════════════════════════════════════ ``` diff --git a/scripts/resolvers/design.ts b/scripts/resolvers/design.ts index a59f516ff..c771718a6 100644 --- a/scripts/resolvers/design.ts +++ b/scripts/resolvers/design.ts @@ -7,20 +7,37 @@ export function generateDesignReviewLite(ctx: TemplateContext): string { // Codex block only for Claude host const codexBlock = ctx.host === 'codex' ? '' : ` -7. **Codex design voice** (optional, automatic if available): +7. **Cross-model design voice** (optional, automatic if available): \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "\${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" \`\`\` -If Codex is available, run a lightweight design check on the diff: +If a provider is available, run a lightweight design check on the diff: +**If Codex:** \`\`\`bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the git diff on this branch. 
Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" \`\`\` +**If Gemini:** +\`\`\`bash +TMPERR_DRL=$(mktemp /tmp/gemini-drl-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff for design issues. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR_DRL" +\`\`\` + Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: \`\`\`bash cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" @@ -28,7 +45,7 @@ cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" **Error handling:** All errors are non-blocking. On auth failure, timeout, or empty response — skip with a brief note and continue. -Present Codex output under a \`CODEX (design):\` header, merged with the checklist findings above.`; +Present output under a \`{PROVIDER} (design):\` header (substituting Codex or Gemini), merged with the checklist findings above.`; return `## Design Review (conditional, diff-scoped) @@ -454,29 +471,47 @@ The screenshot file at \`/tmp/gstack-sketch.png\` can be referenced by downstrea After the wireframe is approved, offer outside design perspectives: \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "\${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" \`\`\` -If Codex is available, use AskUserQuestion: -> "Want outside design perspectives on the chosen approach? 
Codex proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction." +If a provider is available, use AskUserQuestion: +> "Want outside design perspectives on the chosen approach? A cross-model voice proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction." > > A) Yes — get outside design voices > B) No — proceed without If user chooses A, launch both voices simultaneously: -1. **Codex** (via Bash, \`model_reasoning_effort="medium"\`): +1. **Cross-model voice** (via Bash): + +**If Codex:** \`\`\`bash TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH" \`\`\` + +**If Gemini:** +\`\`\`bash +TMPERR_SKETCH=$(mktemp /tmp/gemini-sketch-XXXXXXXX) +gemini --prompt "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." 2>"$TMPERR_SKETCH" +\`\`\` + Use a 5-minute timeout (\`timeout: 300000\`). After completion: \`cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"\` 2. **Claude subagent** (via Agent tool): "For this product approach, what design direction would you recommend? What aesthetic, typography, and interaction patterns fit? What would make this approach feel inevitable to the user? 
Be specific — font names, hex colors, spacing values." -Present Codex output under \`CODEX SAYS (design sketch):\` and subagent output under \`CLAUDE SUBAGENT (design direction):\`. +Present cross-model output under \`{PROVIDER} SAYS (design sketch):\` (substituting Codex or Gemini) and subagent output under \`CLAUDE SUBAGENT (design direction):\`. Error handling: all non-blocking. On failure, skip and continue.`; } @@ -628,19 +663,37 @@ Merge findings into the triage with \`[codex]\` / \`[subagent]\` / \`[cross-mode return `## Design Outside Voices (parallel) ${optInSection} -**Check Codex availability:** +**Detect available second-opinion provider:** \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "\${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" \`\`\` -**If Codex is available**, launch both voices simultaneously: +**If a provider is available**, launch both voices simultaneously: + +1. **Cross-model design voice** (via Bash): -1. **Codex design voice** (via Bash): +**If Codex:** \`\`\`bash TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "${escapedCodexPrompt}" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN" \`\`\` + +**If Gemini:** +\`\`\`bash +TMPERR_DESIGN=$(mktemp /tmp/gemini-design-XXXXXXXX) +gemini --prompt "${escapedCodexPrompt}" 2>"$TMPERR_DESIGN" +\`\`\` + Use a 5-minute timeout (\`timeout: 300000\`). 
After the command completes, read stderr: \`\`\`bash cat "$TMPERR_DESIGN" && rm -f "$TMPERR_DESIGN" @@ -651,13 +704,13 @@ Dispatch a subagent with this prompt: "${subagentPrompt}" **Error handling (all non-blocking):** -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response." -- On any Codex error: proceed with Claude subagent output only, tagged \`[single-model]\`. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run \`codex login\` or \`gemini\` to authenticate." +- **Timeout:** "[Provider] timed out after 5 minutes." +- **Empty response:** "[Provider] returned no response." +- On any provider error: proceed with Claude subagent output only, tagged \`[single-model]\`. - If Claude subagent also fails: "Outside voices unavailable — continuing with primary review." -Present Codex output under a \`CODEX SAYS (design ${isPlanDesignReview ? 'critique' : isDesignReview ? 'source audit' : 'direction'}):\` header. +Present cross-model output under a \`{PROVIDER} SAYS (design ${isPlanDesignReview ? 'critique' : isDesignReview ? 'source audit' : 'direction'}):\` header (substituting Codex or Gemini). Present subagent output under a \`CLAUDE SUBAGENT (design ${isPlanDesignReview ? 'completeness' : isDesignReview ? 'consistency' : 'direction'}):\` header. 
${synthesisSection} @@ -665,7 +718,7 @@ ${synthesisSection} \`\`\`bash ${ctx.paths.binDir}/gstack-review-log '{"skill":"design-outside-voices","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","commit":"'"$(git rev-parse --short HEAD)"'"}' \`\`\` -Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "codex-only", "subagent-only", or "unavailable".`; +Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "gemini+subagent", "codex-only", "gemini-only", "subagent-only", or "unavailable".`; } // ─── Design Hard Rules (OpenAI framework + gstack slop blacklist) ─── diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts index 382a8ddf3..fe8581746 100644 --- a/scripts/resolvers/review.ts +++ b/scripts/resolvers/review.ts @@ -251,13 +251,22 @@ export function generateCodexSecondOpinion(ctx: TemplateContext): string { return `## Phase 3.5: Cross-Model Second Opinion (optional) -**Binary check first:** +**Detect available second-opinion provider:** \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "\${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" \`\`\` -Use AskUserQuestion (regardless of codex availability): +Use AskUserQuestion (regardless of provider availability): > Want a second opinion from an independent AI perspective? It will review your problem statement, key answers, premises, and any landscape findings from this session without having seen this conversation — it gets a structured summary. Usually takes 2-5 minutes. > A) Yes, get a second opinion @@ -265,7 +274,7 @@ Use AskUserQuestion (regardless of codex availability): If B: skip Phase 3.5 entirely. 
Remember that the second opinion did NOT run (affects design doc, founder signals, and Phase 4 below). -**If A: Run the Codex cold read.** +**If A: Run the cross-model cold read.** 1. Assemble a structured context block from Phases 1-3: - Mode (Startup or Builder) @@ -278,7 +287,7 @@ If B: skip Phase 3.5 entirely. Remember that the second opinion did NOT run (aff 2. **Write the assembled prompt to a temp file** (prevents shell injection from user-derived content): \`\`\`bash -CODEX_PROMPT_FILE=$(mktemp /tmp/gstack-codex-oh-XXXXXXXX.txt) +SECOND_OPINION_PROMPT_FILE=$(mktemp /tmp/gstack-secondop-XXXXXXXX.txt) \`\`\` Write the full prompt (context block + instructions) to this file. Use the mode-appropriate variant: @@ -287,28 +296,37 @@ Write the full prompt (context block + instructions) to this file. Use the mode- **Builder mode instructions:** "You are an independent technical advisor reading a transcript of a builder brainstorming session. [CONTEXT BLOCK HERE]. Your job: 1) What is the COOLEST version of this they haven't considered? 2) What's the ONE thing from their answers that reveals what excites them most? Quote it. 3) What existing open source project or tool gets them 50% of the way there — and what's the 50% they'd need to build? 4) If you had a weekend to build this, what would you build first? Be specific. Be direct. No preamble." -3. Run Codex: +3. 
**Run the second-opinion provider:** + +**If Codex is the provider:** \`\`\`bash TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } -codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_OH" +codex exec "$(cat "$SECOND_OPINION_PROMPT_FILE")" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_OH" +\`\`\` + +**If Gemini is the provider:** + +\`\`\`bash +TMPERR_OH=$(mktemp /tmp/gemini-oh-err-XXXXXXXX) +gemini --prompt "$(cat "$SECOND_OPINION_PROMPT_FILE")" 2>"$TMPERR_OH" \`\`\` Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr: \`\`\`bash cat "$TMPERR_OH" -rm -f "$TMPERR_OH" "$CODEX_PROMPT_FILE" +rm -f "$TMPERR_OH" "$SECOND_OPINION_PROMPT_FILE" \`\`\` **Error handling:** All errors are non-blocking — second opinion is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \\\`codex login\\\` to authenticate." Fall back to Claude subagent. -- **Timeout:** "Codex timed out after 5 minutes." Fall back to Claude subagent. -- **Empty response:** "Codex returned no response." Fall back to Claude subagent. +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run \\\`codex login\\\` or \\\`gemini\\\` to authenticate." Fall back to Claude subagent. +- **Timeout:** "[Provider] timed out after 5 minutes." Fall back to Claude subagent. +- **Empty response:** "[Provider] returned no response." Fall back to Claude subagent. -On any Codex error, fall back to the Claude subagent below. +On any provider error, fall back to the Claude subagent below. 
-**If CODEX_NOT_AVAILABLE (or Codex errored):** +**If no provider available (or provider errored):** Dispatch via the Agent tool. The subagent has fresh context — genuine independence. @@ -320,19 +338,11 @@ If the subagent fails or times out: "Second opinion unavailable. Continuing to P 4. **Presentation:** -If Codex ran: -\`\`\` -SECOND OPINION (Codex): -════════════════════════════════════════════════════════════ - -════════════════════════════════════════════════════════════ -\`\`\` - -If Claude subagent ran: +Use the provider name in the header: \`\`\` -SECOND OPINION (Claude subagent): +SECOND OPINION ({Provider name — Codex, Gemini, or Claude subagent}): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ \`\`\` @@ -341,10 +351,10 @@ SECOND OPINION (Claude subagent): - Where Claude disagrees and why - Whether the challenged premise changes Claude's recommendation -6. **Premise revision check:** If Codex challenged an agreed premise, use AskUserQuestion: +6. **Premise revision check:** If the second opinion challenged an agreed premise, use AskUserQuestion: -> Codex challenged premise #{N}: "{premise text}". Their argument: "{reasoning}". -> A) Revise this premise based on Codex's input +> [Provider] challenged premise #{N}: "{premise text}". Their argument: "{reasoning}". +> A) Revise this premise based on the outside input > B) Keep the original premise — proceed to alternatives If A: revise the premise and note the revision. If B: proceed (and note that the user defended this premise with reasoning — this is a founder signal if they articulate WHY they disagree, not just dismiss).`; @@ -367,7 +377,16 @@ Adversarial review thoroughness scales automatically based on diff size. 
No conf DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") DIFF_TOTAL=$((DIFF_INS + DIFF_DEL)) -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "\${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" # Respect old opt-out OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true) echo "DIFF_SIZE: $DIFF_TOTAL" @@ -389,9 +408,9 @@ If \`OLD_CFG\` is \`disabled\`: skip this step silently. Continue to the next st Claude's structured review already ran. Now add a **cross-model adversarial challenge**. -**If Codex is available:** run the Codex adversarial challenge. **If Codex is NOT available:** fall back to the Claude adversarial subagent instead. +**If a second-opinion provider is available (Codex or Gemini):** run the adversarial challenge with that provider. **If neither is available:** fall back to the Claude adversarial subagent instead. -**Codex adversarial:** +**Adversarial challenge (Codex):** \`\`\`bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) @@ -399,6 +418,14 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." 
-C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" \`\`\` +**Adversarial challenge (Gemini):** + +\`\`\`bash +TMPERR_ADV=$(mktemp /tmp/gemini-adv-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR_ADV" +\`\`\` + Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. After the command completes, read stderr: \`\`\`bash cat "$TMPERR_ADV" @@ -407,11 +434,11 @@ cat "$TMPERR_ADV" Present the full output verbatim. This is informational — it never blocks shipping. **Error handling:** All errors are non-blocking — adversarial review is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \\\`codex login\\\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response. Stderr: ." +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run \\\`codex login\\\` or \\\`gemini\\\` to authenticate." +- **Timeout:** "[Provider] timed out after 5 minutes." +- **Empty response:** "[Provider] returned no response. Stderr: ." -On any Codex error, fall back to the Claude adversarial subagent automatically. +On any provider error, fall back to the Claude adversarial subagent automatically. **Claude adversarial subagent** (fallback when Codex unavailable or errored): @@ -428,7 +455,7 @@ If the subagent fails or times out: "Claude adversarial subagent unavailable. 
Co \`\`\`bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"medium","commit":"'"$(git rev-parse --short HEAD)"'"}' \`\`\` -Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "claude" if subagent ran. If both failed, do NOT persist. +Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "gemini" if Gemini ran, "claude" if subagent ran. If all failed, do NOT persist. **Cleanup:** Run \`rm -f "$TMPERR_ADV"\` after processing (if Codex was used). @@ -438,7 +465,9 @@ Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOU Claude's structured review already ran. Now run **all three remaining passes** for maximum coverage: -**1. Codex structured review (if available):** +**1. Cross-model structured review (if Codex or Gemini available):** + +**If Codex:** \`\`\`bash TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } @@ -446,6 +475,13 @@ cd "$_REPO_ROOT" codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" \`\`\` +**If Gemini (no native \`review\` subcommand — construct equivalent):** +\`\`\`bash +TMPERR=$(mktemp /tmp/gemini-review-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff for a pre-landing code review. For each issue found, classify as [P1] (must fix before merge — security, data loss, correctness) or [P2] (should fix — performance, maintainability, edge cases). Format: [P1] or [P2] tag, file:line, problem, suggested fix. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR" +\`\`\` + Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. 
Present output under \`CODEX SAYS (code review):\` header. Check for \`[P1]\` markers: found → \`GATE: FAIL\`, not found → \`GATE: PASS\`. @@ -467,13 +503,13 @@ After stderr: \`rm -f "$TMPERR"\` **3. Codex adversarial challenge (if available):** Run \`codex exec\` with the adversarial prompt (same as medium tier). -If Codex is not available for steps 1 and 3, note to the user: "Codex CLI not found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex for full 4-pass coverage: \`npm install -g @openai/codex\`" +If no second-opinion provider is available for steps 1 and 3, note to the user: "No cross-model CLI found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex (\`npm install -g @openai/codex\`) or Gemini CLI (\`npm install -g @google/gemini-cli\`) for full 4-pass coverage." **Persist the review result AFTER all passes complete** (not after each sub-step): \`\`\`bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"large","gate":"GATE","commit":"'"$(git rev-parse --short HEAD)"'"}' \`\`\` -Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if Codex ran, "claude" if only Claude subagent ran. GATE = the Codex structured review gate result ("pass"/"fail"), or "informational" if Codex was unavailable. If all passes failed, do NOT persist. +Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if a cross-model provider (Codex or Gemini) ran, "claude" if only Claude subagent ran. GATE = the structured review gate result ("pass"/"fail"), or "informational" if no provider was available. If all passes failed, do NOT persist. 
--- @@ -487,8 +523,8 @@ ADVERSARIAL REVIEW SYNTHESIS (auto: TIER, N lines): High confidence (found by multiple sources): [findings agreed on by >1 pass] Unique to Claude structured review: [from earlier step] Unique to Claude adversarial: [from subagent, if ran] - Unique to Codex: [from codex adversarial or code review, if ran] - Models used: Claude structured ✓ Claude adversarial ✓/✗ Codex ✓/✗ + Unique to cross-model provider: [from Codex/Gemini adversarial or code review, if ran] + Models used: Claude structured ✓ Claude adversarial ✓/✗ Cross-model (Codex/Gemini) ✓/✗ ════════════════════════════════════════════════════════════ \`\`\` @@ -507,10 +543,19 @@ After all review sections are complete, offer an independent second opinion from different AI system. Two models agreeing on a plan is stronger signal than one model's thorough review. -**Check tool availability:** +**Detect available second-opinion provider:** \`\`\`bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "\${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" \`\`\` Use AskUserQuestion: @@ -549,7 +594,7 @@ compliments. Just the problems. THE PLAN: " -**If CODEX_AVAILABLE:** +**If Codex is the provider:** \`\`\`bash TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX) @@ -557,6 +602,13 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" codex exec "" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_PV" \`\`\` +**If Gemini is the provider:** + +\`\`\`bash +TMPERR_PV=$(mktemp /tmp/gemini-planreview-XXXXXXXX) +gemini --prompt "" 2>"$TMPERR_PV" +\`\`\` + Use a 5-minute timeout (\`timeout: 300000\`). 
After the command completes, read stderr: \`\`\`bash cat "$TMPERR_PV" @@ -565,20 +617,22 @@ cat "$TMPERR_PV" Present the full output verbatim: \`\`\` -CODEX SAYS (plan review — outside voice): +{PROVIDER} SAYS (plan review — outside voice): ════════════════════════════════════════════════════════════ - + ════════════════════════════════════════════════════════════ \`\`\` +Substitute {PROVIDER} with "CODEX" or "GEMINI" based on which ran. + **Error handling:** All errors are non-blocking — the outside voice is informational. -- Auth failure (stderr contains "auth", "login", "unauthorized"): "Codex auth failed. Run \\\`codex login\\\` to authenticate." -- Timeout: "Codex timed out after 5 minutes." -- Empty response: "Codex returned no response." +- Auth failure (stderr contains "auth", "login", "unauthorized"): "[Provider] auth failed. Run \\\`codex login\\\` or \\\`gemini\\\` to authenticate." +- Timeout: "[Provider] timed out after 5 minutes." +- Empty response: "[Provider] returned no response." -On any Codex error, fall back to the Claude adversarial subagent. +On any provider error, fall back to the Claude adversarial subagent. -**If CODEX_NOT_AVAILABLE (or Codex errored):** +**If no provider available (or provider errored):** Dispatch via the Agent tool. The subagent has fresh context — genuine independence. @@ -615,7 +669,7 @@ If no tension points exist, note: "No cross-model tension — both reviewers agr \`\`\` Substitute: STATUS = "clean" if no findings, "issues_found" if findings exist. -SOURCE = "codex" if Codex ran, "claude" if subagent ran. +SOURCE = "codex" if Codex ran, "gemini" if Gemini ran, "claude" if subagent ran. **Cleanup:** Run \`rm -f "$TMPERR_PV"\` after processing (if Codex was used). 
diff --git a/ship/SKILL.md b/ship/SKILL.md index f3f2ec013..1e4f71bf2 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -1340,20 +1340,37 @@ source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null) Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count, COMMIT = output of `git rev-parse --short HEAD`. -7. **Codex design voice** (optional, automatic if available): +7. **Cross-model design voice** (optional, automatic if available): ```bash -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" ``` -If Codex is available, run a lightweight design check on the diff: +If a provider is available, run a lightweight design check on the diff: +**If Codex:** ```bash TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line." 
-C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL" ``` +**If Gemini:** +```bash +TMPERR_DRL=$(mktemp /tmp/gemini-drl-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff for design issues. Run 7 litmus checks (YES/NO each): 1. Brand/product unmistakable in first screen? 2. One strong visual anchor present? 3. Page understandable by scanning headlines only? 4. Each section has one job? 5. Are cards actually necessary? 6. Does motion improve hierarchy or atmosphere? 7. Would design feel premium with all decorative shadows removed? Flag any hard rejections: 1. Generic SaaS card grid as first impression 2. Beautiful image with weak brand 3. Strong headline with no clear action 4. Busy imagery behind text 5. Sections repeating same mood statement 6. Carousel with no narrative purpose 7. App UI made of stacked cards instead of layout 5 most important design findings only. Reference file:line. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR_DRL" +``` + Use a 5-minute timeout (`timeout: 300000`). After the command completes, read stderr: ```bash cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" @@ -1361,7 +1378,7 @@ cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL" **Error handling:** All errors are non-blocking. On auth failure, timeout, or empty response — skip with a brief note and continue. -Present Codex output under a `CODEX (design):` header, merged with the checklist findings above. +Present output under a `{PROVIDER} (design):` header (substituting Codex or Gemini), merged with the checklist findings above. Include any design findings alongside the code review findings. They follow the same Fix-First flow below. @@ -1445,7 +1462,16 @@ Adversarial review thoroughness scales automatically based on diff size. 
No conf DIFF_INS=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ insertion' | grep -oE '[0-9]+' || echo "0") DIFF_DEL=$(git diff origin/ --stat | tail -1 | grep -oE '[0-9]+ deletion' | grep -oE '[0-9]+' || echo "0") DIFF_TOTAL=$((DIFF_INS + DIFF_DEL)) -which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE" +SECOND_OPINION_BIN="" +SECOND_OPINION_PROVIDER="" +if which codex 2>/dev/null; then + SECOND_OPINION_BIN="codex" + SECOND_OPINION_PROVIDER="Codex" +elif which gemini 2>/dev/null; then + SECOND_OPINION_BIN="gemini" + SECOND_OPINION_PROVIDER="Gemini" +fi +echo "${SECOND_OPINION_PROVIDER:-NONE}_AVAILABLE" # Respect old opt-out OLD_CFG=$(~/.claude/skills/gstack/bin/gstack-config get codex_reviews 2>/dev/null || true) echo "DIFF_SIZE: $DIFF_TOTAL" @@ -1467,9 +1493,9 @@ If `OLD_CFG` is `disabled`: skip this step silently. Continue to the next step. Claude's structured review already ran. Now add a **cross-model adversarial challenge**. -**If Codex is available:** run the Codex adversarial challenge. **If Codex is NOT available:** fall back to the Claude adversarial subagent instead. +**If a second-opinion provider is available (Codex or Gemini):** run the adversarial challenge with that provider. **If neither is available:** fall back to the Claude adversarial subagent instead. -**Codex adversarial:** +**Adversarial challenge (Codex):** ```bash TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX) @@ -1477,6 +1503,14 @@ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" codex exec "Review the changes on this branch against the base branch. Run git diff origin/ to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." 
-C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_ADV" ``` +**Adversarial challenge (Gemini):** + +```bash +TMPERR_ADV=$(mktemp /tmp/gemini-adv-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR_ADV" +``` + Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. After the command completes, read stderr: ```bash cat "$TMPERR_ADV" @@ -1485,11 +1519,11 @@ cat "$TMPERR_ADV" Present the full output verbatim. This is informational — it never blocks shipping. **Error handling:** All errors are non-blocking — adversarial review is a quality enhancement, not a prerequisite. -- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate." -- **Timeout:** "Codex timed out after 5 minutes." -- **Empty response:** "Codex returned no response. Stderr: ." +- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "[Provider] authentication failed. Run \`codex login\` or \`gemini\` to authenticate." +- **Timeout:** "[Provider] timed out after 5 minutes." +- **Empty response:** "[Provider] returned no response. Stderr: ." -On any Codex error, fall back to the Claude adversarial subagent automatically. +On any provider error, fall back to the Claude adversarial subagent automatically. **Claude adversarial subagent** (fallback when Codex unavailable or errored): @@ -1506,7 +1540,7 @@ If the subagent fails or times out: "Claude adversarial subagent unavailable. 
Co ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"medium","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "claude" if subagent ran. If both failed, do NOT persist. +Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOURCE: "codex" if Codex ran, "gemini" if Gemini ran, "claude" if subagent ran. If all failed, do NOT persist. **Cleanup:** Run `rm -f "$TMPERR_ADV"` after processing (if Codex was used). @@ -1516,7 +1550,9 @@ Substitute STATUS: "clean" if no findings, "issues_found" if findings exist. SOU Claude's structured review already ran. Now run **all three remaining passes** for maximum coverage: -**1. Codex structured review (if available):** +**1. Cross-model structured review (if Codex or Gemini available):** + +**If Codex:** ```bash TMPERR=$(mktemp /tmp/codex-review-XXXXXXXX) _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; } @@ -1524,6 +1560,13 @@ cd "$_REPO_ROOT" codex review --base -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR" ``` +**If Gemini (no native `review` subcommand — construct equivalent):** +```bash +TMPERR=$(mktemp /tmp/gemini-review-XXXXXXXX) +DIFF_CONTENT=$(git diff origin/) +gemini --prompt "Review this git diff for a pre-landing code review. For each issue found, classify as [P1] (must fix before merge — security, data loss, correctness) or [P2] (should fix — performance, maintainability, edge cases). Format: [P1] or [P2] tag, file:line, problem, suggested fix. THE DIFF: $DIFF_CONTENT" 2>"$TMPERR" +``` + Set the Bash tool's `timeout` parameter to `300000` (5 minutes). Do NOT use the `timeout` shell command — it doesn't exist on macOS. Present output under `CODEX SAYS (code review):` header. 
Check for `[P1]` markers: found → `GATE: FAIL`, not found → `GATE: PASS`. @@ -1545,13 +1588,13 @@ After stderr: `rm -f "$TMPERR"` **3. Codex adversarial challenge (if available):** Run `codex exec` with the adversarial prompt (same as medium tier). -If Codex is not available for steps 1 and 3, note to the user: "Codex CLI not found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex for full 4-pass coverage: `npm install -g @openai/codex`" +If no second-opinion provider is available for steps 1 and 3, note to the user: "No cross-model CLI found — large-diff review ran Claude structured + Claude adversarial (2 of 4 passes). Install Codex (`npm install -g @openai/codex`) or Gemini CLI (`npm install -g @google/gemini-cli`) for full 4-pass coverage." **Persist the review result AFTER all passes complete** (not after each sub-step): ```bash ~/.claude/skills/gstack/bin/gstack-review-log '{"skill":"adversarial-review","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","tier":"large","gate":"GATE","commit":"'"$(git rev-parse --short HEAD)"'"}' ``` -Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if Codex ran, "claude" if only Claude subagent ran. GATE = the Codex structured review gate result ("pass"/"fail"), or "informational" if Codex was unavailable. If all passes failed, do NOT persist. +Substitute: STATUS = "clean" if no findings across ALL passes, "issues_found" if any pass found issues. SOURCE = "both" if a cross-model provider (Codex or Gemini) ran, "claude" if only Claude subagent ran. GATE = the structured review gate result ("pass"/"fail"), or "informational" if no provider was available. If all passes failed, do NOT persist. 
--- @@ -1565,8 +1608,8 @@ ADVERSARIAL REVIEW SYNTHESIS (auto: TIER, N lines): High confidence (found by multiple sources): [findings agreed on by >1 pass] Unique to Claude structured review: [from earlier step] Unique to Claude adversarial: [from subagent, if ran] - Unique to Codex: [from codex adversarial or code review, if ran] - Models used: Claude structured ✓ Claude adversarial ✓/✗ Codex ✓/✗ + Unique to cross-model provider: [from Codex/Gemini adversarial or code review, if ran] + Models used: Claude structured ✓ Claude adversarial ✓/✗ Cross-model (Codex/Gemini) ✓/✗ ════════════════════════════════════════════════════════════ ```