Skip to content

fix: batch-eval docs + preserve config-bundle placeholders on AB-test promote #1193

fix: batch-eval docs + preserve config-bundle placeholders on AB-test promote

fix: batch-eval docs + preserve config-bundle placeholders on AB-test promote #1193

name: Claude Security Review

# The security-review prompt is inlined in this workflow instead of invoking
# the bundled /security-review slash command. The bundled skill starts with
# `git diff origin/HEAD...`, and that fails silently once the runner's clone
# has been shallowed, because a shallow clone has no merge base.
# claude-code-action's restoreConfigFromBase shallows the clone on every PR
# by design — see
# https://github.com/anthropics/claude-code-action/blob/v1/src/github/operations/restore-config.ts
# Computing the diff ourselves before the action starts removes that whole
# class of failure.
on:
  pull_request_target:
    types:
      - opened
      - reopened
      - synchronize
      - labeled
    # Review only PRs that target one of the two long-lived release
    # branches. PRs into short-lived feature branches need no security gate
    # of their own: they are reviewed when the feature is merged into main
    # or feat/summit_release.
    branches:
      - main
      - feat/summit_release
  workflow_dispatch:
    inputs:
      pr_number:
        description: >-
          PR number to review (workflow_dispatch will NOT post inline comments — use only for prompt smoke tests)
        required: true
        type: string

permissions:
  id-token: write
  pull-requests: write
  issues: write
  contents: read

concurrency:
  # cancel-in-progress stays off on purpose. A run cancelled after its
  # label/checkout steps but before the review itself still executes the
  # always() steps, and would post a misleading "no findings" summary (the
  # inline-comment buffer is empty when the analysis step was skipped by the
  # cancellation). Letting both runs finish is the safer default.
  group: pr-security-review-${{ github.event.pull_request.number || inputs.pr_number }}
  cancel-in-progress: false

jobs:
# Gate job: decides whether the actor behind this event may trigger a review.
# Exposes a single output, `authorized` ('true' / 'false'), read by the
# review job.
authorize:
  runs-on: ubuntu-latest
  # On 'labeled' events, only proceed when the label is exactly 'safe-to-review'.
  # Other labels (e.g. size/m) are filtered out so we don't spawn API calls.
  if: |
    github.event_name != 'pull_request_target' ||
    github.event.action != 'labeled' ||
    github.event.label.name == 'safe-to-review'
  outputs:
    # Exactly one of the two steps below runs for a given event, so at most
    # one side of the `||` is non-empty.
    authorized: ${{ steps.auth.outputs.authorized || steps.dispatch-auth.outputs.authorized }}
  steps:
    - name: Check authorization
      id: auth
      if: github.event_name == 'pull_request_target'
      uses: actions/github-script@v9
      with:
        script: |
          // For a 'labeled' event the person who applied the label is the
          // one vouching for the PR; for every other action it is the PR
          // author.
          const isLabel = context.payload.action === 'labeled';
          const user = isLabel
            ? context.payload.sender.login
            : context.payload.pull_request.user.login;
          const reason = isLabel ? `labeler ${user}` : `PR author ${user}`;

          let authorized = false;

          // First choice: active membership of the agentcore-cli-devs team.
          // The endpoint also answers 200 for memberships that are still
          // 'pending' (invitation not yet accepted), so the state must be
          // checked explicitly — a successful call alone is not proof.
          // NOTE(review): this call uses the default workflow token; confirm
          // it is actually allowed to read team membership in this org,
          // otherwise this branch always falls through to the fallback.
          try {
            const { data: membership } = await github.rest.teams.getMembershipForUserInOrg({
              org: context.repo.owner,
              team_slug: 'agentcore-cli-devs',
              username: user,
            });
            authorized = membership.state === 'active';
          } catch {
            // Not a member, or the lookup is not permitted: use the fallback.
          }

          // Fallback: write or admin permission on this repository.
          if (!authorized) {
            try {
              const { data } = await github.rest.repos.getCollaboratorPermissionLevel({
                owner: context.repo.owner,
                repo: context.repo.repo,
                username: user,
              });
              authorized = ['write', 'admin'].includes(data.permission);
            } catch {
              authorized = false;
            }
          }

          core.info(`${reason} is ${authorized ? '' : 'not '}authorized to trigger the security review`);
          core.setOutput('authorized', authorized ? 'true' : 'false');
    # Manual dispatches are always treated as authorized.
    - name: Auto-authorize workflow_dispatch
      id: dispatch-auth
      if: github.event_name == 'workflow_dispatch'
      run: echo "authorized=true" >> "$GITHUB_OUTPUT"
# Review job: runs only when the authorize job reported authorized == 'true'.
review:
  runs-on: ubuntu-latest
  timeout-minutes: 30
  needs: authorize
  if: needs.authorize.outputs.authorized == 'true'
  env:
    AWS_REGION: us-west-2
  steps:
    # Creates a GitHub App token from the APP_ID variable and the
    # APP_PRIVATE_KEY secret; the github-script steps and the review action
    # below consume it as `steps.app-token.outputs.token`.
    - name: Generate GitHub App token
      id: app-token
      uses: actions/create-github-app-token@v1
      with:
        private-key: ${{ secrets.APP_PRIVATE_KEY }}
        app-id: ${{ vars.APP_ID }}
# Resolves the PR under review and publishes three outputs:
#   number   - the PR number
#   head_sha - the commit to check out and review
#   base_ref - the name of the branch the PR targets
- name: Resolve PR
  id: pr
  uses: actions/github-script@v9
  env:
    PR_NUMBER_INPUT: ${{ inputs.pr_number }}
  with:
    github-token: ${{ steps.app-token.outputs.token }}
    script: |
      const isDispatch = context.eventName === 'workflow_dispatch';

      // The dispatch input is free-form text. Accept only a plain positive
      // integer so that values like '12abc' or '' fail loudly here instead
      // of turning into NaN (or a wrong PR) further down.
      const rawInput = (process.env.PR_NUMBER_INPUT || '').trim();
      if (isDispatch && !/^[1-9]\d*$/.test(rawInput)) {
        core.setFailed(`pr_number must be a positive integer, got "${rawInput}"`);
        return;
      }
      const num = isDispatch
        ? parseInt(rawInput, 10)
        : context.payload.pull_request.number;

      const { data: pr } = await github.rest.pulls.get({
        owner: context.repo.owner,
        repo: context.repo.repo,
        pull_number: num,
      });

      // For PR events, review the commit carried by the event payload, not
      // whatever the branch head is by the time this step runs. Otherwise a
      // commit pushed after a 'safe-to-review' label was applied would be
      // reviewed under that label's approval. A manual dispatch has no
      // payload commit, so the current head is used there.
      const headSha = isDispatch
        ? pr.head.sha
        : context.payload.pull_request.head.sha;

      core.setOutput('number', num);
      core.setOutput('head_sha', headSha);
      core.setOutput('base_ref', pr.base.ref);
# Marks the PR as "review in progress" with the claude-security-reviewing
# label, creating the label in the repository first when it does not exist.
- name: Add reviewing label
  uses: actions/github-script@v9
  env:
    PR_NUMBER: ${{ steps.pr.outputs.number }}
  with:
    github-token: ${{ steps.app-token.outputs.token }}
    script: |
      const prNumber = parseInt(process.env.PR_NUMBER, 10);
      const { owner, repo } = context.repo;
      const label = 'claude-security-reviewing';

      try {
        await github.rest.issues.getLabel({ owner, repo, name: label });
      } catch (e) {
        if (e.status === 404) {
          try {
            await github.rest.issues.createLabel({
              owner,
              repo,
              name: label,
              color: 'D73A4A',
              description: 'Claude Code security review in progress',
            });
          } catch (createError) {
            // 422 means the label appeared between the lookup and the
            // create call (e.g. a run on another PR created it); that is
            // the state we wanted, so carry on.
            if (createError.status !== 422) {
              throw createError;
            }
          }
        } else {
          // Still best-effort, but no longer silent: surface the reason in
          // the log and let addLabels below decide whether the step fails.
          core.warning(`Could not look up label "${label}": ${e.message}`);
        }
      }

      await github.rest.issues.addLabels({
        owner,
        repo,
        issue_number: prNumber,
        labels: [label],
      });
# Check out the resolved head commit with complete history (fetch-depth: 0);
# the three-dot diff in the next step needs a merge base with the base branch.
- name: Checkout PR head
  uses: actions/checkout@v6
  with:
    fetch-depth: 0
    ref: ${{ steps.pr.outputs.head_sha }}
# Writes the base...head diff to /tmp/pr.diff and publishes its size
# (`bytes`) and the number of changed files (`files`) as step outputs.
- name: Compute diff
  id: diff
  env:
    BASE_REF: ${{ steps.pr.outputs.base_ref }}
  run: |
    set -euo pipefail
    # The diff has to be taken *before* claude-code-action runs. On startup
    # the action's restoreConfigFromBase() does `git fetch --depth=1`
    # against the base branch, which strips history and would break any
    # base-vs-head diff from then on. Taking it now hands the model a
    # frozen artifact that nothing the action does later can invalidate.
    range="origin/$BASE_REF...HEAD"
    git fetch --no-tags origin "+refs/heads/$BASE_REF:refs/remotes/origin/$BASE_REF"
    git diff "$range" > /tmp/pr.diff
    diff_bytes=$(wc -c < /tmp/pr.diff)
    changed_files=$(git diff --name-only "$range" | wc -l | tr -d ' ')
    {
      echo "bytes=$diff_bytes"
      echo "files=$changed_files"
    } >> "$GITHUB_OUTPUT"
    echo "Diff: $diff_bytes bytes across $changed_files files"
# Writes the review prompt to $RUNNER_TEMP/prompt/prompt.md. Skipped when the
# diff is empty. The heredoc delimiter is quoted, so the shell performs no
# expansion inside the prompt text.
#
# Keep the confidence rubric, the ">80%" instruction and the step-3 cut-off
# in agreement: a finding is kept only at confidence 8 or higher.
- name: Build prompt
  if: steps.diff.outputs.bytes != '0'
  run: |
    set -euo pipefail
    mkdir -p "$RUNNER_TEMP/prompt"
    cat > "$RUNNER_TEMP/prompt/prompt.md" <<'PROMPT_EOF'
    You are performing a HIGH-CONFIDENCE security code review of a pull
    request. The complete diff is at `/tmp/pr.diff` — read it first
    using the Read tool. That file is the ground truth for what the
    PR changes; do not run `git diff` or any other git commands. To
    understand context — callers of a changed function, existing
    sanitization patterns, the project's threat model — use Grep,
    Glob, and Read against the repository working tree. Do not use
    Bash.
    # OBJECTIVE
    Identify HIGH-CONFIDENCE security vulnerabilities newly introduced
    by this PR that have real exploitation potential. This is NOT a
    general code review. Focus ONLY on security implications added by
    the PR. Do not comment on pre-existing issues.
    # CRITICAL INSTRUCTIONS
    1. MINIMIZE FALSE POSITIVES: Only flag issues where you are >80%
    confident of actual exploitability.
    2. AVOID NOISE: Skip theoretical issues, style concerns, or
    low-impact findings.
    3. FOCUS ON IMPACT: Prioritize vulnerabilities that lead to
    unauthorized access, data breach, or system compromise.
    4. DO NOT report any of:
    - Denial of service / resource exhaustion / rate limiting
    - Secrets at rest on disk (handled by other tooling)
    - Memory consumption or CPU exhaustion
    # CATEGORIES TO EXAMINE
    **Input validation**: SQL injection, command injection, XXE,
    template injection, NoSQL injection, path traversal.
    **AuthN/AuthZ**: authentication bypass, privilege escalation,
    session/JWT flaws, authorization-logic bypasses.
    **Crypto & secrets**: hardcoded keys/passwords/tokens, weak
    algorithms, improper key storage, weak randomness, certificate
    validation bypass.
    **Code execution**: deserialization RCE (pickle, YAML, etc.),
    eval injection, XSS (reflected/stored/DOM) — only in unsafe paths
    (see precedents).
    **Data exposure**: sensitive logging, PII handling violations,
    API leakage, debug-info exposure.
    A finding can still be HIGH severity if only exploitable from the
    local network.
    # METHODOLOGY
    Phase 1 — Repository context: identify existing security
    libraries/frameworks, sanitization patterns, the project's threat
    model. Use search tools.
    Phase 2 — Comparative analysis: compare new changes against
    established patterns; flag deviations and net-new attack surface.
    Phase 3 — Vulnerability assessment: for each modified file,
    trace user input → sensitive operations, look for unsafe privilege
    boundary crossings, identify injection points.
    # FALSE-POSITIVE FILTER (apply hard)
    Read the code (Read/Grep/Glob); do not run commands to reproduce
    or write files.
    HARD EXCLUSIONS — drop any finding matching:
    1. DoS / resource exhaustion.
    2. Secrets/credentials on disk if otherwise secured.
    3. Rate limiting or service overload.
    4. Memory/CPU exhaustion.
    5. Missing input validation on non-security-critical fields.
    6. Input sanitization in GitHub Actions workflows unless clearly
    triggerable via untrusted input.
    7. Lack of hardening; only flag concrete vulns.
    8. Theoretical race conditions or timing attacks.
    9. Outdated third-party libraries (handled separately).
    10. Memory-safety issues in memory-safe languages (Rust, Go,
    JS/TS, Python).
    11. Files that are unit tests or test-only.
    12. Log spoofing — un-sanitized user input to logs is not a vuln.
    13. SSRF that only controls the path (host/protocol control is
    required).
    14. User-controlled content in AI system prompts is not a vuln.
    15. Regex injection.
    16. Regex DoS.
    17. Insecure documentation (.md and similar).
    18. Lack of audit logs.
    PRECEDENTS:
    1. Plaintext-logging high-value secrets IS a vuln; logging URLs
    is assumed safe.
    2. UUIDs are unguessable and need no validation.
    3. Env vars and CLI flags are trusted inputs.
    4. Resource leaks (memory, fd) are not vulns.
    5. Tabnabbing, XS-Leaks, prototype pollution, open redirects:
    only with extremely high confidence.
    6. React / Angular: do not report XSS in components or .tsx files
    unless using `dangerouslySetInnerHTML`,
    `bypassSecurityTrustHtml`, or equivalents.
    7. GitHub Actions workflow vulns: only when a concrete attack
    path through untrusted input exists.
    8. Missing AuthN/AuthZ in client-side code is not a vuln —
    validation is the server's job.
    9. MEDIUM findings only when obvious and concrete.
    10. .ipynb notebook vulns: only with a concrete attack path
    through untrusted input.
    11. Logging non-PII data is not a vuln. Only flag when the data
    is secrets, passwords, or PII.
    12. Command injection in shell scripts: only when there is a
    concrete attack path through untrusted input.
    For each surviving finding, score confidence 1–10:
    - 1–3: low / likely noise — drop
    - 4–7: medium — drop
    - 8–10: high — keep
    # PROCESS
    Run this in three steps, exactly:
    1. Spawn a Task sub-agent to identify candidate vulnerabilities.
    Pass the full instructions above (objective, categories,
    methodology, hard exclusions, precedents). Have it return a
    structured list of candidates with file/line/category/
    description/exploit/fix.
    2. For EACH candidate from step 1, spawn an independent Task
    sub-agent IN PARALLEL to adversarially verify it. Each
    verifier gets the full FALSE-POSITIVE FILTER above and is
    told to default to "drop" if uncertain. Each returns a
    confidence score 1–10.
    3. Drop any finding with confidence < 8. For every finding that
    survives, call:
    mcp__github_inline_comment__create_inline_comment
    with `{ path, line, body }` pointing at the exact file and
    line in the diff. The body should follow:
    **<Severity>: <Category>**
    <One-paragraph description of the issue and concrete
    exploit scenario.>
    **Recommendation:** <One-sentence fix.>
    Do NOT post a single summary comment listing all findings —
    the workflow handles a top-level summary after this run
    completes. If zero findings survive verification, exit without
    calling any tool.
    Begin.
    PROMPT_EOF
# Assumes the role named by the BEDROCK_SECURITY_REVIEW_ROLE_ARN secret.
# Skipped when the diff is empty, since no model call will be made.
- name: Configure AWS credentials
  if: steps.diff.outputs.bytes != '0'
  uses: aws-actions/configure-aws-credentials@v6
  with:
    role-to-assume: ${{ secrets.BEDROCK_SECURITY_REVIEW_ROLE_ARN }}
    # Read the region from the job-level AWS_REGION variable instead of
    # repeating the literal, so the two cannot drift apart.
    aws-region: ${{ env.AWS_REGION }}
# Copies the generated prompt file into the PROMPT_BODY environment variable
# for the following steps.
- name: Load prompt into env
  id: load-prompt
  if: steps.diff.outputs.bytes != '0'
  # The action takes the prompt only as a string (`prompt:`), never as a
  # file path: passing prompt_file is silently ignored, the action is left
  # without a trigger, and the run is skipped with "No trigger found". So
  # the prompt is read into an environment variable here and passed inline
  # by the next step. Writing to GITHUB_ENV with a randomized heredoc
  # sentinel is the standard Actions idiom for multi-line values.
  env:
    PROMPT_FILE: ${{ runner.temp }}/prompt/prompt.md
  run: |
    set -euo pipefail
    sentinel="EOF_$(uuidgen)"
    echo "PROMPT_BODY<<$sentinel" >> "$GITHUB_ENV"
    cat "$PROMPT_FILE" >> "$GITHUB_ENV"
    echo "$sentinel" >> "$GITHUB_ENV"
# Runs the review itself, through Bedrock, with the prompt loaded into
# PROMPT_BODY by the previous step.
- name: Run Claude Code
  id: review
  if: steps.diff.outputs.bytes != '0'
  uses: anthropics/claude-code-action@v1
  with:
    github_token: ${{ steps.app-token.outputs.token }}
    use_bedrock: 'true'
    prompt: ${{ env.PROMPT_BODY }}
    show_full_output: 'true'
    # Tool allow-list, and why each entry is there:
    #   Read / Grep / Glob - let the model explore the repository for
    #     context (existing sanitization patterns, threat model, callers
    #     of a changed function).
    #   Task - required for the parallel verifier sub-agents the prompt
    #     asks for.
    #   mcp__github_inline_comment__create_inline_comment - the output
    #     channel; allow-listing it is also what makes the action attach
    #     the inline-comment MCP server.
    # Bash is deliberately absent: the prompt forbids running commands,
    # and leaving Bash off keeps /tmp/pr.diff the only ground truth (no
    # `git diff` re-runs against a possibly-shallow clone).
    claude_args: >-
      --model us.anthropic.claude-opus-4-7
      --max-turns 60
      --allowedTools "Read Grep Glob Task
      mcp__github_inline_comment__create_inline_comment"
# Inspects the last entry of the action's execution transcript and fails the
# step when the model reported an error, took zero turns, or produced zero
# output tokens. Outputs: `ran` (true / false / unknown) and `num_turns`.
- name: Verify model ran productively
  id: model-ran
  if: >-
    steps.diff.outputs.bytes != '0' &&
    (steps.review.conclusion == 'success' || steps.review.conclusion == 'failure')
  env:
    OUTPUT_JSON: >-
      ${{ steps.review.outputs.execution_file || format('{0}/claude-execution-output.json', runner.temp) }}
  run: |
    set -euo pipefail

    # Without a non-empty transcript nothing can be verified; report
    # "unknown" rather than failing the step.
    if [ ! -s "$OUTPUT_JSON" ]; then
      echo "::warning::No execution transcript at $OUTPUT_JSON — cannot verify"
      {
        echo "ran=unknown"
        echo "num_turns=0"
      } >> "$GITHUB_OUTPUT"
      exit 0
    fi

    # Print one field of the transcript's final entry, or the given
    # fallback when the field is absent.
    last_entry_field() {
      jq -r ".[-1].$1 // $2" "$OUTPUT_JSON"
    }

    turns=$(last_entry_field num_turns 0)
    errored=$(last_entry_field is_error false)
    out_tokens=$(last_entry_field usage.output_tokens 0)
    echo "num_turns=$turns, is_error=$errored, output_tokens=$out_tokens"
    echo "num_turns=$turns" >> "$GITHUB_OUTPUT"

    if [ "$errored" != "true" ] && [ "$turns" != "0" ] && [ "$out_tokens" != "0" ]; then
      echo "ran=true" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    # Unproductive run: dump the tail of the user/system messages to help
    # diagnosis, then fail.
    echo "::group::Last messages from SDK transcript"
    jq -r '.[] | select(.type == "user" or .type == "system") | .message.content // .subtype' "$OUTPUT_JSON" | tail -40
    echo "::endgroup::"
    echo "::error::Model did not run productively (turns=$turns, output_tokens=$out_tokens, is_error=$errored)"
    echo "ran=false" >> "$GITHUB_OUTPUT"
    exit 1
# Publishes `count`: the number of lines in the inline-comment buffer file,
# or 0 when that file is missing or empty.
# NOTE(review): presumably the buffer holds one JSON object per line, one
# per inline comment — confirm against the action that writes it.
- name: Count findings
  id: findings
  if: >-
    steps.diff.outputs.bytes != '0' &&
    (steps.review.conclusion == 'success' || steps.review.conclusion == 'failure')
  run: |
    set -euo pipefail
    buffer=/tmp/inline-comments-buffer.jsonl
    total=0
    if [ -s "$buffer" ]; then
      total=$(wc -l < "$buffer" | tr -d ' ')
    fi
    echo "count=$total" >> "$GITHUB_OUTPUT"
    echo "Buffered findings: $total"
# Posts one top-level comment on the PR describing how the run ended. Runs
# on every outcome (always()), so each input below may be empty when the
# step that produces it failed or was skipped.
- name: Post summary
  if: always()
  uses: actions/github-script@v9
  env:
    PR_NUMBER: ${{ steps.pr.outputs.number }}
    FINDING_COUNT: ${{ steps.findings.outputs.count }}
    REVIEW_CONCLUSION: ${{ steps.review.conclusion }}
    MODEL_RAN: ${{ steps.model-ran.outputs.ran }}
    NUM_TURNS: ${{ steps.model-ran.outputs.num_turns }}
    DIFF_BYTES: ${{ steps.diff.outputs.bytes }}
    DIFF_FILES: ${{ steps.diff.outputs.files }}
    RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  with:
    github-token: ${{ steps.app-token.outputs.token }}
    script: |
      const prNumber = parseInt(process.env.PR_NUMBER, 10);
      if (Number.isNaN(prNumber)) {
        // The PR was never resolved, so there is nowhere to comment.
        core.warning('PR number is unavailable; skipping the summary comment.');
        return;
      }

      const count = parseInt(process.env.FINDING_COUNT || '0', 10);
      const conclusion = process.env.REVIEW_CONCLUSION || 'skipped';
      const modelRan = process.env.MODEL_RAN || 'unknown';
      const numTurns = process.env.NUM_TURNS || '0';
      const runUrl = process.env.RUN_URL;
      // Keep "diff step produced no output" (empty string) distinct from
      // "diff is zero bytes": only the latter means there is nothing to
      // review.
      const rawDiffBytes = process.env.DIFF_BYTES || '';
      const diffBytes = parseInt(rawDiffBytes || '0', 10);

      let body;
      if (rawDiffBytes === '') {
        body = `**Claude Security Review:** the run stopped before the PR diff could be computed, so nothing was reviewed. See the [run](${runUrl}) for details.`;
      } else if (diffBytes === 0) {
        body = `**Claude Security Review:** PR has an empty diff against base — nothing to review. ([run](${runUrl}))`;
      } else if (conclusion !== 'success' && conclusion !== 'failure') {
        // Review step skipped or cancelled. This has to be tested before
        // modelRan, which is also not 'true' in that situation.
        body = `**Claude Security Review:** the review run was ${conclusion} before analysis could complete. See the [run](${runUrl}); a later run on this PR will replace this status.`;
      } else if (modelRan !== 'true') {
        body = `**Claude Security Review:** the review did not analyze this PR (model took ${numTurns} turn${numTurns === '1' ? '' : 's'}). See the [run](${runUrl}) for details; a later push or re-run is needed.`;
      } else if (conclusion === 'success') {
        body = count > 0
          ? `**Claude Security Review:** posted ${count} inline finding${count === 1 ? '' : 's'} on this PR. ([run](${runUrl}))`
          : `**Claude Security Review:** no high-confidence findings. ([run](${runUrl}))`;
      } else {
        body = `**Claude Security Review:** the review run failed before completing. See the [run](${runUrl}) for details.`;
      }

      await github.rest.issues.createComment({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: prNumber,
        body,
      });
# Always runs last: takes the in-progress label off the PR again. A failed
# removal is only logged, never fatal.
- name: Remove reviewing label
  if: always()
  uses: actions/github-script@v9
  env:
    PR_NUMBER: ${{ steps.pr.outputs.number }}
  with:
    github-token: ${{ steps.app-token.outputs.token }}
    script: |
      const issueNumber = parseInt(process.env.PR_NUMBER, 10);
      const { owner, repo } = context.repo;
      try {
        await github.rest.issues.removeLabel({
          owner,
          repo,
          issue_number: issueNumber,
          name: 'claude-security-reviewing',
        });
      } catch (err) {
        console.log('Label removal failed (may not exist):', err.message);
      }