Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 100 additions & 17 deletions .github/workflows/assign-linked-issue-author.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,34 +31,115 @@ jobs:
run: |
set -euo pipefail

# Discover linked issues by parsing PR body for GitHub's closing
# keywords and issue references (REST + body-parse path), instead
# of querying the GraphQL `closingIssuesReferences` field.
#
# Rationale: the GraphQL endpoint has periodic transient HTTP 401
# auth flakes on `pull_request_target` runs that cause this
# workflow to fail across many open PRs simultaneously. The REST
# endpoint for PR metadata (`/repos/{owner}/{repo}/pulls/{N}`)
# does not exhibit the same flake, and the closing-keyword
# contract in PR bodies is the canonical user-facing source of
# "what does this PR close".
#
# Trade-off: body-parsing handles GitHub's documented closing
# keywords for same-repo issues (the only case this workflow
# cares about — it can only assign authors to issues in the
# same repo). It does NOT pick up linked issues added solely
# via the PR sidebar's "Development" picker without a body
# keyword. That edge case is rare and has not been observed
# for this workflow's job scope.

extract_linked_issues() {
# Read body from stdin, write one same-repo issue number per line.
python3 - "$REPO" <<'PY'
import os, re, sys

repo = sys.argv[1]
owner, name = repo.split("/", 1)
body = sys.stdin.read() or ""

# Strip fenced code blocks and HTML comments so closing keywords
# inside example snippets don't trigger false-positive assignments.
body = re.sub(r"```.*?```", "", body, flags=re.DOTALL)
body = re.sub(r"<!--.*?-->", "", body, flags=re.DOTALL)

# GitHub closing keywords (case-insensitive):
# close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved
# Same-repo issue reference forms recognised:
# #N
# OWNER/REPO#N (where OWNER/REPO matches this repo)
# GH-N
# https://github.com/OWNER/REPO/issues/N (same repo)
keyword = r"(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)"
repo_q = re.escape(f"{owner}/{name}")
ref = (
r"(?:"
r"#(?P<short>\d+)"
r"|GH-(?P<gh>\d+)"
r"|" + repo_q + r"#(?P<qual>\d+)"
r"|https?://github\.com/" + repo_q + r"/issues/(?P<url>\d+)"
r")"
)
pattern = re.compile(
r"\b" + keyword + r"\b\s*:?\s*" + ref,
flags=re.IGNORECASE,
)
Comment on lines +85 to +88

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if any existing PR bodies use comma-separated closing references
gh api --paginate "repos/$GITHUB_REPOSITORY/pulls?state=all&per_page=100" \
  --jq '.[] | select(.body != null) | {number, body}' 2>/dev/null | \
  grep -iE '(close[sd]?|fix(e[sd])?|resolve[sd]?)\s*#[0-9]+\s*,\s*#[0-9]+' | head -5

Repository: NVIDIA/NemoClaw

Length of output: 41


🏁 Script executed:

#!/bin/bash
# Identify whether any PR bodies in this repo contain comma- or "and"-separated issue lists
set -euo pipefail

REPO="${GITHUB_REPOSITORY:-NVIDIA/NemoClaw}"
echo "Repo: $REPO" >&2

# Comma-separated examples: "Closes `#1`, `#2`" or "Fixes `#1`, `#2`"
gh api --paginate "repos/$REPO/pulls?state=all&per_page=100" \
  --jq '.[] | select(.body != null) | {number, body}' 2>/dev/null | \
  rg -i --no-heading '(close[sd]?|fix(e[sd])?|resolve[sd]?)\s*#\d+\s*,\s*#\d+' --max-count 5

# "and"-separated examples: "Fixes `#1` and `#2`"
gh api --paginate "repos/$REPO/pulls?state=all&per_page=100" \
  --jq '.[] | select(.body != null) | {number, body}' 2>/dev/null | \
  rg -i --no-heading '(close[sd]?|fix(e[sd])?|resolve[sd]?)\s*#\d+\s+(and|&)\s*#\d+' --max-count 5

# Also get rough counts for each pattern (without printing bodies)
echo "Counts (comma-separated / and-separated):" >&2
gh api --paginate "repos/$REPO/pulls?state=all&per_page=100" \
  --jq '[.[] | select(.body != null) | select((.body|test("(?i)(close[sd]?|fix(e[sd])?|resolve[sd]?)\\\\s*#\\\\d+\\\\s*,\\\\s*#\\\\d+"))) ] | length' 2>/dev/null
gh api --paginate "repos/$REPO/pulls?state=all&per_page=100" \
  --jq '[.[] | select(.body != null) | select((.body|test("(?i)(close[sd]?|fix(e[sd])?|resolve[sd]?)\\\\s*#\\\\d+\\\\s+(and|&|\\\\band\\\\b)\\\\s*#\\\\d+"))) ] | length' 2>/dev/null

Repository: NVIDIA/NemoClaw

Length of output: 8400


🏁 Script executed:

#!/bin/bash
# Identify whether any PR bodies in this repo contain comma- or "and"-separated issue lists
set -euo pipefail

REPO="${GITHUB_REPOSITORY:-NVIDIA/NemoClaw}"
echo "Repo: $REPO" >&2

comma_regex='(close[sd]?|fix(e[sd])?|resolve[sd]?)\s*#[0-9]+\s*,\s*#[0-9]+'
and_regex='(close[sd]?|fix(e[sd])?|resolve[sd]?)\s*#[0-9]+\s+(and|&)\s*#[0-9]+'

# Print up to 5 matching PR numbers/bodies (compact)
gh api --paginate "repos/$REPO/pulls?state=all&per_page=100" \
  --jq '.[] | select(.body != null) | {number, body}' 2>/dev/null | \
  grep -iE "$comma_regex" | head -5

gh api --paginate "repos/$REPO/pulls?state=all&per_page=100" \
  --jq '.[] | select(.body != null) | {number, body}' 2>/dev/null | \
  grep -iE "$and_regex" | head -5

# Get rough counts (still limited to what the API returns within paginate)
echo "Counts:" >&2
gh api --paginate "repos/$REPO/pulls?state=all&per_page=100" \
  --jq '[.[] | select(.body != null) | select((.body | test($comma_re; "i")))] | length' 2>/dev/null --arg comma_re "$comma_regex"
gh api --paginate "repos/$REPO/pulls?state=all&per_page=100" \
  --jq '[.[] | select(.body != null) | select((.body | test($and_re; "i")))] | length' 2>/dev/null --arg and_re "$and_regex"

Repository: NVIDIA/NemoClaw

Length of output: 8400


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE=".github/workflows/assign-linked-issue-author.yaml"
echo "== $FILE =="
nl -ba "$FILE" | sed -n '1,200p'

Repository: NVIDIA/NemoClaw

Length of output: 176


🏁 Script executed:

#!/bin/bash
set -euo pipefail

FILE=".github/workflows/assign-linked-issue-author.yaml"
echo "== $FILE =="
# cat -n is commonly available; fall back to awk if needed
if command -v cat >/dev/null 2>&1 && command -v sed >/dev/null 2>&1; then
  cat -n "$FILE" | sed -n '1,220p'
else
  awk '{printf "%6d\t%s\n", NR, $0}' "$FILE" | sed -n '1,220p'
fi

Repository: NVIDIA/NemoClaw

Length of output: 7851


Fix linked-issue parsing for multi-issue lists (“Closes #1, #2” / “Fixes #1 and #2”)

`.github/workflows/assign-linked-issue-author.yaml` builds `pattern = re.compile(r"\b" + keyword + r"\b\s*:?\s*" + ref, flags=re.IGNORECASE)`, which matches only a single keyword followed by one issue ref per match. For bodies like `Closes #3562, #3568` or `Fixes #1 and #2`, only the first `#N` gets captured, so later issues won't be assigned to the PR author.
Extend the regex/parsing to collect additional #N refs after the initial keyword when separated by commas/“and”.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In @.github/workflows/assign-linked-issue-author.yaml around lines 85 - 88, The
current regex built into pattern (pattern = re.compile(r"\b" + keyword +
r"\b\s*:?\s*" + ref, flags=re.IGNORECASE)) only captures one issue ref after the
keyword; update the logic so it collects subsequent refs separated by commas or
the word "and". Modify the compiled regex (or add a follow-up findall) to allow
repeated occurrences of ref after the initial match (for example make the part
after keyword accept one ref followed by zero-or-more groups like
(?:\s*(?:,|and)\s*ref) or run a second re.findall using the ref pattern on the
substring following the keyword match) and then iterate those matches to assign
each issue; refer to the existing variables pattern, keyword and ref and update
the code that extracts matches so all refs like "Closes `#1`, `#2` and `#3`" are
collected.


seen = []
seen_set = set()
for m in pattern.finditer(body):
n = m.group("short") or m.group("gh") or m.group("qual") or m.group("url")
if n and n not in seen_set:
seen_set.add(n)
seen.append(n)
for n in seen:
print(n)
PY
}

#######################################
# Assign a PR author to one linked issue, using REST calls only.
# Globals:   REPO (read) - "owner/name" of the repository
# Arguments: $1 - issue number
#            $2 - login of the PR author
#            $3 - PR number (used in log messages only)
# Outputs:   one status line on stdout: a plain success message, a
#            ::warning:: if the assignment call fails, or a ::notice::
#            if the assignability probe does not succeed
# Returns:   0 in all three cases; a failed assignment is reported,
#            not propagated
#######################################
assign_author_to_issue() {
  local issue_number="$1"
  local author="$2"
  local pr_number="$3"

  # The assignability probe is REST-only. Its output is discarded on
  # purpose: only the exit status matters here.
  if ! gh api "repos/$REPO/issues/$issue_number/assignees/$author" >/dev/null 2>&1; then
    echo "::notice::@$author cannot be assigned to issue #$issue_number from PR #$pr_number"
    return 0
  fi

  # Add the assignee via REST instead of `gh issue edit`, which
  # uses GraphQL under the hood. This is the only assignment call, so
  # the issue is assigned (and the success line printed) exactly once.
  if gh api -X POST "repos/$REPO/issues/$issue_number/assignees" \
    -f "assignees[]=$author" >/dev/null 2>&1; then
    echo "Assigned issue #$issue_number to @$author from PR #$pr_number"
  else
    echo "::warning::Failed to add @$author to issue #$issue_number from PR #$pr_number"
  fi
}

process_pr_json() {
local pr_json="$1"
local author
local issues
local pr_number
process_pr() {
local pr_number="$1"

# Fetch via REST (no GraphQL dependency).
local pr_payload
if ! pr_payload="$(gh api "repos/$REPO/pulls/$pr_number" 2>/dev/null)"; then
echo "::warning::Failed to fetch PR #$pr_number metadata; skipping"
return
fi

pr_number="$(jq -r '.number' <<<"$pr_json")"
author="$(jq -r '.author.login // empty' <<<"$pr_json")"
local author body
author="$(jq -r '.user.login // empty' <<<"$pr_payload")"
body="$(jq -r '.body // ""' <<<"$pr_payload")"

if [ -z "$author" ]; then
echo "::notice::PR #$pr_number has no assignable author"
return
fi

issues="$(jq -r '.closingIssuesReferences[]?.number' <<<"$pr_json")"
local issues
issues="$(printf '%s' "$body" | extract_linked_issues)"

if [ -z "$issues" ]; then
echo "PR #$pr_number does not reference any closing issues"
Expand All @@ -72,13 +153,15 @@ jobs:
}

# Entry point: process a single PR when PR_NUMBER is set, otherwise
# walk every open PR in the repository. Both paths go through
# process_pr, which fetches PR metadata over REST.
if [ -n "${PR_NUMBER:-}" ]; then
  process_pr "$PR_NUMBER"
else
  # Schedule / workflow_dispatch: paginate through open PRs via REST.
  # Uses Link-header pagination automatically through `--paginate`.
  gh api --paginate \
    "repos/$REPO/pulls?state=open&per_page=100" \
    --jq '.[] | .number' |
    while IFS= read -r pr_number; do
      # Skip blank lines so process_pr never receives an empty number.
      [ -n "$pr_number" ] || continue
      process_pr "$pr_number"
    done
fi