From d1fed54250c8acb4a88f6810b8f610d620dc9507 Mon Sep 17 00:00:00 2001 From: Hai-Yuan Cao <2003072+caohy1988@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:04:05 -0700 Subject: [PATCH] ci(check-file-contents): exclude OAuth scope URLs from endpoint scan (#4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Check for hardcoded googleapis.com endpoints" step in .github/workflows/check-file-contents.yml uses grep -lE 'https?://[a-zA-Z0-9.-]+\.googleapis\.com' to find files that should also declare an `.mtls.googleapis.com` counterpart for dynamic endpoint selection. The regex matches any googleapis.com URL — including OAuth 2.0 scope URLs like https://www.googleapis.com/auth/cloud-platform and .../auth/bigquery — which are identity strings, not API endpoints. They don't have mTLS counterparts and never will. Any file that legitimately declares an OAuth scope (very common for ADK plugins integrating Google APIs) trips the gate even when no real endpoint is hardcoded. Fix: add a second pass that filters the candidate set down to files that have at least one googleapis.com URL OUTSIDE the OAuth scope namespace (i.e. at least one URL-bearing line that does not match `googleapis.com/auth/`; the filter is applied per line, so an endpoint that shares a line with an OAuth scope URL is not counted). The mTLS check runs only against that filtered set. Verified against four synthesized cases: only_oauth.py (only OAuth scopes) → ignored ✓ real_endpoint.py (endpoint, no mTLS) → flagged ✓ real_endpoint_with_mtls (endpoint + mTLS) → passes ✓ mixed.py (OAuth + endpoint, no mTLS) → flagged ✓ No effect on the surrounding `logger`, `from __future__`, or `cli` import checks. CI policy intent unchanged: real hardcoded googleapis.com endpoints still must declare their `.mtls` counterpart. 
Refs: - caohy1988/adk-python#2 (the BQAA Storage Write regional routing fix that surfaced this false positive) - GoogleCloudPlatform/BigQuery-Agent-Analytics-SDK#262 --- .github/workflows/check-file-contents.yml | 25 +++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check-file-contents.yml b/.github/workflows/check-file-contents.yml index 42d820ab47..a36d373c48 100644 --- a/.github/workflows/check-file-contents.yml +++ b/.github/workflows/check-file-contents.yml @@ -108,13 +108,30 @@ jobs: if [ -n "$CHANGED_FILES" ]; then echo "Checking for hardcoded endpoints in: $CHANGED_FILES" - # 1. Identify files containing any googleapis.com URL. + # 1. Identify files containing any googleapis.com URL (candidate set). set +e FILES_WITH_ENDPOINTS=$(grep -lE 'https?://[a-zA-Z0-9.-]+\.googleapis\.com' $CHANGED_FILES) - # 2. From those, identify files that are MISSING the required mTLS version. - if [ -n "$FILES_WITH_ENDPOINTS" ]; then - FILES_MISSING_MTLS=$(grep -L '.mtls.googleapis.com' $FILES_WITH_ENDPOINTS) + # 2. Filter the candidate set: drop files whose only googleapis.com + # references are OAuth 2.0 scope URLs (e.g. + # https://www.googleapis.com/auth/cloud-platform). Those are + # identity strings, not API endpoints — they don't have mTLS + # counterparts and never will. Without this filter, any source + # file that legitimately declares an OAuth scope (very common + # for ADK plugins integrating Google APIs) trips the gate even + # when no real endpoint is hardcoded. + FILES_WITH_REAL_ENDPOINTS="" + for f in $FILES_WITH_ENDPOINTS; do + if grep -E 'https?://[a-zA-Z0-9.-]+\.googleapis\.com' "$f" \ + | grep -vqE 'googleapis\.com/auth/'; then + FILES_WITH_REAL_ENDPOINTS="$FILES_WITH_REAL_ENDPOINTS $f" + fi + done + + # 3. From the filtered set, identify files MISSING the required + # mTLS variant. 
+ if [ -n "$FILES_WITH_REAL_ENDPOINTS" ]; then + FILES_MISSING_MTLS=$(grep -L '.mtls.googleapis.com' $FILES_WITH_REAL_ENDPOINTS) fi set -e