From 7cfb215dc871449f40af2e6195e100b4016ae8b7 Mon Sep 17 00:00:00 2001 From: Todd Green Date: Mon, 1 Jun 2026 15:48:28 -0700 Subject: [PATCH 1/8] ci: add pgspot SQL security gate for shipped extension SQL Add a dedicated `pgspot` CI job that lints the static SQL pg_durable actually ships -- the generated CREATE EXTENSION install SQL plus any non-frozen upgrade script -- for search_path / privilege-escalation issues (CVE-2018-1058 class) and other unsafe constructs. - scripts/run-pgspot.sh: generic engine pinning pgspot 0.9.2 in a venv, with per-file attribution and a documented (currently empty) --ignore allowlist. pgspot already exits non-zero on any error, warning, or unknown, so the strict zero-findings policy is enforced by its exit code. - scripts/pgspot-gate.sh: project entry point that scans the generated install SQL plus upgrade scripts not listed in the frozen baseline. - sql/pgspot-frozen.txt: baseline exempting already-released, immutable upgrade scripts. New upgrade scripts are gated automatically; the frozen first-version fixture is excluded by construction. - ci.yml: dedicated pgspot job (pg17) generating install SQL via cargo pgrx schema and running the gate, isolated for a clean red/green signal. This commit intentionally lands the gate without source fixes; the generated install SQL findings are addressed in follow-up commits. --- .github/workflows/ci.yml | 106 ++++++++++++++++++++++++++++++++++ scripts/pgspot-gate.sh | 70 +++++++++++++++++++++++ scripts/run-pgspot.sh | 120 +++++++++++++++++++++++++++++++++++++++ sql/pgspot-frozen.txt | 20 +++++++ 4 files changed, 316 insertions(+) create mode 100755 scripts/pgspot-gate.sh create mode 100755 scripts/run-pgspot.sh create mode 100644 sql/pgspot-frozen.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4436c347..b3fbc6f1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -215,3 +215,109 @@ jobs: - name: Stop PostgreSQL after pg_regress if: steps.pg_regress.outcome != 'skipped' run: ./scripts/pg-stop.sh --pg-version ${{ matrix.pg_version }} + + pgspot: + name: pgspot SQL Security Gate + runs-on: ubuntu-latest + needs: [format] + permissions: + contents: read + env: + PG_VERSION: 17 + steps: + - uses: actions/checkout@v4 + + - name: Free disk space and show usage + run: | + echo "Disk usage before cleanup" + df -h + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /usr/local/.ghcup + docker system prune -af || true + echo "Disk usage after cleanup" + df -h + + - name: Install Rust nightly + run: | + rustup toolchain install nightly --profile minimal + rustup default nightly + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y \ + pkg-config \ + libssl-dev \ + libclang-dev \ + clang \ + bison \ + flex \ + libreadline-dev \ + zlib1g-dev \ + libxml2-dev \ + libxslt1-dev \ + libicu-dev + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Cache cargo-pgrx tool + uses: actions/cache@v5 + with: + path: | + ~/.cargo/.crates2.json + ~/.cargo/bin/cargo-pgrx + key: ${{ runner.os }}-cargo-pgrx-0.16.1-v1 + + - name: Cache Cargo dependency sources + uses: actions/cache@v5 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + key: ${{ runner.os }}-cargo-deps-${{ hashFiles('**/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo-deps- + + - name: Cache PostgreSQL build + uses: actions/cache@v5 + with: + path: | + ~/.pgrx/config.toml + ~/.pgrx/${{ env.PG_VERSION }}.* + key: ${{ runner.os }}-pgrx-pg${{ env.PG_VERSION }} + + - name: Install cargo-pgrx + run: | + if ! command -v cargo-pgrx &> /dev/null || ! cargo pgrx --version | grep -q "0.16.1"; then + cargo install cargo-pgrx --version 0.16.1 --locked + else + echo "cargo-pgrx 0.16.1 already installed, skipping" + fi + + - name: Initialize pgrx + run: | + if [ ! -d "$HOME/.pgrx/${{ env.PG_VERSION }}."* ]; then + cargo pgrx init --pg${{ env.PG_VERSION }} download + else + echo "PostgreSQL ${{ env.PG_VERSION }} already initialized, skipping download" + fi + + # Generate the CREATE EXTENSION install SQL with the same feature set used + # by the tested/released build, so the gate scans exactly what ships. + - name: Generate extension install SQL + run: | + cargo pgrx schema pg${{ env.PG_VERSION }} \ + --no-default-features \ + --features pg${{ env.PG_VERSION }},http-allow-test-domains \ + -o /tmp/pg_durable-install.sql + echo "Generated install SQL:" + wc -l /tmp/pg_durable-install.sql + + # Scan the generated install SQL plus active upgrade scripts that are not in + # the frozen baseline (sql/pgspot-frozen.txt). Released upgrade scripts and the + # frozen first-version fixture are intentionally exempt; new upgrade scripts are + # gated automatically. + - name: Run pgspot SQL security gate + run: scripts/pgspot-gate.sh /tmp/pg_durable-install.sql diff --git a/scripts/pgspot-gate.sh b/scripts/pgspot-gate.sh new file mode 100755 index 00000000..cd551ba5 --- /dev/null +++ b/scripts/pgspot-gate.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# Copyright (c) Microsoft Corporation. +# Licensed under the PostgreSQL License. + +# pgspot-gate.sh - Project entry point for the pgspot SQL security gate. +# +# Scans the generated `CREATE EXTENSION` install SQL plus every active upgrade +# script that is NOT in the frozen baseline (sql/pgspot-frozen.txt). New upgrade +# scripts are therefore gated automatically; released ones are exempt. +# +# Usage: +# scripts/pgspot-gate.sh [INSTALL_SQL] +# +# INSTALL_SQL Path to the generated install SQL to scan. Optional: if omitted +# (e.g. when a pgrx PostgreSQL install is unavailable locally) only +# the non-frozen upgrade scripts are scanned. In CI the install SQL +# is always generated and passed. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +SQL_DIR="$PROJECT_DIR/sql" +FROZEN_LIST="$SQL_DIR/pgspot-frozen.txt" + +install_sql="${1:-}" + +# Build the set of frozen (exempt) upgrade-script basenames. +declare -A frozen=() +if [[ -f "$FROZEN_LIST" ]]; then + while IFS= read -r line; do + line="${line%%#*}" + line="$(echo "$line" | xargs || true)" + [[ -z "$line" ]] && continue + frozen["$line"]=1 + done < "$FROZEN_LIST" +fi + +# Collect non-frozen upgrade scripts (basename matches `*--*--*.sql`, i.e. two +# `--` separators; the single-`--` first-version fixture is never matched). +targets=() +shopt -s nullglob +for f in "$SQL_DIR"/pg_durable--*--*.sql; do + base="$(basename "$f")" + if [[ -n "${frozen[$base]:-}" ]]; then + echo "frozen (skip): $base" + continue + fi + targets+=("$f") +done +shopt -u nullglob + +scan=() +if [[ -n "$install_sql" ]]; then + if [[ ! -f "$install_sql" ]]; then + echo "ERROR: install SQL not found: $install_sql" >&2 + exit 2 + fi + scan+=("$install_sql") +else + echo "NOTE: no install SQL provided; scanning non-frozen upgrade scripts only." >&2 +fi +scan+=("${targets[@]}") + +if [[ ${#scan[@]} -eq 0 ]]; then + echo "Nothing to scan (no install SQL and no non-frozen upgrade scripts)." + exit 0 +fi + +exec "$SCRIPT_DIR/run-pgspot.sh" "${scan[@]}" diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh new file mode 100755 index 00000000..751fa7dc --- /dev/null +++ b/scripts/run-pgspot.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# Copyright (c) Microsoft Corporation. +# Licensed under the PostgreSQL License. + +# run-pgspot.sh - Lint shipped extension SQL with pgspot +# +# Scans the static SQL that pg_durable actually ships -- the generated +# `CREATE EXTENSION` install SQL and the active upgrade scripts -- for +# search_path / privilege-escalation issues (CVE-2018-1058 class) and other +# unsafe SQL constructs. +# +# Strict policy: the gate passes only when pgspot reports ZERO errors, ZERO +# warnings and ZERO unknowns for every file, except for codes on the documented +# allowlist below. pgspot already exits non-zero on any error, warning or +# unknown, so this wrapper relies on that exit code and adds: a pinned pgspot +# version, an explicit allowlist with justifications, and per-file attribution. +# +# Usage: +# scripts/run-pgspot.sh FILE [FILE ...] +# +# Each FILE is checked independently. Globs should be expanded by the caller; +# patterns that match nothing are reported and skipped. +# +# Environment: +# PGSPOT_VERSION pgspot version to pin (default: 0.9.2) +# PGSPOT_VENV venv directory to install/reuse (default: a cache dir) +# PGSPOT_BIN path to an existing pgspot executable (skips venv setup) + +set -euo pipefail + +PGSPOT_VERSION="${PGSPOT_VERSION:-0.9.2}" +PGSPOT_VENV="${PGSPOT_VENV:-${XDG_CACHE_HOME:-$HOME/.cache}/pg_durable/pgspot-venv}" + +# --- Allowlist ------------------------------------------------------------- +# +# Each entry MUST carry a justification. Keep this list as small as possible; +# prefer fixing the source over ignoring a code. Codes here are passed to +# pgspot via --ignore, which suppresses both the finding and the exit code. +# +# (Intentionally empty for the initial gate so that all real findings surface. +# Add entries only for constructs we provably cannot control from source, e.g. +# pgrx-emitted DDL, each with a one-line reason.) +PGSPOT_IGNORE=( + # Example (disabled): PS010 # `CREATE SCHEMA IF NOT EXISTS df` emitted by pgrx #[pg_schema] +) + +# --------------------------------------------------------------------------- + +err() { printf '%s\n' "$*" >&2; } + +resolve_pgspot() { + if [[ -n "${PGSPOT_BIN:-}" ]]; then + if "$PGSPOT_BIN" --version 2>/dev/null | grep -q "pgspot ${PGSPOT_VERSION}"; then + PGSPOT="$PGSPOT_BIN" + return + fi + err "PGSPOT_BIN=$PGSPOT_BIN is not pgspot ${PGSPOT_VERSION}" + exit 2 + fi + + local venv_bin="$PGSPOT_VENV/bin/pgspot" + if [[ -x "$venv_bin" ]] && "$venv_bin" --version 2>/dev/null | grep -q "pgspot ${PGSPOT_VERSION}"; then + PGSPOT="$venv_bin" + return + fi + + err "Installing pgspot ${PGSPOT_VERSION} into ${PGSPOT_VENV} ..." + python3 -m venv "$PGSPOT_VENV" + "$PGSPOT_VENV/bin/pip" install --quiet --upgrade pip + "$PGSPOT_VENV/bin/pip" install --quiet "pgspot==${PGSPOT_VERSION}" + PGSPOT="$venv_bin" +} + +main() { + if [[ $# -eq 0 ]]; then + err "usage: $0 FILE [FILE ...]" + exit 2 + fi + + resolve_pgspot + + local ignore_args=() + local code + for code in "${PGSPOT_IGNORE[@]:-}"; do + [[ -z "$code" ]] && continue + ignore_args+=(--ignore "$code") + done + + local failed=0 + local checked=0 + local file + for file in "$@"; do + if [[ ! -f "$file" ]]; then + err "skip (not found): $file" + continue + fi + checked=$((checked + 1)) + printf '\n=== pgspot: %s ===\n' "$file" + if "$PGSPOT" "${ignore_args[@]}" "$file"; then + printf 'OK: %s\n' "$file" + else + err "FAIL: $file" + failed=$((failed + 1)) + fi + done + + if [[ $checked -eq 0 ]]; then + err "ERROR: no files were checked" + exit 2 + fi + + printf '\n--- pgspot summary: %d file(s) checked, %d failed ---\n' "$checked" "$failed" + if [[ $failed -ne 0 ]]; then + err "pgspot gate FAILED ($failed file(s) with findings)" + exit 1 + fi + printf 'pgspot gate PASSED\n' +} + +main "$@" diff --git a/sql/pgspot-frozen.txt b/sql/pgspot-frozen.txt new file mode 100644 index 00000000..b4182589 --- /dev/null +++ b/sql/pgspot-frozen.txt @@ -0,0 +1,20 @@ +# pgspot frozen baseline +# +# Upgrade scripts listed here are RELEASED, immutable artifacts and are exempt +# from the pgspot SQL security gate. A customer on an older version runs these +# verbatim during `ALTER EXTENSION UPDATE`, and `test-upgrade.sh` compares the +# resulting schema, so they must not be edited after release. Their security +# posture is owned by the generated install SQL of the current version (fixed in +# Rust source), not by retrofitting historical upgrade SQL. +# +# The frozen first-version install fixture `pg_durable--0.1.1.sql` is excluded +# separately (the gate only scans upgrade scripts matching `*--*--*.sql`). +# +# Process: any NEW upgrade script is gated automatically (it is not in this +# list). When a version is released, add its upgrade script's basename here. +# +# One basename per line; blank lines and `#` comments are ignored. + +pg_durable--0.1.1--0.2.0.sql +pg_durable--0.2.0--0.2.1.sql +pg_durable--0.2.1--0.2.2.sql From 21fce9477cb1e55045351bd7c6dfdcb356296260 Mon Sep 17 00:00:00 2001 From: Todd Green Date: Mon, 1 Jun 2026 16:18:10 -0700 Subject: [PATCH 2/8] fix: schema-qualify df-schema DDL to satisfy pgspot gate Schema-qualify every function, type cast and operator in the generated `df` install DDL so pgspot reports no search_path / CVE-2018-1058 class findings: - now() -> pg_catalog.now() (column defaults) - quote_ident(current_user)::regrole -> pg_catalog.quote_ident(current_user)::pg_catalog.regrole (vars default and the three per-user RLS policies) - CHECK / RLS operators: `~` -> OPERATOR(pg_catalog.~); `IN (...)` -> OPERATOR(pg_catalog.=) ANY (ARRAY[...]); RLS `=` -> OPERATOR(pg_catalog.=) These changes are purely textual: the parser binds built-in objects to the same OIDs regardless of qualification, so the stored catalog representation (pg_get_constraintdef / pg_get_expr / policy defs) is byte-identical to the unqualified form. Verified against a live server that old and new DDL produce identical catalog state, so the upgrade test (Scenario A) is unaffected and no upgrade script is required. Allowlist PS010 for the pgrx-emitted `CREATE SCHEMA IF NOT EXISTS df` (not controllable from source; residual risk negligible). Schemas we control (CREATE SCHEMA duroxide) omit IF NOT EXISTS and remain gated. --- scripts/run-pgspot.sh | 9 ++++++- src/lib.rs | 60 +++++++++++++++++++++---------------------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh index 751fa7dc..f5f89dd7 100755 --- a/scripts/run-pgspot.sh +++ b/scripts/run-pgspot.sh @@ -41,7 +41,14 @@ PGSPOT_VENV="${PGSPOT_VENV:-${XDG_CACHE_HOME:-$HOME/.cache}/pg_durable/pgspot-ve # Add entries only for constructs we provably cannot control from source, e.g. # pgrx-emitted DDL, each with a one-line reason.) PGSPOT_IGNORE=( - # Example (disabled): PS010 # `CREATE SCHEMA IF NOT EXISTS df` emitted by pgrx #[pg_schema] + # PS010 "Unsafe schema creation: df" -- pgrx emits `CREATE SCHEMA IF NOT + # EXISTS df` from #[pg_schema]; the `IF NOT EXISTS` (the construct pgspot + # flags, as it can adopt a pre-existing attacker-crafted schema) is not + # controllable from our source. Residual risk is negligible: all df objects + # are created by the installing superuser and we ship no SECURITY DEFINER + # functions. Schemas we DO control are created without IF NOT EXISTS (see + # `CREATE SCHEMA duroxide`), so they fail loudly and are not ignored here. + PS010 ) # --------------------------------------------------------------------------- diff --git a/src/lib.rs b/src/lib.rs index 662b392e..f7fe4cc1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -166,8 +166,8 @@ CREATE TABLE IF NOT EXISTS df.nodes ( error TEXT, submitted_by REGROLE, database TEXT, - created_at TIMESTAMPTZ DEFAULT now(), - updated_at TIMESTAMPTZ DEFAULT now() + created_at TIMESTAMPTZ DEFAULT pg_catalog.now(), + updated_at TIMESTAMPTZ DEFAULT pg_catalog.now() ); COMMENT ON COLUMN df.nodes.submitted_by IS @@ -181,8 +181,8 @@ CREATE TABLE IF NOT EXISTS df.instances ( status TEXT DEFAULT 'pending', submitted_by REGROLE NOT NULL, database TEXT, - created_at TIMESTAMPTZ DEFAULT now(), - updated_at TIMESTAMPTZ DEFAULT now(), + created_at TIMESTAMPTZ DEFAULT pg_catalog.now(), + updated_at TIMESTAMPTZ DEFAULT pg_catalog.now(), completed_at TIMESTAMPTZ ); @@ -200,7 +200,7 @@ CREATE INDEX IF NOT EXISTS idx_nodes_instance ON df.nodes(instance_id); CREATE TABLE IF NOT EXISTS df.vars ( name TEXT NOT NULL, value TEXT, - owner REGROLE NOT NULL DEFAULT quote_ident(current_user)::regrole, + owner REGROLE NOT NULL DEFAULT pg_catalog.quote_ident(current_user)::pg_catalog.regrole, PRIMARY KEY (owner, name) ); @@ -210,17 +210,17 @@ CREATE TABLE IF NOT EXISTS df.vars ( -- recreation even though the extension is always "present" in pg_extension. CREATE TABLE IF NOT EXISTS df._worker_epoch ( epoch_id UUID PRIMARY KEY, - started_at TIMESTAMPTZ DEFAULT now(), - last_seen_at TIMESTAMPTZ DEFAULT now() + started_at TIMESTAMPTZ DEFAULT pg_catalog.now(), + last_seen_at TIMESTAMPTZ DEFAULT pg_catalog.now() ); ALTER TABLE df.instances ADD CONSTRAINT instances_id_format_chk - CHECK (id ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT instances_root_node_format_chk - CHECK (root_node ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (root_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT instances_status_chk - CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled')) NOT VALID, + CHECK (status OPERATOR(pg_catalog.=) ANY (ARRAY['pending', 'running', 'completed', 'failed', 'cancelled'])) NOT VALID, -- Supports the composite FK from df.nodes that ties node identity to the instance row. ADD CONSTRAINT instances_identity_key UNIQUE (id, submitted_by); @@ -231,35 +231,35 @@ ALTER TABLE df.nodes ADD CONSTRAINT nodes_submitted_by_present_chk CHECK (submitted_by IS NOT NULL) NOT VALID, ADD CONSTRAINT nodes_id_format_chk - CHECK (id ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT nodes_instance_id_format_chk - CHECK (instance_id ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (instance_id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT nodes_left_node_format_chk - CHECK (left_node IS NULL OR left_node ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (left_node IS NULL OR left_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT nodes_right_node_format_chk - CHECK (right_node IS NULL OR right_node ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (right_node IS NULL OR right_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT nodes_node_type_chk - CHECK (node_type IN ('SQL', 'THEN', 'IF', 'JOIN', 'LOOP', 'BREAK', 'RACE', 'SLEEP', 'WAIT_SCHEDULE', 'HTTP', 'SIGNAL')) NOT VALID, + CHECK (node_type OPERATOR(pg_catalog.=) ANY (ARRAY['SQL', 'THEN', 'IF', 'JOIN', 'LOOP', 'BREAK', 'RACE', 'SLEEP', 'WAIT_SCHEDULE', 'HTTP', 'SIGNAL'])) NOT VALID, ADD CONSTRAINT nodes_result_name_chk - CHECK (result_name IS NULL OR result_name ~ '^[A-Za-z_][A-Za-z0-9_]*$') NOT VALID, + CHECK (result_name IS NULL OR result_name OPERATOR(pg_catalog.~) '^[A-Za-z_][A-Za-z0-9_]*$') NOT VALID, ADD CONSTRAINT nodes_status_chk - CHECK (status IN ('pending', 'running', 'completed', 'failed')) NOT VALID, + CHECK (status OPERATOR(pg_catalog.=) ANY (ARRAY['pending', 'running', 'completed', 'failed'])) NOT VALID, ADD CONSTRAINT nodes_result_status_chk - CHECK (result IS NULL OR status IN ('completed', 'failed')) NOT VALID, + CHECK (result IS NULL OR status OPERATOR(pg_catalog.=) ANY (ARRAY['completed', 'failed'])) NOT VALID, ADD CONSTRAINT nodes_structure_chk CHECK ( CASE - WHEN node_type IN ('SQL', 'SLEEP', 'WAIT_SCHEDULE', 'BREAK', 'HTTP', 'SIGNAL') + WHEN node_type OPERATOR(pg_catalog.=) ANY (ARRAY['SQL', 'SLEEP', 'WAIT_SCHEDULE', 'BREAK', 'HTTP', 'SIGNAL']) THEN left_node IS NULL AND right_node IS NULL AND query IS NOT NULL - WHEN node_type = 'THEN' + WHEN node_type OPERATOR(pg_catalog.=) 'THEN' THEN left_node IS NOT NULL AND right_node IS NOT NULL AND query IS NULL - WHEN node_type = 'IF' + WHEN node_type OPERATOR(pg_catalog.=) 'IF' THEN left_node IS NOT NULL AND right_node IS NOT NULL AND query IS NOT NULL - WHEN node_type = 'LOOP' + WHEN node_type OPERATOR(pg_catalog.=) 'LOOP' THEN left_node IS NOT NULL AND right_node IS NULL - WHEN node_type = 'JOIN' + WHEN node_type OPERATOR(pg_catalog.=) 'JOIN' THEN left_node IS NOT NULL AND right_node IS NOT NULL - WHEN node_type = 'RACE' + WHEN node_type OPERATOR(pg_catalog.=) 'RACE' THEN left_node IS NOT NULL AND right_node IS NOT NULL AND query IS NULL ELSE FALSE END @@ -302,24 +302,24 @@ ALTER TABLE df.instances ENABLE ROW LEVEL SECURITY; CREATE POLICY instances_user_isolation ON df.instances FOR ALL - USING (submitted_by = quote_ident(current_user)::regrole) - WITH CHECK (submitted_by = quote_ident(current_user)::regrole); + USING (submitted_by OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole) + WITH CHECK (submitted_by OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole); -- Enable RLS on df.nodes ALTER TABLE df.nodes ENABLE ROW LEVEL SECURITY; CREATE POLICY nodes_user_isolation ON df.nodes FOR ALL - USING (submitted_by = quote_ident(current_user)::regrole) - WITH CHECK (submitted_by = quote_ident(current_user)::regrole); + USING (submitted_by OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole) + WITH CHECK (submitted_by OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole); -- Enable RLS on df.vars (per-user variable isolation) ALTER TABLE df.vars ENABLE ROW LEVEL SECURITY; CREATE POLICY vars_user_isolation ON df.vars FOR ALL - USING (owner = quote_ident(current_user)::regrole) - WITH CHECK (owner = quote_ident(current_user)::regrole); + USING (owner OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole) + WITH CHECK (owner OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole); -- No automatic PUBLIC grants. Admins must explicitly grant privileges -- to application roles after CREATE EXTENSION. From 7e0fe8af54c49dacd431f741f768aef0c50cf52c Mon Sep 17 00:00:00 2001 From: Todd Green Date: Mon, 1 Jun 2026 17:41:40 -0700 Subject: [PATCH 3/8] review: harden pgspot gate and qualify install-time DO blocks - Qualify the two install-SQL DO blocks so unqualified refs can't resolve via the session search_path (CVE-2018-1058); verified clean in isolation - Scope PS010 to the df schema via a per-finding allowlist (not global --ignore) - Make scan_file fail-closed with a two-pass check (catches fatals/unknowns) - Add DO-block isolation scan to defeat pgspot's whole-file search_path leak - Treat an empty scan set as a hard error - Bump 0.2.2 -> 0.2.3 with a no-op upgrade script (qualification is catalog-invisible) to preserve release immutability - Docs: freeze-step/qualification guidance + Version-Specific Changes entry --- .github/workflows/ci.yml | 7 +- Cargo.lock | 2 +- Cargo.toml | 2 +- docs/upgrade-testing.md | 28 ++++- scripts/extract-do-blocks.py | 85 ++++++++++++++ scripts/pgspot-gate.sh | 6 +- scripts/run-pgspot.sh | 186 +++++++++++++++++++++++++------ sql/pg_durable--0.2.2--0.2.3.sql | 22 ++++ src/lib.rs | 16 ++- 9 files changed, 309 insertions(+), 45 deletions(-) create mode 100644 scripts/extract-do-blocks.py create mode 100644 sql/pg_durable--0.2.2--0.2.3.sql diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b3fbc6f1..d3778195 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -304,8 +304,11 @@ jobs: echo "PostgreSQL ${{ env.PG_VERSION }} already initialized, skipping download" fi - # Generate the CREATE EXTENSION install SQL with the same feature set used - # by the tested/released build, so the gate scans exactly what ships. + # Generate the CREATE EXTENSION install SQL to scan. `http-allow-test-domains` + # is a test-only feature (it widens df.http()'s domain allowlist) and is NOT + # part of the released build, but it emits no extra DDL, so the install SQL's + # DDL surface here is identical to what ships. It is enabled only to match the + # clippy/test jobs' feature set and keep one cached build. - name: Generate extension install SQL run: | cargo pgrx schema pg${{ env.PG_VERSION }} \ diff --git a/Cargo.lock b/Cargo.lock index ca1c17ac..55e3e3d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1892,7 +1892,7 @@ dependencies = [ [[package]] name = "pg_durable" -version = "0.2.2" +version = "0.2.3" dependencies = [ "bigdecimal", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 6bf06449..45a400ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pg_durable" -version = "0.2.2" +version = "0.2.3" edition = "2021" license = "PostgreSQL" repository = "https://github.com/microsoft/pg_durable" diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 567b831d..33a9e79a 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -158,8 +158,9 @@ Each PR that changes the extension schema or modifies SQL queries in Rust code s 1. Add the necessary DDL to the upgrade script (`sql/pg_durable----.sql`) 2. Ensure the `.so` is backward compatible with **all** previous schemas in the same provider compatibility line (Scenario B1) -3. Add version-specific notes to this document under "Version-Specific Changes" below -4. Pass upgrade tests in CI +3. Keep all new DDL — in the Rust install SQL *and* in any new upgrade script — schema-qualified so it passes the pgspot SQL security gate (`scripts/pgspot-gate.sh`): qualify operators as `OPERATOR(pg_catalog.)`, functions/types/objects by schema (e.g. `pg_catalog.now()`), and qualify references inside anonymous `DO` blocks (they run under the session search_path and are isolation-scanned by the gate). New upgrade scripts are gated automatically; they are exempt only after being frozen (see below). +4. Add version-specific notes to this document under "Version-Specific Changes" below +5. Pass upgrade and pgspot tests in CI ### Future work @@ -185,6 +186,21 @@ No additional fixture is needed for subsequent minors — intermediate versions `cargo pgrx package` generates the new major's install SQL. The previous major's install SQL and upgrade scripts are still needed for the A/B2 upgrade chain when the provider line continues across the major bump. B1 will be a no-op if there are no previous compatible versions within the new major, or if `PROVIDER_COMPAT_START_VERSION` marks the new major as the start of a new provider line. +### Freezing released upgrade scripts (pgspot gate) + +The pgspot gate scans every upgrade script matching `*--*--*.sql` that is **not** +listed in `sql/pgspot-frozen.txt`. A released upgrade script is an immutable +artifact (customers run it verbatim during `ALTER EXTENSION UPDATE`), so once a +version ships its upgrade script is added to the frozen list and exempted. + +**Freezing must be done in a separate post-release PR**, never in the same PR +that introduces or edits the script. Adding a script to the frozen list in the +same change that authors it would let unqualified DDL bypass the gate entirely. +The intended lifecycle is: a new upgrade script is gated (and must pass) while in +development → the version is tagged/released → a follow-up PR appends the now- +immutable script's basename to `sql/pgspot-frozen.txt`. Each frozen basename +should therefore correspond to an already-released tag. + --- ## Version-Specific Changes @@ -192,6 +208,14 @@ No additional fixture is needed for subsequent minors — intermediate versions Each schema-changing PR should add a section here documenting what changed, what the upgrade script handles, and any backward compatibility considerations. +### v0.2.2 → v0.2.3 + +#### pgspot SQL security gate + schema-qualified install DDL +- **DDL change:** None at the catalog level. The generated install SQL is hardened so every operator/function/object reference is schema-qualified (`OPERATOR(pg_catalog.)`, `pg_catalog.now()`, `pg_catalog.current_database()`, etc.), including the references inside the two install-time anonymous `DO` blocks (worker-role superuser check and database validation), closing the CVE-2018-1058 search_path vector. A pgspot CI gate (`scripts/pgspot-gate.sh`, wired into `.github/workflows/ci.yml`) enforces qualification going forward and isolation-scans `DO` blocks to defeat pgspot's whole-file search_path leak. The `sql/pg_durable--0.2.2--0.2.3.sql` upgrade script is intentionally a no-op. +- **Scenario A considerations:** Schema qualification is invisible to the catalogs — PostgreSQL resolves each reference to the same `pg_catalog` OID at parse time and the deparser omits the prefix, so stored CHECK/policy/default expression text is byte-identical to the unqualified 0.2.2 forms. Fresh 0.2.3 install therefore matches a 0.2.2 install upgraded through the no-op script. This is the first boundary at or after `PROVIDER_COMPAT_START_VERSION` (0.2.2), so Scenario A actively runs here. +- **Scenario B1 considerations:** No runtime query contract changed; the new `.so` works against the 0.2.2 schema unchanged. The qualification is confined to install-time DDL. +- **Scenario B2 considerations:** No data migration. Existing rows in `df.instances`, `df.nodes`, and `df.vars` are untouched. + ### v0.2.1 → v0.2.2 #### #162 quote_ident-wrapped `current_user::regrole` (fixes #161) diff --git a/scripts/extract-do-blocks.py b/scripts/extract-do-blocks.py new file mode 100644 index 00000000..392c1a45 --- /dev/null +++ b/scripts/extract-do-blocks.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. +# Licensed under the PostgreSQL License. + +"""Extract top-level DO blocks from SQL file(s) for isolated pgspot scanning. + +Why this exists +--------------- +pgspot scans a whole file and, once it has seen a function that establishes a +trusted search_path (``CREATE FUNCTION ... SET search_path = ..., df, ...`` with +``CREATE SCHEMA df`` earlier in the file), it marks the top-level state as +"search_path secure" and exempts every SUBSEQUENT top-level statement from the +unqualified-reference rules (PS001/PS016/PS017). + +Anonymous ``DO`` blocks, however, do NOT inherit a function's ``SET search_path`` +at run time -- they execute under the installing role's *session* search_path. +An unqualified reference inside a DO block is therefore a genuine CVE-2018-1058 +surface, yet pgspot's whole-file pass masks it. + +This script pulls each top-level ``DO`` block out and writes it to its own file, +prefixed with a plain ``CREATE SCHEMA df;`` (so ``df.``-qualified references have +trusted-schema context, and -- being a plain CREATE, not ``IF NOT EXISTS`` -- it +does not itself trip PS010). Scanning those files in isolation defeats the leak +for the DO-block class. + +This is a TARGETED regression guard for anonymous DO blocks, not a complete fix +for pgspot's whole-file search_path leak; other statement classes that cannot +carry their own search_path are guarded by manually schema-qualifying the +install DDL (see docs/upgrade-testing.md). + +Usage: + extract-do-blocks.py OUTDIR FILE [FILE ...] + +Writes ``/.doN.sql`` for each DO block found and prints each +written path on stdout. Exits 0 even when no DO blocks are found. +""" + +import os +import sys + +from pglast import ast, parse_sql, split + + +def extract(path, outdir): + with open(path, encoding="utf-8") as fh: + sql = fh.read() + + written = [] + base = os.path.basename(path) + index = 0 + for stmt in split(sql): + try: + node = parse_sql(stmt)[0].stmt + except Exception: + # A fragment pglast can re-split but not re-parse in isolation is not + # a DO block we can check; the whole-file pass still covers the file. + continue + if not isinstance(node, ast.DoStmt): + continue + index += 1 + out_path = os.path.join(outdir, f"{base}.do{index}.sql") + with open(out_path, "w", encoding="utf-8") as out: + # Plain CREATE SCHEMA df (not IF NOT EXISTS) gives df trusted-schema + # context without tripping PS010. + out.write("CREATE SCHEMA df;\n") + out.write(stmt) + out.write(";\n") + written.append(out_path) + return written + + +def main(argv): + if len(argv) < 3: + sys.stderr.write("usage: extract-do-blocks.py OUTDIR FILE [FILE ...]\n") + return 2 + outdir = argv[1] + os.makedirs(outdir, exist_ok=True) + for path in argv[2:]: + for out_path in extract(path, outdir): + print(out_path) + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/pgspot-gate.sh b/scripts/pgspot-gate.sh index cd551ba5..f9bb7713 100755 --- a/scripts/pgspot-gate.sh +++ b/scripts/pgspot-gate.sh @@ -63,8 +63,10 @@ fi scan+=("${targets[@]}") if [[ ${#scan[@]} -eq 0 ]]; then - echo "Nothing to scan (no install SQL and no non-frozen upgrade scripts)." - exit 0 + echo "ERROR: nothing to scan (no install SQL and no non-frozen upgrade scripts)." >&2 + echo " In CI the generated install SQL must always be passed as an argument;" >&2 + echo " an empty scan set is treated as a gate failure, not a silent pass." >&2 + exit 2 fi exec "$SCRIPT_DIR/run-pgspot.sh" "${scan[@]}" diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh index f5f89dd7..372f24ae 100755 --- a/scripts/run-pgspot.sh +++ b/scripts/run-pgspot.sh @@ -9,11 +9,23 @@ # search_path / privilege-escalation issues (CVE-2018-1058 class) and other # unsafe SQL constructs. # -# Strict policy: the gate passes only when pgspot reports ZERO errors, ZERO -# warnings and ZERO unknowns for every file, except for codes on the documented -# allowlist below. pgspot already exits non-zero on any error, warning or -# unknown, so this wrapper relies on that exit code and adds: a pinned pgspot -# version, an explicit allowlist with justifications, and per-file attribution. +# Strict policy: a file passes only when every pgspot finding is on the +# documented per-finding allowlist (PGSPOT_ALLOW). Each file is scanned twice so +# the result is fail-closed (see scan_file): pass A verifies that only +# allowlisted findings are present; pass B (ignoring the allowlisted codes) must +# come back completely clean, which catches unknowns, parse fatals, and any +# finding pgspot reports only via its exit code. +# +# Known pgspot limitation (DO-block isolation): +# pgspot scans a whole file and, once it sees a function that establishes a +# trusted search_path, marks top-level state "search_path secure" and exempts +# all LATER top-level statements from the unqualified-reference rules. Anonymous +# `DO` blocks do NOT inherit a function's `SET search_path` at run time, so an +# unqualified reference inside a DO block is a real CVE-2018-1058 surface that +# the whole-file pass masks. To close this for DO blocks, each DO block is also +# extracted (see extract-do-blocks.py) and scanned in isolation. Other statement +# classes that cannot carry their own search_path (e.g. plain DML at install +# time) are NOT auto-isolated; keep all install DDL schema-qualified. # # Usage: # scripts/run-pgspot.sh FILE [FILE ...] @@ -22,43 +34,127 @@ # patterns that match nothing are reported and skipped. # # Environment: -# PGSPOT_VERSION pgspot version to pin (default: 0.9.2) -# PGSPOT_VENV venv directory to install/reuse (default: a cache dir) -# PGSPOT_BIN path to an existing pgspot executable (skips venv setup) +# PGSPOT_VERSION pgspot version to pin (default: 0.9.2) +# PGSPOT_VENV venv directory to install/reuse (default: a cache dir) +# PGSPOT_BIN path to an existing pgspot executable (skips venv setup) +# PGSPOT_DO_ISOLATION set to 0 to disable DO-block isolation (debug only) set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PGSPOT_VERSION="${PGSPOT_VERSION:-0.9.2}" PGSPOT_VENV="${PGSPOT_VENV:-${XDG_CACHE_HOME:-$HOME/.cache}/pg_durable/pgspot-venv}" +# Also isolate-scan anonymous DO blocks to defeat pgspot's whole-file +# search_path leak (see extract-do-blocks.py). On by default; set to 0 only for +# debugging. +PGSPOT_DO_ISOLATION="${PGSPOT_DO_ISOLATION:-1}" -# --- Allowlist ------------------------------------------------------------- +# --- Finding allowlist ----------------------------------------------------- # -# Each entry MUST carry a justification. Keep this list as small as possible; -# prefer fixing the source over ignoring a code. Codes here are passed to -# pgspot via --ignore, which suppresses both the finding and the exit code. +# pgspot prints one line per error/warning: "PSxxx: : <context> at line +# N". Instead of suppressing whole codes with --ignore (which is GLOBAL and +# would also hide future, genuinely-unsafe instances of the same code), we +# allow specific findings by exact line. A finding is permitted only if it +# matches one of the regexes below; everything else fails the gate. Unknowns, +# fatals/parse errors, and any unexplained non-zero pgspot exit also fail. # -# (Intentionally empty for the initial gate so that all real findings surface. -# Add entries only for constructs we provably cannot control from source, e.g. -# pgrx-emitted DDL, each with a one-line reason.) -PGSPOT_IGNORE=( - # PS010 "Unsafe schema creation: df" -- pgrx emits `CREATE SCHEMA IF NOT - # EXISTS df` from #[pg_schema]; the `IF NOT EXISTS` (the construct pgspot - # flags, as it can adopt a pre-existing attacker-crafted schema) is not - # controllable from our source. Residual risk is negligible: all df objects - # are created by the installing superuser and we ship no SECURITY DEFINER - # functions. Schemas we DO control are created without IF NOT EXISTS (see - # `CREATE SCHEMA duroxide`), so they fail loudly and are not ignored here. - PS010 +# Each entry MUST carry a justification. Keep this list as small as possible; +# prefer fixing the source. +PGSPOT_ALLOW=( + # pgrx emits `CREATE SCHEMA IF NOT EXISTS df` from #[pg_schema]; the + # `IF NOT EXISTS` (the construct PS010 flags, as it can adopt a pre-existing + # attacker-crafted schema) is not controllable from our source. Residual risk + # is negligible: all df objects are created by the installing superuser and we + # ship no SECURITY DEFINER functions. ONLY the df schema is allowed -- any + # other PS010 (e.g. a future `CREATE SCHEMA IF NOT EXISTS something_else`) + # still fails the gate. Schemas we fully control are created without + # IF NOT EXISTS (see `CREATE SCHEMA duroxide`) and never trip PS010 at all. + '^PS010: Unsafe schema creation: df at line [0-9]+$' ) +# Whole codes to suppress globally via pgspot --ignore. Prefer PGSPOT_ALLOW +# (precise, per-finding) over this. Intentionally empty. +PGSPOT_IGNORE=() + # --------------------------------------------------------------------------- err() { printf '%s\n' "$*" >&2; } +# Build the two --ignore argument sets from PGSPOT_IGNORE and PGSPOT_ALLOW. +# - GLOBAL: codes in PGSPOT_IGNORE only (suppressed everywhere). Used in pass A. +# - ALLOW: GLOBAL plus the codes mentioned by PGSPOT_ALLOW regexes. Used in +# pass B, where the file must come back completely clean. +IGNORE_GLOBAL_ARGS=() +IGNORE_ALLOW_ARGS=() +build_ignore_args() { + local code re + for code in "${PGSPOT_IGNORE[@]:-}"; do + [[ -z "$code" ]] && continue + IGNORE_GLOBAL_ARGS+=(--ignore "$code") + IGNORE_ALLOW_ARGS+=(--ignore "$code") + done + for re in "${PGSPOT_ALLOW[@]:-}"; do + if [[ "$re" =~ (PS[0-9]+) ]]; then + IGNORE_ALLOW_ARGS+=(--ignore "${BASH_REMATCH[1]}") + fi + done +} + +# scan_file FILE +# Decide pass/fail for FILE against PGSPOT_ALLOW using two pgspot passes, so the +# result is fail-closed (an unexpected fatal/parse error or unknown can never be +# masked by the presence of an allowlisted finding). +# +# Pass A (no allow-code ignores): print all findings and verify every printed +# "PSxxx:" line matches the allowlist. This is what catches a DISALLOWED +# instance of an allowlisted code (e.g. PS010 for a schema other than df) -- +# pass B would hide it because --ignore is per-code/global. +# Pass B (ignore the allowlisted codes): the file must now be COMPLETELY clean +# (exit 0). This catches unknowns, parse fatals, and any non-allowlisted +# finding regardless of how pgspot reports it. +# +# FILE passes only if pass A finds no disallowed line AND pass B exits clean. +scan_file() { + local file="$1" + + local outA + # Pass A relies on the printed findings, not the exit code; `|| true` keeps + # `set -e` from aborting when pgspot exits non-zero on a finding. + outA="$("$PGSPOT" "${IGNORE_GLOBAL_ARGS[@]}" "$file" 2>&1)" || true + printf '%s\n' "$outA" + + local disallowed=0 line re ok + while IFS= read -r line; do + [[ "$line" =~ ^PS[0-9]+:\ ]] || continue + ok=0 + for re in "${PGSPOT_ALLOW[@]}"; do + [[ -z "$re" ]] && continue + if [[ "$line" =~ $re ]]; then ok=1; break; fi + done + if [[ $ok -eq 0 ]]; then + disallowed=$((disallowed + 1)) + err " disallowed finding: $line" + fi + done <<< "$outA" + + local rcB=0 + "$PGSPOT" "${IGNORE_ALLOW_ARGS[@]}" "$file" >/dev/null 2>&1 || rcB=$? + + if [[ $disallowed -gt 0 ]]; then + return 1 + fi + if [[ $rcB -ne 0 ]]; then + err " pgspot reports residual findings after ignoring allowlisted codes (unknown/fatal/non-allowlisted); exit $rcB" + return 1 + fi + return 0 +} + resolve_pgspot() { if [[ -n "${PGSPOT_BIN:-}" ]]; then if "$PGSPOT_BIN" --version 2>/dev/null | grep -q "pgspot ${PGSPOT_VERSION}"; then PGSPOT="$PGSPOT_BIN" + PGSPOT_PY="$(dirname "$PGSPOT_BIN")/python3" return fi err "PGSPOT_BIN=$PGSPOT_BIN is not pgspot ${PGSPOT_VERSION}" @@ -68,6 +164,7 @@ resolve_pgspot() { local venv_bin="$PGSPOT_VENV/bin/pgspot" if [[ -x "$venv_bin" ]] && "$venv_bin" --version 2>/dev/null | grep -q "pgspot ${PGSPOT_VERSION}"; then PGSPOT="$venv_bin" + PGSPOT_PY="$PGSPOT_VENV/bin/python3" return fi @@ -76,6 +173,7 @@ resolve_pgspot() { "$PGSPOT_VENV/bin/pip" install --quiet --upgrade pip "$PGSPOT_VENV/bin/pip" install --quiet "pgspot==${PGSPOT_VERSION}" PGSPOT="$venv_bin" + PGSPOT_PY="$PGSPOT_VENV/bin/python3" } main() { @@ -85,13 +183,21 @@ main() { fi resolve_pgspot - - local ignore_args=() - local code - for code in "${PGSPOT_IGNORE[@]:-}"; do - [[ -z "$code" ]] && continue - ignore_args+=(--ignore "$code") - done + build_ignore_args + + local do_isolation=0 + local workdir="" + if [[ "$PGSPOT_DO_ISOLATION" == "1" ]]; then + if "$PGSPOT_PY" -c 'import pglast' 2>/dev/null; then + do_isolation=1 + workdir="$(mktemp -d)" + # shellcheck disable=SC2064 + trap "rm -rf '$workdir'" EXIT + else + err "ERROR: DO-block isolation requested but pglast is unavailable in $PGSPOT_PY" + exit 2 + fi + fi local failed=0 local checked=0 @@ -103,12 +209,28 @@ main() { fi checked=$((checked + 1)) printf '\n=== pgspot: %s ===\n' "$file" - if "$PGSPOT" "${ignore_args[@]}" "$file"; then + if scan_file "$file"; then printf 'OK: %s\n' "$file" else err "FAIL: $file" failed=$((failed + 1)) fi + + # Supplementary: isolate-scan each anonymous DO block so the whole-file + # search_path leak cannot mask an unqualified reference inside it. + if [[ $do_isolation -eq 1 ]]; then + local do_file + while IFS= read -r do_file; do + [[ -z "$do_file" ]] && continue + printf '\n=== pgspot (DO-isolation): %s ===\n' "$do_file" + if scan_file "$do_file"; then + printf 'OK: %s\n' "$do_file" + else + err "FAIL (DO-isolation, source file $file): $do_file" + failed=$((failed + 1)) + fi + done < <("$PGSPOT_PY" "$SCRIPT_DIR/extract-do-blocks.py" "$workdir" "$file") + fi done if [[ $checked -eq 0 ]]; then diff --git a/sql/pg_durable--0.2.2--0.2.3.sql b/sql/pg_durable--0.2.2--0.2.3.sql new file mode 100644 index 00000000..6be9639c --- /dev/null +++ b/sql/pg_durable--0.2.2--0.2.3.sql @@ -0,0 +1,22 @@ +-- Copyright (c) Microsoft Corporation. +-- Licensed under the PostgreSQL License. + +-- pg_durable upgrade: 0.2.2 → 0.2.3 +-- +-- No-op (intentionally empty). +-- +-- The 0.2.3 changes are a security hardening of the install DDL only: +-- * every operator/function/object reference in the generated install SQL is +-- schema-qualified (e.g. `OPERATOR(pg_catalog.=)`, `pg_catalog.now()`), +-- including the references inside the install-time anonymous DO blocks, to +-- close the CVE-2018-1058 search_path vector; and +-- * a pgspot CI gate (scripts/pgspot-gate.sh) enforces this going forward. +-- +-- Schema qualification is invisible to the system catalogs: PostgreSQL binds +-- each reference to the same pg_catalog OID at parse time and the deparser omits +-- the pg_catalog prefix, so the stored CHECK/policy/default expressions are +-- byte-identical to the unqualified 0.2.2 forms. The DO blocks run only at +-- install time and leave no catalog footprint. There is therefore nothing to +-- migrate for an existing 0.2.2 database, and test-upgrade.sh (Scenario A) +-- verifies that a fresh 0.2.3 install matches a 0.2.2 install upgraded through +-- this script. diff --git a/src/lib.rs b/src/lib.rs index f7fe4cc1..78481854 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -216,6 +216,12 @@ CREATE TABLE IF NOT EXISTS df._worker_epoch ( ALTER TABLE df.instances ADD CONSTRAINT instances_id_format_chk + -- Operators are written as OPERATOR(pg_catalog.<op>) and functions are + -- schema-qualified (e.g. pg_catalog.now) throughout this install DDL so + -- that name resolution never depends on the session search_path. This + -- closes the CVE-2018-1058 vector (a malicious schema earlier on + -- search_path shadowing `=`, `~`, etc.) and is enforced by the pgspot + -- CI gate (scripts/pgspot-gate.sh). CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT instances_root_node_format_chk CHECK (root_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, @@ -525,12 +531,12 @@ DECLARE wrole TEXT; is_super BOOLEAN; BEGIN - wrole := current_setting('pg_durable.worker_role', true); - IF wrole IS NULL OR wrole = '' THEN + wrole := pg_catalog.current_setting('pg_durable.worker_role', true); + IF wrole IS NULL OR wrole OPERATOR(pg_catalog.=) '' THEN wrole := 'azuresu'; END IF; - SELECT rolsuper INTO is_super FROM pg_roles WHERE rolname = wrole; + SELECT rolsuper INTO is_super FROM pg_catalog.pg_roles WHERE rolname OPERATOR(pg_catalog.=) wrole; IF is_super IS NULL THEN RAISE WARNING 'pg_durable: worker role "%" does not exist. The background worker will not be able to process workflows. Create the role as a superuser before using pg_durable.', wrole; ELSIF NOT is_super THEN @@ -578,12 +584,12 @@ DECLARE target_db TEXT; BEGIN -- Get the current database - SELECT current_database() INTO current_db; + SELECT pg_catalog.current_database() INTO current_db; -- Get the target database that the background worker will connect to SELECT df.target_database() INTO target_db; - IF current_db != target_db THEN + IF current_db OPERATOR(pg_catalog.<>) target_db THEN RAISE EXCEPTION 'pg_durable extension must be created in database "%" (currently in "%"). The background worker only processes functions in the database specified by the pg_durable.database GUC (defaults to "postgres").', target_db, current_db USING HINT = 'Connect to the correct database and run: CREATE EXTENSION pg_durable;'; END IF; From e4ec90ee1c6b47896ef745e8389b398ef95110fd Mon Sep 17 00:00:00 2001 From: Todd Green <greentodd@microsoft.com> Date: Mon, 1 Jun 2026 18:40:06 -0700 Subject: [PATCH 4/8] Factor version bump out of this PR (keep at 0.2.2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 0.2.2 -> 0.2.3 bump activated the B1/B2 upgrade scenarios (they only run for versions past the 0.2.2 provider-compat start), which exercise a single long-lived BGW across repeated DROP/CREATE EXTENSION while the non-extension- owned duroxide schema persists. That path hits a duroxide runtime/schema mismatch (orchestrator_queue / enqueue_orchestrator_work / fetch_work_item missing, cached-plan invalidation), leaving instances stuck pending — a pre-existing issue unrelated to the pgspot gate or the DDL qualification here. Keep this PR focused on the security gate + schema qualification (catalog- invisible, no bump required). The bump and the B1/B2 enablement will land in a dedicated PR where the worker/upgrade-harness issue can be addressed properly. Removes the no-op sql/pg_durable--0.2.2--0.2.3.sql and the v0.2.2->v0.2.3 version-specific doc entry; keeps the general pgspot qualification/freeze guidance. --- Cargo.lock | 2 +- Cargo.toml | 2 +- docs/upgrade-testing.md | 8 -------- sql/pg_durable--0.2.2--0.2.3.sql | 22 ---------------------- 4 files changed, 2 insertions(+), 32 deletions(-) delete mode 100644 sql/pg_durable--0.2.2--0.2.3.sql diff --git a/Cargo.lock b/Cargo.lock index 55e3e3d7..ca1c17ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1892,7 +1892,7 @@ dependencies = [ [[package]] name = "pg_durable" -version = "0.2.3" +version = "0.2.2" dependencies = [ "bigdecimal", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 45a400ad..6bf06449 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pg_durable" -version = "0.2.3" +version = "0.2.2" edition = "2021" license = "PostgreSQL" repository = "https://github.com/microsoft/pg_durable" diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 33a9e79a..1fa12ede 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -208,14 +208,6 @@ should therefore correspond to an already-released tag. Each schema-changing PR should add a section here documenting what changed, what the upgrade script handles, and any backward compatibility considerations. -### v0.2.2 → v0.2.3 - -#### pgspot SQL security gate + schema-qualified install DDL -- **DDL change:** None at the catalog level. The generated install SQL is hardened so every operator/function/object reference is schema-qualified (`OPERATOR(pg_catalog.<op>)`, `pg_catalog.now()`, `pg_catalog.current_database()`, etc.), including the references inside the two install-time anonymous `DO` blocks (worker-role superuser check and database validation), closing the CVE-2018-1058 search_path vector. A pgspot CI gate (`scripts/pgspot-gate.sh`, wired into `.github/workflows/ci.yml`) enforces qualification going forward and isolation-scans `DO` blocks to defeat pgspot's whole-file search_path leak. The `sql/pg_durable--0.2.2--0.2.3.sql` upgrade script is intentionally a no-op. -- **Scenario A considerations:** Schema qualification is invisible to the catalogs — PostgreSQL resolves each reference to the same `pg_catalog` OID at parse time and the deparser omits the prefix, so stored CHECK/policy/default expression text is byte-identical to the unqualified 0.2.2 forms. Fresh 0.2.3 install therefore matches a 0.2.2 install upgraded through the no-op script. This is the first boundary at or after `PROVIDER_COMPAT_START_VERSION` (0.2.2), so Scenario A actively runs here. -- **Scenario B1 considerations:** No runtime query contract changed; the new `.so` works against the 0.2.2 schema unchanged. The qualification is confined to install-time DDL. -- **Scenario B2 considerations:** No data migration. Existing rows in `df.instances`, `df.nodes`, and `df.vars` are untouched. - ### v0.2.1 → v0.2.2 #### #162 quote_ident-wrapped `current_user::regrole` (fixes #161) diff --git a/sql/pg_durable--0.2.2--0.2.3.sql b/sql/pg_durable--0.2.2--0.2.3.sql deleted file mode 100644 index 6be9639c..00000000 --- a/sql/pg_durable--0.2.2--0.2.3.sql +++ /dev/null @@ -1,22 +0,0 @@ --- Copyright (c) Microsoft Corporation. --- Licensed under the PostgreSQL License. - --- pg_durable upgrade: 0.2.2 → 0.2.3 --- --- No-op (intentionally empty). --- --- The 0.2.3 changes are a security hardening of the install DDL only: --- * every operator/function/object reference in the generated install SQL is --- schema-qualified (e.g. `OPERATOR(pg_catalog.=)`, `pg_catalog.now()`), --- including the references inside the install-time anonymous DO blocks, to --- close the CVE-2018-1058 search_path vector; and --- * a pgspot CI gate (scripts/pgspot-gate.sh) enforces this going forward. --- --- Schema qualification is invisible to the system catalogs: PostgreSQL binds --- each reference to the same pg_catalog OID at parse time and the deparser omits --- the pg_catalog prefix, so the stored CHECK/policy/default expressions are --- byte-identical to the unqualified 0.2.2 forms. The DO blocks run only at --- install time and leave no catalog footprint. There is therefore nothing to --- migrate for an existing 0.2.2 database, and test-upgrade.sh (Scenario A) --- verifies that a fresh 0.2.3 install matches a 0.2.2 install upgraded through --- this script. From 418a6b63ed7f7d427eb15dad5193c31e0ec161fb Mon Sep 17 00:00:00 2001 From: Todd Green <greentodd@microsoft.com> Date: Tue, 2 Jun 2026 08:14:36 -0700 Subject: [PATCH 5/8] Make pgspot gate comments crisp and concise Trim verbose comments and docstrings across the pgspot gate files (run-pgspot.sh, pgspot-gate.sh, extract-do-blocks.py, pgspot-frozen.txt, ci.yml, lib.rs auditability note) for readability. Comments only -- no code or logic changes. --- .github/workflows/ci.yml | 15 ++--- scripts/extract-do-blocks.py | 41 ++++--------- scripts/pgspot-gate.sh | 24 ++++---- scripts/run-pgspot.sh | 112 ++++++++++++----------------------- sql/pgspot-frozen.txt | 21 +++---- src/lib.rs | 11 ++-- 6 files changed, 79 insertions(+), 145 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d3778195..1a6ebffd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -304,11 +304,10 @@ jobs: echo "PostgreSQL ${{ env.PG_VERSION }} already initialized, skipping download" fi - # Generate the CREATE EXTENSION install SQL to scan. `http-allow-test-domains` - # is a test-only feature (it widens df.http()'s domain allowlist) and is NOT - # part of the released build, but it emits no extra DDL, so the install SQL's - # DDL surface here is identical to what ships. It is enabled only to match the - # clippy/test jobs' feature set and keep one cached build. + # Generate the install SQL to scan. http-allow-test-domains is a test-only + # feature (widens df.http()'s allowlist) that emits no extra DDL, so this DDL + # surface matches the shipped build. Enabled only to match the clippy/test + # jobs' feature set and reuse one cached build. - name: Generate extension install SQL run: | cargo pgrx schema pg${{ env.PG_VERSION }} \ @@ -318,9 +317,7 @@ jobs: echo "Generated install SQL:" wc -l /tmp/pg_durable-install.sql - # Scan the generated install SQL plus active upgrade scripts that are not in - # the frozen baseline (sql/pgspot-frozen.txt). Released upgrade scripts and the - # frozen first-version fixture are intentionally exempt; new upgrade scripts are - # gated automatically. + # Scan the install SQL plus non-frozen upgrade scripts. Released scripts and + # the first-version fixture are exempt; new scripts are gated automatically. - name: Run pgspot SQL security gate run: scripts/pgspot-gate.sh /tmp/pg_durable-install.sql diff --git a/scripts/extract-do-blocks.py b/scripts/extract-do-blocks.py index 392c1a45..c72c96e0 100644 --- a/scripts/extract-do-blocks.py +++ b/scripts/extract-do-blocks.py @@ -4,35 +4,18 @@ """Extract top-level DO blocks from SQL file(s) for isolated pgspot scanning. -Why this exists ---------------- -pgspot scans a whole file and, once it has seen a function that establishes a -trusted search_path (``CREATE FUNCTION ... SET search_path = ..., df, ...`` with -``CREATE SCHEMA df`` earlier in the file), it marks the top-level state as -"search_path secure" and exempts every SUBSEQUENT top-level statement from the -unqualified-reference rules (PS001/PS016/PS017). - -Anonymous ``DO`` blocks, however, do NOT inherit a function's ``SET search_path`` -at run time -- they execute under the installing role's *session* search_path. -An unqualified reference inside a DO block is therefore a genuine CVE-2018-1058 -surface, yet pgspot's whole-file pass masks it. - -This script pulls each top-level ``DO`` block out and writes it to its own file, -prefixed with a plain ``CREATE SCHEMA df;`` (so ``df.``-qualified references have -trusted-schema context, and -- being a plain CREATE, not ``IF NOT EXISTS`` -- it -does not itself trip PS010). Scanning those files in isolation defeats the leak -for the DO-block class. - -This is a TARGETED regression guard for anonymous DO blocks, not a complete fix -for pgspot's whole-file search_path leak; other statement classes that cannot -carry their own search_path are guarded by manually schema-qualifying the -install DDL (see docs/upgrade-testing.md). - -Usage: - extract-do-blocks.py OUTDIR FILE [FILE ...] - -Writes ``<OUTDIR>/<basename>.doN.sql`` for each DO block found and prints each -written path on stdout. Exits 0 even when no DO blocks are found. +pgspot marks a file "search_path secure" after the first function that sets a +trusted search_path, then skips the unqualified-reference checks on later +top-level statements. Anonymous DO blocks don't inherit that search_path at run +time, so an unqualified reference inside one is a real CVE-2018-1058 surface the +whole-file pass misses. This pulls each top-level DO block into its own file, +prefixed with a plain `CREATE SCHEMA df;` (gives df.* trusted context without +tripping PS010), so scanning them in isolation catches those references. It is a +targeted guard for DO blocks only; keep other install DDL schema-qualified. + +Usage: extract-do-blocks.py OUTDIR FILE [FILE ...] +Writes <OUTDIR>/<basename>.doN.sql per DO block and prints each path. Exits 0 +even when none are found. """ import os diff --git a/scripts/pgspot-gate.sh b/scripts/pgspot-gate.sh index f9bb7713..e497bd38 100755 --- a/scripts/pgspot-gate.sh +++ b/scripts/pgspot-gate.sh @@ -4,17 +4,14 @@ # pgspot-gate.sh - Project entry point for the pgspot SQL security gate. # -# Scans the generated `CREATE EXTENSION` install SQL plus every active upgrade -# script that is NOT in the frozen baseline (sql/pgspot-frozen.txt). New upgrade -# scripts are therefore gated automatically; released ones are exempt. +# Scans the generated install SQL plus every active upgrade script NOT in the +# frozen baseline (sql/pgspot-frozen.txt). New scripts are gated automatically; +# released ones are exempt. # -# Usage: -# scripts/pgspot-gate.sh [INSTALL_SQL] -# -# INSTALL_SQL Path to the generated install SQL to scan. Optional: if omitted -# (e.g. when a pgrx PostgreSQL install is unavailable locally) only -# the non-frozen upgrade scripts are scanned. In CI the install SQL -# is always generated and passed. +# Usage: scripts/pgspot-gate.sh [INSTALL_SQL] +# INSTALL_SQL install SQL to scan. Optional (omit when no local pgrx install); +# CI always generates and passes it. Without it, only non-frozen +# upgrade scripts are scanned. set -euo pipefail @@ -36,8 +33,8 @@ if [[ -f "$FROZEN_LIST" ]]; then done < "$FROZEN_LIST" fi -# Collect non-frozen upgrade scripts (basename matches `*--*--*.sql`, i.e. two -# `--` separators; the single-`--` first-version fixture is never matched). +# Non-frozen upgrade scripts (basename `*--*--*.sql`; the single-`--` first- +# version fixture never matches). targets=() shopt -s nullglob for f in "$SQL_DIR"/pg_durable--*--*.sql; do @@ -64,8 +61,7 @@ scan+=("${targets[@]}") if [[ ${#scan[@]} -eq 0 ]]; then echo "ERROR: nothing to scan (no install SQL and no non-frozen upgrade scripts)." >&2 - echo " In CI the generated install SQL must always be passed as an argument;" >&2 - echo " an empty scan set is treated as a gate failure, not a silent pass." >&2 + echo " CI must pass the generated install SQL; an empty scan set fails the gate." >&2 exit 2 fi diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh index 372f24ae..4406bd89 100755 --- a/scripts/run-pgspot.sh +++ b/scripts/run-pgspot.sh @@ -2,88 +2,58 @@ # Copyright (c) Microsoft Corporation. # Licensed under the PostgreSQL License. -# run-pgspot.sh - Lint shipped extension SQL with pgspot +# run-pgspot.sh - Lint shipped extension SQL with pgspot. # -# Scans the static SQL that pg_durable actually ships -- the generated -# `CREATE EXTENSION` install SQL and the active upgrade scripts -- for -# search_path / privilege-escalation issues (CVE-2018-1058 class) and other -# unsafe SQL constructs. +# Scans the SQL pg_durable ships (generated install SQL + active upgrade scripts) +# for search_path / privilege-escalation issues (CVE-2018-1058). # -# Strict policy: a file passes only when every pgspot finding is on the -# documented per-finding allowlist (PGSPOT_ALLOW). Each file is scanned twice so -# the result is fail-closed (see scan_file): pass A verifies that only -# allowlisted findings are present; pass B (ignoring the allowlisted codes) must -# come back completely clean, which catches unknowns, parse fatals, and any -# finding pgspot reports only via its exit code. +# A file passes only if every pgspot finding is on the per-finding allowlist +# (PGSPOT_ALLOW); scan_file is fail-closed via two passes (see its comment). # -# Known pgspot limitation (DO-block isolation): -# pgspot scans a whole file and, once it sees a function that establishes a -# trusted search_path, marks top-level state "search_path secure" and exempts -# all LATER top-level statements from the unqualified-reference rules. Anonymous -# `DO` blocks do NOT inherit a function's `SET search_path` at run time, so an -# unqualified reference inside a DO block is a real CVE-2018-1058 surface that -# the whole-file pass masks. To close this for DO blocks, each DO block is also -# extracted (see extract-do-blocks.py) and scanned in isolation. Other statement -# classes that cannot carry their own search_path (e.g. plain DML at install -# time) are NOT auto-isolated; keep all install DDL schema-qualified. +# DO-block limitation: once pgspot sees a function with a trusted search_path it +# stops checking later top-level statements. Anonymous DO blocks don't inherit +# that search_path at run time, so each is also extracted (extract-do-blocks.py) +# and scanned in isolation. Keep all other install DDL schema-qualified. # -# Usage: -# scripts/run-pgspot.sh FILE [FILE ...] +# Usage: scripts/run-pgspot.sh FILE [FILE ...] (globs expanded by caller) # -# Each FILE is checked independently. Globs should be expanded by the caller; -# patterns that match nothing are reported and skipped. -# -# Environment: -# PGSPOT_VERSION pgspot version to pin (default: 0.9.2) -# PGSPOT_VENV venv directory to install/reuse (default: a cache dir) -# PGSPOT_BIN path to an existing pgspot executable (skips venv setup) -# PGSPOT_DO_ISOLATION set to 0 to disable DO-block isolation (debug only) +# Env: +# PGSPOT_VERSION pgspot version to pin (default: 0.9.2) +# PGSPOT_VENV venv dir to install/reuse (default: a cache dir) +# PGSPOT_BIN existing pgspot executable (skips venv setup) +# PGSPOT_DO_ISOLATION set to 0 to disable DO-block isolation (debug only) set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PGSPOT_VERSION="${PGSPOT_VERSION:-0.9.2}" PGSPOT_VENV="${PGSPOT_VENV:-${XDG_CACHE_HOME:-$HOME/.cache}/pg_durable/pgspot-venv}" -# Also isolate-scan anonymous DO blocks to defeat pgspot's whole-file -# search_path leak (see extract-do-blocks.py). On by default; set to 0 only for -# debugging. +# Isolate-scan anonymous DO blocks too (see extract-do-blocks.py). Set 0 to debug. PGSPOT_DO_ISOLATION="${PGSPOT_DO_ISOLATION:-1}" # --- Finding allowlist ----------------------------------------------------- -# -# pgspot prints one line per error/warning: "PSxxx: <title>: <context> at line -# N". Instead of suppressing whole codes with --ignore (which is GLOBAL and -# would also hide future, genuinely-unsafe instances of the same code), we -# allow specific findings by exact line. A finding is permitted only if it -# matches one of the regexes below; everything else fails the gate. Unknowns, -# fatals/parse errors, and any unexplained non-zero pgspot exit also fail. -# -# Each entry MUST carry a justification. Keep this list as small as possible; -# prefer fixing the source. +# pgspot prints one line per finding: "PSxxx: <title>: <context> at line N". We +# allow specific findings by exact match rather than suppressing whole codes +# globally (--ignore), so a future unsafe instance of the same code still fails. +# Anything not matched here -- plus unknowns, fatals, and unexplained non-zero +# exits -- fails the gate. Keep this list minimal; prefer fixing the source. PGSPOT_ALLOW=( # pgrx emits `CREATE SCHEMA IF NOT EXISTS df` from #[pg_schema]; the - # `IF NOT EXISTS` (the construct PS010 flags, as it can adopt a pre-existing - # attacker-crafted schema) is not controllable from our source. Residual risk - # is negligible: all df objects are created by the installing superuser and we - # ship no SECURITY DEFINER functions. ONLY the df schema is allowed -- any - # other PS010 (e.g. a future `CREATE SCHEMA IF NOT EXISTS something_else`) - # still fails the gate. Schemas we fully control are created without - # IF NOT EXISTS (see `CREATE SCHEMA duroxide`) and never trip PS010 at all. + # IF NOT EXISTS (what PS010 flags) is not controllable from source. Only df is + # allowed -- any other PS010 still fails. Schemas we control omit IF NOT EXISTS + # (e.g. `CREATE SCHEMA duroxide`) and never trip PS010. '^PS010: Unsafe schema creation: df at line [0-9]+$' ) -# Whole codes to suppress globally via pgspot --ignore. Prefer PGSPOT_ALLOW -# (precise, per-finding) over this. Intentionally empty. +# Whole codes to suppress globally (pgspot --ignore). Prefer PGSPOT_ALLOW. Empty. PGSPOT_IGNORE=() # --------------------------------------------------------------------------- err() { printf '%s\n' "$*" >&2; } -# Build the two --ignore argument sets from PGSPOT_IGNORE and PGSPOT_ALLOW. -# - GLOBAL: codes in PGSPOT_IGNORE only (suppressed everywhere). Used in pass A. -# - ALLOW: GLOBAL plus the codes mentioned by PGSPOT_ALLOW regexes. Used in -# pass B, where the file must come back completely clean. +# Build the two --ignore sets: GLOBAL (PGSPOT_IGNORE only, used in pass A) and +# ALLOW (GLOBAL + the allowlisted codes, used in pass B). IGNORE_GLOBAL_ARGS=() IGNORE_ALLOW_ARGS=() build_ignore_args() { @@ -100,26 +70,19 @@ build_ignore_args() { done } -# scan_file FILE -# Decide pass/fail for FILE against PGSPOT_ALLOW using two pgspot passes, so the -# result is fail-closed (an unexpected fatal/parse error or unknown can never be -# masked by the presence of an allowlisted finding). -# -# Pass A (no allow-code ignores): print all findings and verify every printed -# "PSxxx:" line matches the allowlist. This is what catches a DISALLOWED -# instance of an allowlisted code (e.g. PS010 for a schema other than df) -- -# pass B would hide it because --ignore is per-code/global. -# Pass B (ignore the allowlisted codes): the file must now be COMPLETELY clean -# (exit 0). This catches unknowns, parse fatals, and any non-allowlisted -# finding regardless of how pgspot reports it. -# -# FILE passes only if pass A finds no disallowed line AND pass B exits clean. +# scan_file FILE -- fail-closed pass/fail against PGSPOT_ALLOW via two passes: +# Pass A: print all findings; every "PSxxx:" line must match the allowlist. +# Catches a disallowed instance of an allowlisted code (e.g. PS010 for a +# non-df schema), which pass B's per-code --ignore would otherwise hide. +# Pass B: ignore the allowlisted codes; the file must exit fully clean. Catches +# unknowns, parse fatals, and findings pgspot reports only via exit code. +# FILE passes iff pass A has no disallowed line AND pass B exits clean. scan_file() { local file="$1" local outA - # Pass A relies on the printed findings, not the exit code; `|| true` keeps - # `set -e` from aborting when pgspot exits non-zero on a finding. + # Pass A uses the printed findings, not the exit code; `|| true` keeps set -e + # from aborting when pgspot exits non-zero on a finding. outA="$("$PGSPOT" "${IGNORE_GLOBAL_ARGS[@]}" "$file" 2>&1)" || true printf '%s\n' "$outA" @@ -216,8 +179,7 @@ main() { failed=$((failed + 1)) fi - # Supplementary: isolate-scan each anonymous DO block so the whole-file - # search_path leak cannot mask an unqualified reference inside it. + # Isolate-scan each DO block so the whole-file leak can't mask it. if [[ $do_isolation -eq 1 ]]; then local do_file while IFS= read -r do_file; do diff --git a/sql/pgspot-frozen.txt b/sql/pgspot-frozen.txt index b4182589..08e4e74d 100644 --- a/sql/pgspot-frozen.txt +++ b/sql/pgspot-frozen.txt @@ -1,19 +1,16 @@ # pgspot frozen baseline # -# Upgrade scripts listed here are RELEASED, immutable artifacts and are exempt -# from the pgspot SQL security gate. A customer on an older version runs these -# verbatim during `ALTER EXTENSION UPDATE`, and `test-upgrade.sh` compares the -# resulting schema, so they must not be edited after release. Their security -# posture is owned by the generated install SQL of the current version (fixed in -# Rust source), not by retrofitting historical upgrade SQL. +# Released, immutable upgrade scripts -- exempt from the pgspot gate. Customers +# run these verbatim during `ALTER EXTENSION UPDATE` and test-upgrade.sh compares +# the resulting schema, so they must not change after release. Current security +# posture is owned by the generated install SQL (fixed in Rust source), not by +# retrofitting historical upgrade SQL. # -# The frozen first-version install fixture `pg_durable--0.1.1.sql` is excluded -# separately (the gate only scans upgrade scripts matching `*--*--*.sql`). +# New upgrade scripts are gated automatically (absent here). On release, add the +# script's basename. The first-version fixture `pg_durable--0.1.1.sql` is exempt +# separately (the gate only scans `*--*--*.sql`). # -# Process: any NEW upgrade script is gated automatically (it is not in this -# list). When a version is released, add its upgrade script's basename here. -# -# One basename per line; blank lines and `#` comments are ignored. +# One basename per line; blank lines and `#` comments ignored. pg_durable--0.1.1--0.2.0.sql pg_durable--0.2.0--0.2.1.sql diff --git a/src/lib.rs b/src/lib.rs index 78481854..7dd88d9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -216,12 +216,11 @@ CREATE TABLE IF NOT EXISTS df._worker_epoch ( ALTER TABLE df.instances ADD CONSTRAINT instances_id_format_chk - -- Operators are written as OPERATOR(pg_catalog.<op>) and functions are - -- schema-qualified (e.g. pg_catalog.now) throughout this install DDL so - -- that name resolution never depends on the session search_path. This - -- closes the CVE-2018-1058 vector (a malicious schema earlier on - -- search_path shadowing `=`, `~`, etc.) and is enforced by the pgspot - -- CI gate (scripts/pgspot-gate.sh). + -- Operators (OPERATOR(pg_catalog.<op>)) and functions (e.g. pg_catalog.now) + -- are schema-qualified throughout this install DDL so name resolution never + -- depends on the session search_path -- closing the CVE-2018-1058 vector + -- (a malicious schema shadowing `=`, `~`, etc.). Enforced by the pgspot CI + -- gate (scripts/pgspot-gate.sh). CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT instances_root_node_format_chk CHECK (root_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, From 5d5cb8e221caccafe17b84c780f574435b950b64 Mon Sep 17 00:00:00 2001 From: Todd Green <greentodd@microsoft.com> Date: Tue, 2 Jun 2026 10:27:02 -0700 Subject: [PATCH 6/8] Hardcode pgspot exclude list, drop frozen-list file Replace sql/pgspot-frozen.txt and its parsing with a small hardcoded EXCLUDE array in pgspot-gate.sh listing the pre-pgspot legacy upgrade scripts (authored before the install DDL was schema-qualified, so they don't all pass pgspot). Scripts written after qualification pass the gate and need no entry, so the list is fixed rather than growing per release. Update ci.yml comment and upgrade-testing.md accordingly. --- .github/workflows/ci.yml | 4 ++-- docs/upgrade-testing.md | 22 +++++++----------- scripts/pgspot-gate.sh | 48 ++++++++++++++++++++++------------------ sql/pgspot-frozen.txt | 17 -------------- 4 files changed, 36 insertions(+), 55 deletions(-) delete mode 100644 sql/pgspot-frozen.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1a6ebffd..315f5487 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -317,7 +317,7 @@ jobs: echo "Generated install SQL:" wc -l /tmp/pg_durable-install.sql - # Scan the install SQL plus non-frozen upgrade scripts. Released scripts and - # the first-version fixture are exempt; new scripts are gated automatically. + # Scan the install SQL plus upgrade scripts. Pre-pgspot legacy scripts are + # excluded in the gate; new scripts are gated automatically. - name: Run pgspot SQL security gate run: scripts/pgspot-gate.sh /tmp/pg_durable-install.sql diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 1fa12ede..380716cb 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -158,7 +158,7 @@ Each PR that changes the extension schema or modifies SQL queries in Rust code s 1. Add the necessary DDL to the upgrade script (`sql/pg_durable--<prev>--<current>.sql`) 2. Ensure the `.so` is backward compatible with **all** previous schemas in the same provider compatibility line (Scenario B1) -3. Keep all new DDL — in the Rust install SQL *and* in any new upgrade script — schema-qualified so it passes the pgspot SQL security gate (`scripts/pgspot-gate.sh`): qualify operators as `OPERATOR(pg_catalog.<op>)`, functions/types/objects by schema (e.g. `pg_catalog.now()`), and qualify references inside anonymous `DO` blocks (they run under the session search_path and are isolation-scanned by the gate). New upgrade scripts are gated automatically; they are exempt only after being frozen (see below). +3. Keep all new DDL — in the Rust install SQL *and* in any new upgrade script — schema-qualified so it passes the pgspot SQL security gate (`scripts/pgspot-gate.sh`): qualify operators as `OPERATOR(pg_catalog.<op>)`, functions/types/objects by schema (e.g. `pg_catalog.now()`), and qualify references inside anonymous `DO` blocks (they run under the session search_path and are isolation-scanned by the gate). New upgrade scripts are gated automatically. 4. Add version-specific notes to this document under "Version-Specific Changes" below 5. Pass upgrade and pgspot tests in CI @@ -186,20 +186,14 @@ No additional fixture is needed for subsequent minors — intermediate versions `cargo pgrx package` generates the new major's install SQL. The previous major's install SQL and upgrade scripts are still needed for the A/B2 upgrade chain when the provider line continues across the major bump. B1 will be a no-op if there are no previous compatible versions within the new major, or if `PROVIDER_COMPAT_START_VERSION` marks the new major as the start of a new provider line. -### Freezing released upgrade scripts (pgspot gate) +### Upgrade scripts and the pgspot gate -The pgspot gate scans every upgrade script matching `*--*--*.sql` that is **not** -listed in `sql/pgspot-frozen.txt`. A released upgrade script is an immutable -artifact (customers run it verbatim during `ALTER EXTENSION UPDATE`), so once a -version ships its upgrade script is added to the frozen list and exempted. - -**Freezing must be done in a separate post-release PR**, never in the same PR -that introduces or edits the script. Adding a script to the frozen list in the -same change that authors it would let unqualified DDL bypass the gate entirely. -The intended lifecycle is: a new upgrade script is gated (and must pass) while in -development → the version is tagged/released → a follow-up PR appends the now- -immutable script's basename to `sql/pgspot-frozen.txt`. Each frozen basename -should therefore correspond to an already-released tag. +The pgspot gate scans every upgrade script matching `*--*--*.sql`, except a small +hardcoded list of pre-pgspot legacy scripts in `scripts/pgspot-gate.sh` (authored +before the install DDL was schema-qualified, and immutable now that they're +released). Every new upgrade script is gated and must pass — keep its DDL +schema-qualified (see step 3 above). Scripts written after qualification pass the +gate, so they never need to be added to the exclude list. --- diff --git a/scripts/pgspot-gate.sh b/scripts/pgspot-gate.sh index e497bd38..a342cd43 100755 --- a/scripts/pgspot-gate.sh +++ b/scripts/pgspot-gate.sh @@ -4,43 +4,47 @@ # pgspot-gate.sh - Project entry point for the pgspot SQL security gate. # -# Scans the generated install SQL plus every active upgrade script NOT in the -# frozen baseline (sql/pgspot-frozen.txt). New scripts are gated automatically; -# released ones are exempt. +# Scans the generated install SQL plus every active upgrade script, except the +# pre-pgspot legacy scripts listed below. New scripts are gated automatically. # # Usage: scripts/pgspot-gate.sh [INSTALL_SQL] # INSTALL_SQL install SQL to scan. Optional (omit when no local pgrx install); -# CI always generates and passes it. Without it, only non-frozen -# upgrade scripts are scanned. +# CI always generates and passes it. Without it, only upgrade +# scripts are scanned. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" SQL_DIR="$PROJECT_DIR/sql" -FROZEN_LIST="$SQL_DIR/pgspot-frozen.txt" install_sql="${1:-}" -# Build the set of frozen (exempt) upgrade-script basenames. -declare -A frozen=() -if [[ -f "$FROZEN_LIST" ]]; then - while IFS= read -r line; do - line="${line%%#*}" - line="$(echo "$line" | xargs || true)" - [[ -z "$line" ]] && continue - frozen["$line"]=1 - done < "$FROZEN_LIST" -fi +# Released upgrade scripts authored before the install DDL was schema-qualified. +# They are immutable and don't all pass pgspot, so they're excluded. Scripts +# written after qualification pass the gate, so they need no entry here. +EXCLUDE=( + pg_durable--0.1.1--0.2.0.sql + pg_durable--0.2.0--0.2.1.sql + pg_durable--0.2.1--0.2.2.sql +) + +is_excluded() { + local base="$1" e + for e in "${EXCLUDE[@]}"; do + [[ "$base" == "$e" ]] && return 0 + done + return 1 +} -# Non-frozen upgrade scripts (basename `*--*--*.sql`; the single-`--` first- -# version fixture never matches). +# Upgrade scripts (basename `*--*--*.sql`; the single-`--` first-version fixture +# never matches), minus the excluded legacy ones. targets=() shopt -s nullglob for f in "$SQL_DIR"/pg_durable--*--*.sql; do base="$(basename "$f")" - if [[ -n "${frozen[$base]:-}" ]]; then - echo "frozen (skip): $base" + if is_excluded "$base"; then + echo "excluded (pre-pgspot): $base" continue fi targets+=("$f") @@ -55,12 +59,12 @@ if [[ -n "$install_sql" ]]; then fi scan+=("$install_sql") else - echo "NOTE: no install SQL provided; scanning non-frozen upgrade scripts only." >&2 + echo "NOTE: no install SQL provided; scanning upgrade scripts only." >&2 fi scan+=("${targets[@]}") if [[ ${#scan[@]} -eq 0 ]]; then - echo "ERROR: nothing to scan (no install SQL and no non-frozen upgrade scripts)." >&2 + echo "ERROR: nothing to scan (no install SQL and no gated upgrade scripts)." >&2 echo " CI must pass the generated install SQL; an empty scan set fails the gate." >&2 exit 2 fi diff --git a/sql/pgspot-frozen.txt b/sql/pgspot-frozen.txt deleted file mode 100644 index 08e4e74d..00000000 --- a/sql/pgspot-frozen.txt +++ /dev/null @@ -1,17 +0,0 @@ -# pgspot frozen baseline -# -# Released, immutable upgrade scripts -- exempt from the pgspot gate. Customers -# run these verbatim during `ALTER EXTENSION UPDATE` and test-upgrade.sh compares -# the resulting schema, so they must not change after release. Current security -# posture is owned by the generated install SQL (fixed in Rust source), not by -# retrofitting historical upgrade SQL. -# -# New upgrade scripts are gated automatically (absent here). On release, add the -# script's basename. The first-version fixture `pg_durable--0.1.1.sql` is exempt -# separately (the gate only scans `*--*--*.sql`). -# -# One basename per line; blank lines and `#` comments ignored. - -pg_durable--0.1.1--0.2.0.sql -pg_durable--0.2.0--0.2.1.sql -pg_durable--0.2.1--0.2.2.sql From 0f7c11f5717c3774cd32c2a801fb03a8bffbf864 Mon Sep 17 00:00:00 2001 From: Todd Green <greentodd@microsoft.com> Date: Tue, 2 Jun 2026 10:56:25 -0700 Subject: [PATCH 7/8] Remove DO-block isolation workaround from pgspot gate The gate extracted each anonymous DO block and scanned it in isolation to work around a pgspot bug: pgspot's file-level search_path-secure flag leaks into DO blocks, suppressing unqualified-reference checks even though a DO block runs under the session search_path. The workaround added a pglast dependency and notable complexity. Drop the extraction (delete extract-do-blocks.py and the isolation loop, env var, and helper plumbing in run-pgspot.sh). Our DO blocks are already fully schema-qualified; the gate still qualifies everything else. Document the pgspot limitation so DO-block qualification is checked in review. A fix is being submitted upstream. --- docs/upgrade-testing.md | 2 +- scripts/extract-do-blocks.py | 68 ------------------------------------ scripts/run-pgspot.sh | 45 +++--------------------- 3 files changed, 6 insertions(+), 109 deletions(-) delete mode 100644 scripts/extract-do-blocks.py diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 380716cb..a0096264 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -158,7 +158,7 @@ Each PR that changes the extension schema or modifies SQL queries in Rust code s 1. Add the necessary DDL to the upgrade script (`sql/pg_durable--<prev>--<current>.sql`) 2. Ensure the `.so` is backward compatible with **all** previous schemas in the same provider compatibility line (Scenario B1) -3. Keep all new DDL — in the Rust install SQL *and* in any new upgrade script — schema-qualified so it passes the pgspot SQL security gate (`scripts/pgspot-gate.sh`): qualify operators as `OPERATOR(pg_catalog.<op>)`, functions/types/objects by schema (e.g. `pg_catalog.now()`), and qualify references inside anonymous `DO` blocks (they run under the session search_path and are isolation-scanned by the gate). New upgrade scripts are gated automatically. +3. Keep all new DDL — in the Rust install SQL *and* in any new upgrade script — schema-qualified so it passes the pgspot SQL security gate (`scripts/pgspot-gate.sh`): qualify operators as `OPERATOR(pg_catalog.<op>)`, functions/types/objects by schema (e.g. `pg_catalog.now()`), and qualify references inside anonymous `DO` blocks (they run under the session search_path). Note: pgspot cannot currently see unqualified references inside a `DO` block when an earlier statement set a trusted search_path (a known pgspot limitation, tracked upstream), so reviewers must check `DO`-block qualification by hand. New upgrade scripts are gated automatically. 4. Add version-specific notes to this document under "Version-Specific Changes" below 5. Pass upgrade and pgspot tests in CI diff --git a/scripts/extract-do-blocks.py b/scripts/extract-do-blocks.py deleted file mode 100644 index c72c96e0..00000000 --- a/scripts/extract-do-blocks.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Microsoft Corporation. -# Licensed under the PostgreSQL License. - -"""Extract top-level DO blocks from SQL file(s) for isolated pgspot scanning. - -pgspot marks a file "search_path secure" after the first function that sets a -trusted search_path, then skips the unqualified-reference checks on later -top-level statements. Anonymous DO blocks don't inherit that search_path at run -time, so an unqualified reference inside one is a real CVE-2018-1058 surface the -whole-file pass misses. This pulls each top-level DO block into its own file, -prefixed with a plain `CREATE SCHEMA df;` (gives df.* trusted context without -tripping PS010), so scanning them in isolation catches those references. It is a -targeted guard for DO blocks only; keep other install DDL schema-qualified. - -Usage: extract-do-blocks.py OUTDIR FILE [FILE ...] -Writes <OUTDIR>/<basename>.doN.sql per DO block and prints each path. Exits 0 -even when none are found. -""" - -import os -import sys - -from pglast import ast, parse_sql, split - - -def extract(path, outdir): - with open(path, encoding="utf-8") as fh: - sql = fh.read() - - written = [] - base = os.path.basename(path) - index = 0 - for stmt in split(sql): - try: - node = parse_sql(stmt)[0].stmt - except Exception: - # A fragment pglast can re-split but not re-parse in isolation is not - # a DO block we can check; the whole-file pass still covers the file. - continue - if not isinstance(node, ast.DoStmt): - continue - index += 1 - out_path = os.path.join(outdir, f"{base}.do{index}.sql") - with open(out_path, "w", encoding="utf-8") as out: - # Plain CREATE SCHEMA df (not IF NOT EXISTS) gives df trusted-schema - # context without tripping PS010. - out.write("CREATE SCHEMA df;\n") - out.write(stmt) - out.write(";\n") - written.append(out_path) - return written - - -def main(argv): - if len(argv) < 3: - sys.stderr.write("usage: extract-do-blocks.py OUTDIR FILE [FILE ...]\n") - return 2 - outdir = argv[1] - os.makedirs(outdir, exist_ok=True) - for path in argv[2:]: - for out_path in extract(path, outdir): - print(out_path) - return 0 - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh index 4406bd89..4c8419df 100755 --- a/scripts/run-pgspot.sh +++ b/scripts/run-pgspot.sh @@ -10,10 +10,11 @@ # A file passes only if every pgspot finding is on the per-finding allowlist # (PGSPOT_ALLOW); scan_file is fail-closed via two passes (see its comment). # -# DO-block limitation: once pgspot sees a function with a trusted search_path it -# stops checking later top-level statements. Anonymous DO blocks don't inherit -# that search_path at run time, so each is also extracted (extract-do-blocks.py) -# and scanned in isolation. Keep all other install DDL schema-qualified. +# Known pgspot limitation: pgspot does not check unqualified references inside an +# anonymous DO block when an earlier statement set a trusted search_path (its +# file-level "secure" flag leaks into the block, though a DO block actually runs +# under the session search_path). So qualify DO-block references by hand; the +# gate cannot verify them. Tracked upstream. # # Usage: scripts/run-pgspot.sh FILE [FILE ...] (globs expanded by caller) # @@ -21,15 +22,11 @@ # PGSPOT_VERSION pgspot version to pin (default: 0.9.2) # PGSPOT_VENV venv dir to install/reuse (default: a cache dir) # PGSPOT_BIN existing pgspot executable (skips venv setup) -# PGSPOT_DO_ISOLATION set to 0 to disable DO-block isolation (debug only) set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PGSPOT_VERSION="${PGSPOT_VERSION:-0.9.2}" PGSPOT_VENV="${PGSPOT_VENV:-${XDG_CACHE_HOME:-$HOME/.cache}/pg_durable/pgspot-venv}" -# Isolate-scan anonymous DO blocks too (see extract-do-blocks.py). Set 0 to debug. -PGSPOT_DO_ISOLATION="${PGSPOT_DO_ISOLATION:-1}" # --- Finding allowlist ----------------------------------------------------- # pgspot prints one line per finding: "PSxxx: <title>: <context> at line N". We @@ -117,7 +114,6 @@ resolve_pgspot() { if [[ -n "${PGSPOT_BIN:-}" ]]; then if "$PGSPOT_BIN" --version 2>/dev/null | grep -q "pgspot ${PGSPOT_VERSION}"; then PGSPOT="$PGSPOT_BIN" - PGSPOT_PY="$(dirname "$PGSPOT_BIN")/python3" return fi err "PGSPOT_BIN=$PGSPOT_BIN is not pgspot ${PGSPOT_VERSION}" @@ -127,7 +123,6 @@ resolve_pgspot() { local venv_bin="$PGSPOT_VENV/bin/pgspot" if [[ -x "$venv_bin" ]] && "$venv_bin" --version 2>/dev/null | grep -q "pgspot ${PGSPOT_VERSION}"; then PGSPOT="$venv_bin" - PGSPOT_PY="$PGSPOT_VENV/bin/python3" return fi @@ -136,7 +131,6 @@ resolve_pgspot() { "$PGSPOT_VENV/bin/pip" install --quiet --upgrade pip "$PGSPOT_VENV/bin/pip" install --quiet "pgspot==${PGSPOT_VERSION}" PGSPOT="$venv_bin" - PGSPOT_PY="$PGSPOT_VENV/bin/python3" } main() { @@ -148,20 +142,6 @@ main() { resolve_pgspot build_ignore_args - local do_isolation=0 - local workdir="" - if [[ "$PGSPOT_DO_ISOLATION" == "1" ]]; then - if "$PGSPOT_PY" -c 'import pglast' 2>/dev/null; then - do_isolation=1 - workdir="$(mktemp -d)" - # shellcheck disable=SC2064 - trap "rm -rf '$workdir'" EXIT - else - err "ERROR: DO-block isolation requested but pglast is unavailable in $PGSPOT_PY" - exit 2 - fi - fi - local failed=0 local checked=0 local file @@ -178,21 +158,6 @@ main() { err "FAIL: $file" failed=$((failed + 1)) fi - - # Isolate-scan each DO block so the whole-file leak can't mask it. - if [[ $do_isolation -eq 1 ]]; then - local do_file - while IFS= read -r do_file; do - [[ -z "$do_file" ]] && continue - printf '\n=== pgspot (DO-isolation): %s ===\n' "$do_file" - if scan_file "$do_file"; then - printf 'OK: %s\n' "$do_file" - else - err "FAIL (DO-isolation, source file $file): $do_file" - failed=$((failed + 1)) - fi - done < <("$PGSPOT_PY" "$SCRIPT_DIR/extract-do-blocks.py" "$workdir" "$file") - fi done if [[ $checked -eq 0 ]]; then From 96da3d017fb19de479e5c0faaa54aef182e83da5 Mon Sep 17 00:00:00 2001 From: Todd Green <greentodd@microsoft.com> Date: Tue, 2 Jun 2026 15:33:44 -0700 Subject: [PATCH 8/8] ci: fold pgspot gate into test job; trim comments Run the pgspot gate as steps in the existing pg17 test job instead of a separate job that rebuilt the whole pgrx environment. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .github/workflows/ci.yml | 121 +++++---------------------------------- docs/upgrade-testing.md | 2 +- scripts/run-pgspot.sh | 20 ++----- 3 files changed, 22 insertions(+), 121 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 315f5487..ab0fa1cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -176,6 +176,21 @@ jobs: - name: Run clippy run: cargo clippy --no-default-features --features pg${{ matrix.pg_version }},http-allow-test-domains -- -D warnings + # pgspot SQL security gate (pg17 only; the install SQL is the shipped DDL). + # http-allow-test-domains is a test-only feature that emits no extra DDL, so + # the scanned surface matches the shipped build while reusing this job's build. + - name: Generate extension install SQL + if: matrix.pg_version == 17 + run: | + cargo pgrx schema pg${{ matrix.pg_version }} \ + --no-default-features \ + --features pg${{ matrix.pg_version }},http-allow-test-domains \ + -o /tmp/pg_durable-install.sql + + - name: Run pgspot SQL security gate + if: matrix.pg_version == 17 + run: scripts/pgspot-gate.sh /tmp/pg_durable-install.sql + # Include an http feature to allow http unit tests to run. No time for multiple feature combos. - name: Run unit tests run: cargo pgrx test pg${{ matrix.pg_version }} --features http-allow-test-domains @@ -215,109 +230,3 @@ jobs: - name: Stop PostgreSQL after pg_regress if: steps.pg_regress.outcome != 'skipped' run: ./scripts/pg-stop.sh --pg-version ${{ matrix.pg_version }} - - pgspot: - name: pgspot SQL Security Gate - runs-on: ubuntu-latest - needs: [format] - permissions: - contents: read - env: - PG_VERSION: 17 - steps: - - uses: actions/checkout@v4 - - - name: Free disk space and show usage - run: | - echo "Disk usage before cleanup" - df -h - sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /usr/local/.ghcup - docker system prune -af || true - echo "Disk usage after cleanup" - df -h - - - name: Install Rust nightly - run: | - rustup toolchain install nightly --profile minimal - rustup default nightly - - - name: Install system dependencies - run: | - sudo apt-get update - sudo apt-get install -y \ - pkg-config \ - libssl-dev \ - libclang-dev \ - clang \ - bison \ - flex \ - libreadline-dev \ - zlib1g-dev \ - libxml2-dev \ - libxslt1-dev \ - libicu-dev - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Cache cargo-pgrx tool - uses: actions/cache@v5 - with: - path: | - ~/.cargo/.crates2.json - ~/.cargo/bin/cargo-pgrx - key: ${{ runner.os }}-cargo-pgrx-0.16.1-v1 - - - name: Cache Cargo dependency sources - uses: actions/cache@v5 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - key: ${{ runner.os }}-cargo-deps-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo-deps- - - - name: Cache PostgreSQL build - uses: actions/cache@v5 - with: - path: | - ~/.pgrx/config.toml - ~/.pgrx/${{ env.PG_VERSION }}.* - key: ${{ runner.os }}-pgrx-pg${{ env.PG_VERSION }} - - - name: Install cargo-pgrx - run: | - if ! command -v cargo-pgrx &> /dev/null || ! cargo pgrx --version | grep -q "0.16.1"; then - cargo install cargo-pgrx --version 0.16.1 --locked - else - echo "cargo-pgrx 0.16.1 already installed, skipping" - fi - - - name: Initialize pgrx - run: | - if [ ! -d "$HOME/.pgrx/${{ env.PG_VERSION }}."* ]; then - cargo pgrx init --pg${{ env.PG_VERSION }} download - else - echo "PostgreSQL ${{ env.PG_VERSION }} already initialized, skipping download" - fi - - # Generate the install SQL to scan. http-allow-test-domains is a test-only - # feature (widens df.http()'s allowlist) that emits no extra DDL, so this DDL - # surface matches the shipped build. Enabled only to match the clippy/test - # jobs' feature set and reuse one cached build. - - name: Generate extension install SQL - run: | - cargo pgrx schema pg${{ env.PG_VERSION }} \ - --no-default-features \ - --features pg${{ env.PG_VERSION }},http-allow-test-domains \ - -o /tmp/pg_durable-install.sql - echo "Generated install SQL:" - wc -l /tmp/pg_durable-install.sql - - # Scan the install SQL plus upgrade scripts. Pre-pgspot legacy scripts are - # excluded in the gate; new scripts are gated automatically. - - name: Run pgspot SQL security gate - run: scripts/pgspot-gate.sh /tmp/pg_durable-install.sql diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index a0096264..c2cb3072 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -158,7 +158,7 @@ Each PR that changes the extension schema or modifies SQL queries in Rust code s 1. Add the necessary DDL to the upgrade script (`sql/pg_durable--<prev>--<current>.sql`) 2. Ensure the `.so` is backward compatible with **all** previous schemas in the same provider compatibility line (Scenario B1) -3. Keep all new DDL — in the Rust install SQL *and* in any new upgrade script — schema-qualified so it passes the pgspot SQL security gate (`scripts/pgspot-gate.sh`): qualify operators as `OPERATOR(pg_catalog.<op>)`, functions/types/objects by schema (e.g. `pg_catalog.now()`), and qualify references inside anonymous `DO` blocks (they run under the session search_path). Note: pgspot cannot currently see unqualified references inside a `DO` block when an earlier statement set a trusted search_path (a known pgspot limitation, tracked upstream), so reviewers must check `DO`-block qualification by hand. New upgrade scripts are gated automatically. +3. Keep all new DDL — in the Rust install SQL *and* in any new upgrade script — schema-qualified so it passes the pgspot SQL security gate (`scripts/pgspot-gate.sh`): qualify operators as `OPERATOR(pg_catalog.<op>)`, functions/types/objects by schema (e.g. `pg_catalog.now()`), and qualify references inside anonymous `DO` blocks (they run under the session search_path). New upgrade scripts are gated automatically. 4. Add version-specific notes to this document under "Version-Specific Changes" below 5. Pass upgrade and pgspot tests in CI diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh index 4c8419df..a69c3163 100755 --- a/scripts/run-pgspot.sh +++ b/scripts/run-pgspot.sh @@ -10,12 +10,6 @@ # A file passes only if every pgspot finding is on the per-finding allowlist # (PGSPOT_ALLOW); scan_file is fail-closed via two passes (see its comment). # -# Known pgspot limitation: pgspot does not check unqualified references inside an -# anonymous DO block when an earlier statement set a trusted search_path (its -# file-level "secure" flag leaks into the block, though a DO block actually runs -# under the session search_path). So qualify DO-block references by hand; the -# gate cannot verify them. Tracked upstream. -# # Usage: scripts/run-pgspot.sh FILE [FILE ...] (globs expanded by caller) # # Env: @@ -30,15 +24,13 @@ PGSPOT_VENV="${PGSPOT_VENV:-${XDG_CACHE_HOME:-$HOME/.cache}/pg_durable/pgspot-ve # --- Finding allowlist ----------------------------------------------------- # pgspot prints one line per finding: "PSxxx: <title>: <context> at line N". We -# allow specific findings by exact match rather than suppressing whole codes -# globally (--ignore), so a future unsafe instance of the same code still fails. -# Anything not matched here -- plus unknowns, fatals, and unexplained non-zero -# exits -- fails the gate. Keep this list minimal; prefer fixing the source. +# allow findings by exact match, not by suppressing whole codes (--ignore), so a +# future unsafe instance of the same code still fails. Anything unmatched -- plus +# unknowns, fatals, and unexplained non-zero exits -- fails the gate. PGSPOT_ALLOW=( - # pgrx emits `CREATE SCHEMA IF NOT EXISTS df` from #[pg_schema]; the - # IF NOT EXISTS (what PS010 flags) is not controllable from source. Only df is - # allowed -- any other PS010 still fails. Schemas we control omit IF NOT EXISTS - # (e.g. `CREATE SCHEMA duroxide`) and never trip PS010. + # pgrx emits `CREATE SCHEMA IF NOT EXISTS df` from #[pg_schema]; the IF NOT + # EXISTS (what PS010 flags) isn't controllable from source. Only df is allowed; + # any other PS010 still fails. Schemas we control omit IF NOT EXISTS. '^PS010: Unsafe schema creation: df at line [0-9]+$' )