diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4436c347..ab0fa1cb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -176,6 +176,21 @@ jobs: - name: Run clippy run: cargo clippy --no-default-features --features pg${{ matrix.pg_version }},http-allow-test-domains -- -D warnings + # pgspot SQL security gate (pg17 only; the install SQL is the shipped DDL). + # http-allow-test-domains is a test-only feature that emits no extra DDL, so + # the scanned surface matches the shipped build while reusing this job's build. + - name: Generate extension install SQL + if: matrix.pg_version == 17 + run: | + cargo pgrx schema pg${{ matrix.pg_version }} \ + --no-default-features \ + --features pg${{ matrix.pg_version }},http-allow-test-domains \ + -o /tmp/pg_durable-install.sql + + - name: Run pgspot SQL security gate + if: matrix.pg_version == 17 + run: scripts/pgspot-gate.sh /tmp/pg_durable-install.sql + # Include an http feature to allow http unit tests to run. No time for multiple feature combos. - name: Run unit tests run: cargo pgrx test pg${{ matrix.pg_version }} --features http-allow-test-domains diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 567b831d..c2cb3072 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -158,8 +158,9 @@ Each PR that changes the extension schema or modifies SQL queries in Rust code s 1. Add the necessary DDL to the upgrade script (`sql/pg_durable----.sql`) 2. Ensure the `.so` is backward compatible with **all** previous schemas in the same provider compatibility line (Scenario B1) -3. Add version-specific notes to this document under "Version-Specific Changes" below -4. Pass upgrade tests in CI +3. Keep all new DDL — in the Rust install SQL *and* in any new upgrade script — schema-qualified so it passes the pgspot SQL security gate (`scripts/pgspot-gate.sh`): qualify operators as `OPERATOR(pg_catalog.)`, functions/types/objects by schema (e.g. `pg_catalog.now()`), and qualify references inside anonymous `DO` blocks (they run under the session search_path). New upgrade scripts are gated automatically. +4. Add version-specific notes to this document under "Version-Specific Changes" below +5. Pass upgrade and pgspot tests in CI ### Future work @@ -185,6 +186,15 @@ No additional fixture is needed for subsequent minors — intermediate versions `cargo pgrx package` generates the new major's install SQL. The previous major's install SQL and upgrade scripts are still needed for the A/B2 upgrade chain when the provider line continues across the major bump. B1 will be a no-op if there are no previous compatible versions within the new major, or if `PROVIDER_COMPAT_START_VERSION` marks the new major as the start of a new provider line. +### Upgrade scripts and the pgspot gate + +The pgspot gate scans every upgrade script matching `*--*--*.sql`, except a small +hardcoded list of pre-pgspot legacy scripts in `scripts/pgspot-gate.sh` (authored +before the install DDL was schema-qualified, and immutable now that they're +released). Every new upgrade script is gated and must pass — keep its DDL +schema-qualified (see step 3 above). Scripts written after qualification pass the +gate, so they never need to be added to the exclude list. + --- ## Version-Specific Changes diff --git a/scripts/pgspot-gate.sh b/scripts/pgspot-gate.sh new file mode 100755 index 00000000..a342cd43 --- /dev/null +++ b/scripts/pgspot-gate.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Copyright (c) Microsoft Corporation. +# Licensed under the PostgreSQL License. + +# pgspot-gate.sh - Project entry point for the pgspot SQL security gate. +# +# Scans the generated install SQL plus every active upgrade script, except the +# pre-pgspot legacy scripts listed below. New scripts are gated automatically. +# +# Usage: scripts/pgspot-gate.sh [INSTALL_SQL] +# INSTALL_SQL install SQL to scan. Optional (omit when no local pgrx install); +# CI always generates and passes it. Without it, only upgrade +# scripts are scanned. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" +SQL_DIR="$PROJECT_DIR/sql" + +install_sql="${1:-}" + +# Released upgrade scripts authored before the install DDL was schema-qualified. +# They are immutable and don't all pass pgspot, so they're excluded. Scripts +# written after qualification pass the gate, so they need no entry here. +EXCLUDE=( + pg_durable--0.1.1--0.2.0.sql + pg_durable--0.2.0--0.2.1.sql + pg_durable--0.2.1--0.2.2.sql +) + +is_excluded() { + local base="$1" e + for e in "${EXCLUDE[@]}"; do + [[ "$base" == "$e" ]] && return 0 + done + return 1 +} + +# Upgrade scripts (basename `*--*--*.sql`; the single-`--` first-version fixture +# never matches), minus the excluded legacy ones. +targets=() +shopt -s nullglob +for f in "$SQL_DIR"/pg_durable--*--*.sql; do + base="$(basename "$f")" + if is_excluded "$base"; then + echo "excluded (pre-pgspot): $base" + continue + fi + targets+=("$f") +done +shopt -u nullglob + +scan=() +if [[ -n "$install_sql" ]]; then + if [[ ! -f "$install_sql" ]]; then + echo "ERROR: install SQL not found: $install_sql" >&2 + exit 2 + fi + scan+=("$install_sql") +else + echo "NOTE: no install SQL provided; scanning upgrade scripts only." >&2 +fi +scan+=("${targets[@]}") + +if [[ ${#scan[@]} -eq 0 ]]; then + echo "ERROR: nothing to scan (no install SQL and no gated upgrade scripts)." >&2 + echo " CI must pass the generated install SQL; an empty scan set fails the gate." >&2 + exit 2 +fi + +exec "$SCRIPT_DIR/run-pgspot.sh" "${scan[@]}" diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh new file mode 100755 index 00000000..a69c3163 --- /dev/null +++ b/scripts/run-pgspot.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# Copyright (c) Microsoft Corporation. +# Licensed under the PostgreSQL License. + +# run-pgspot.sh - Lint shipped extension SQL with pgspot. +# +# Scans the SQL pg_durable ships (generated install SQL + active upgrade scripts) +# for search_path / privilege-escalation issues (CVE-2018-1058). +# +# A file passes only if every pgspot finding is on the per-finding allowlist +# (PGSPOT_ALLOW); scan_file is fail-closed via two passes (see its comment). +# +# Usage: scripts/run-pgspot.sh FILE [FILE ...] (globs expanded by caller) +# +# Env: +# PGSPOT_VERSION pgspot version to pin (default: 0.9.2) +# PGSPOT_VENV venv dir to install/reuse (default: a cache dir) +# PGSPOT_BIN existing pgspot executable (skips venv setup) + +set -euo pipefail + +PGSPOT_VERSION="${PGSPOT_VERSION:-0.9.2}" +PGSPOT_VENV="${PGSPOT_VENV:-${XDG_CACHE_HOME:-$HOME/.cache}/pg_durable/pgspot-venv}" + +# --- Finding allowlist ----------------------------------------------------- +# pgspot prints one line per finding: "PSxxx: : <context> at line N". We +# allow findings by exact match, not by suppressing whole codes (--ignore), so a +# future unsafe instance of the same code still fails. Anything unmatched -- plus +# unknowns, fatals, and unexplained non-zero exits -- fails the gate. +PGSPOT_ALLOW=( + # pgrx emits `CREATE SCHEMA IF NOT EXISTS df` from #[pg_schema]; the IF NOT + # EXISTS (what PS010 flags) isn't controllable from source. Only df is allowed; + # any other PS010 still fails. Schemas we control omit IF NOT EXISTS. + '^PS010: Unsafe schema creation: df at line [0-9]+$' +) + +# Whole codes to suppress globally (pgspot --ignore). Prefer PGSPOT_ALLOW. Empty. +PGSPOT_IGNORE=() + +# --------------------------------------------------------------------------- + +err() { printf '%s\n' "$*" >&2; } + +# Build the two --ignore sets: GLOBAL (PGSPOT_IGNORE only, used in pass A) and +# ALLOW (GLOBAL + the allowlisted codes, used in pass B). +IGNORE_GLOBAL_ARGS=() +IGNORE_ALLOW_ARGS=() +build_ignore_args() { + local code re + for code in "${PGSPOT_IGNORE[@]:-}"; do + [[ -z "$code" ]] && continue + IGNORE_GLOBAL_ARGS+=(--ignore "$code") + IGNORE_ALLOW_ARGS+=(--ignore "$code") + done + for re in "${PGSPOT_ALLOW[@]:-}"; do + if [[ "$re" =~ (PS[0-9]+) ]]; then + IGNORE_ALLOW_ARGS+=(--ignore "${BASH_REMATCH[1]}") + fi + done +} + +# scan_file FILE -- fail-closed pass/fail against PGSPOT_ALLOW via two passes: +# Pass A: print all findings; every "PSxxx:" line must match the allowlist. +# Catches a disallowed instance of an allowlisted code (e.g. PS010 for a +# non-df schema), which pass B's per-code --ignore would otherwise hide. +# Pass B: ignore the allowlisted codes; the file must exit fully clean. Catches +# unknowns, parse fatals, and findings pgspot reports only via exit code. +# FILE passes iff pass A has no disallowed line AND pass B exits clean. +scan_file() { + local file="$1" + + local outA + # Pass A uses the printed findings, not the exit code; `|| true` keeps set -e + # from aborting when pgspot exits non-zero on a finding. + outA="$("$PGSPOT" "${IGNORE_GLOBAL_ARGS[@]}" "$file" 2>&1)" || true + printf '%s\n' "$outA" + + local disallowed=0 line re ok + while IFS= read -r line; do + [[ "$line" =~ ^PS[0-9]+:\ ]] || continue + ok=0 + for re in "${PGSPOT_ALLOW[@]}"; do + [[ -z "$re" ]] && continue + if [[ "$line" =~ $re ]]; then ok=1; break; fi + done + if [[ $ok -eq 0 ]]; then + disallowed=$((disallowed + 1)) + err " disallowed finding: $line" + fi + done <<< "$outA" + + local rcB=0 + "$PGSPOT" "${IGNORE_ALLOW_ARGS[@]}" "$file" >/dev/null 2>&1 || rcB=$? + + if [[ $disallowed -gt 0 ]]; then + return 1 + fi + if [[ $rcB -ne 0 ]]; then + err " pgspot reports residual findings after ignoring allowlisted codes (unknown/fatal/non-allowlisted); exit $rcB" + return 1 + fi + return 0 +} + +resolve_pgspot() { + if [[ -n "${PGSPOT_BIN:-}" ]]; then + if "$PGSPOT_BIN" --version 2>/dev/null | grep -q "pgspot ${PGSPOT_VERSION}"; then + PGSPOT="$PGSPOT_BIN" + return + fi + err "PGSPOT_BIN=$PGSPOT_BIN is not pgspot ${PGSPOT_VERSION}" + exit 2 + fi + + local venv_bin="$PGSPOT_VENV/bin/pgspot" + if [[ -x "$venv_bin" ]] && "$venv_bin" --version 2>/dev/null | grep -q "pgspot ${PGSPOT_VERSION}"; then + PGSPOT="$venv_bin" + return + fi + + err "Installing pgspot ${PGSPOT_VERSION} into ${PGSPOT_VENV} ..." + python3 -m venv "$PGSPOT_VENV" + "$PGSPOT_VENV/bin/pip" install --quiet --upgrade pip + "$PGSPOT_VENV/bin/pip" install --quiet "pgspot==${PGSPOT_VERSION}" + PGSPOT="$venv_bin" +} + +main() { + if [[ $# -eq 0 ]]; then + err "usage: $0 FILE [FILE ...]" + exit 2 + fi + + resolve_pgspot + build_ignore_args + + local failed=0 + local checked=0 + local file + for file in "$@"; do + if [[ ! -f "$file" ]]; then + err "skip (not found): $file" + continue + fi + checked=$((checked + 1)) + printf '\n=== pgspot: %s ===\n' "$file" + if scan_file "$file"; then + printf 'OK: %s\n' "$file" + else + err "FAIL: $file" + failed=$((failed + 1)) + fi + done + + if [[ $checked -eq 0 ]]; then + err "ERROR: no files were checked" + exit 2 + fi + + printf '\n--- pgspot summary: %d file(s) checked, %d failed ---\n' "$checked" "$failed" + if [[ $failed -ne 0 ]]; then + err "pgspot gate FAILED ($failed file(s) with findings)" + exit 1 + fi + printf 'pgspot gate PASSED\n' +} + +main "$@" diff --git a/src/lib.rs b/src/lib.rs index 662b392e..7dd88d9e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -166,8 +166,8 @@ CREATE TABLE IF NOT EXISTS df.nodes ( error TEXT, submitted_by REGROLE, database TEXT, - created_at TIMESTAMPTZ DEFAULT now(), - updated_at TIMESTAMPTZ DEFAULT now() + created_at TIMESTAMPTZ DEFAULT pg_catalog.now(), + updated_at TIMESTAMPTZ DEFAULT pg_catalog.now() ); COMMENT ON COLUMN df.nodes.submitted_by IS @@ -181,8 +181,8 @@ CREATE TABLE IF NOT EXISTS df.instances ( status TEXT DEFAULT 'pending', submitted_by REGROLE NOT NULL, database TEXT, - created_at TIMESTAMPTZ DEFAULT now(), - updated_at TIMESTAMPTZ DEFAULT now(), + created_at TIMESTAMPTZ DEFAULT pg_catalog.now(), + updated_at TIMESTAMPTZ DEFAULT pg_catalog.now(), completed_at TIMESTAMPTZ ); @@ -200,7 +200,7 @@ CREATE INDEX IF NOT EXISTS idx_nodes_instance ON df.nodes(instance_id); CREATE TABLE IF NOT EXISTS df.vars ( name TEXT NOT NULL, value TEXT, - owner REGROLE NOT NULL DEFAULT quote_ident(current_user)::regrole, + owner REGROLE NOT NULL DEFAULT pg_catalog.quote_ident(current_user)::pg_catalog.regrole, PRIMARY KEY (owner, name) ); @@ -210,17 +210,22 @@ CREATE TABLE IF NOT EXISTS df.vars ( -- recreation even though the extension is always "present" in pg_extension. CREATE TABLE IF NOT EXISTS df._worker_epoch ( epoch_id UUID PRIMARY KEY, - started_at TIMESTAMPTZ DEFAULT now(), - last_seen_at TIMESTAMPTZ DEFAULT now() + started_at TIMESTAMPTZ DEFAULT pg_catalog.now(), + last_seen_at TIMESTAMPTZ DEFAULT pg_catalog.now() ); ALTER TABLE df.instances ADD CONSTRAINT instances_id_format_chk - CHECK (id ~ '^[0-9a-f]{8}$') NOT VALID, + -- Operators (OPERATOR(pg_catalog.<op>)) and functions (e.g. pg_catalog.now) + -- are schema-qualified throughout this install DDL so name resolution never + -- depends on the session search_path -- closing the CVE-2018-1058 vector + -- (a malicious schema shadowing `=`, `~`, etc.). Enforced by the pgspot CI + -- gate (scripts/pgspot-gate.sh). + CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT instances_root_node_format_chk - CHECK (root_node ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (root_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT instances_status_chk - CHECK (status IN ('pending', 'running', 'completed', 'failed', 'cancelled')) NOT VALID, + CHECK (status OPERATOR(pg_catalog.=) ANY (ARRAY['pending', 'running', 'completed', 'failed', 'cancelled'])) NOT VALID, -- Supports the composite FK from df.nodes that ties node identity to the instance row. ADD CONSTRAINT instances_identity_key UNIQUE (id, submitted_by); @@ -231,35 +236,35 @@ ALTER TABLE df.nodes ADD CONSTRAINT nodes_submitted_by_present_chk CHECK (submitted_by IS NOT NULL) NOT VALID, ADD CONSTRAINT nodes_id_format_chk - CHECK (id ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT nodes_instance_id_format_chk - CHECK (instance_id ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (instance_id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT nodes_left_node_format_chk - CHECK (left_node IS NULL OR left_node ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (left_node IS NULL OR left_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT nodes_right_node_format_chk - CHECK (right_node IS NULL OR right_node ~ '^[0-9a-f]{8}$') NOT VALID, + CHECK (right_node IS NULL OR right_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, ADD CONSTRAINT nodes_node_type_chk - CHECK (node_type IN ('SQL', 'THEN', 'IF', 'JOIN', 'LOOP', 'BREAK', 'RACE', 'SLEEP', 'WAIT_SCHEDULE', 'HTTP', 'SIGNAL')) NOT VALID, + CHECK (node_type OPERATOR(pg_catalog.=) ANY (ARRAY['SQL', 'THEN', 'IF', 'JOIN', 'LOOP', 'BREAK', 'RACE', 'SLEEP', 'WAIT_SCHEDULE', 'HTTP', 'SIGNAL'])) NOT VALID, ADD CONSTRAINT nodes_result_name_chk - CHECK (result_name IS NULL OR result_name ~ '^[A-Za-z_][A-Za-z0-9_]*$') NOT VALID, + CHECK (result_name IS NULL OR result_name OPERATOR(pg_catalog.~) '^[A-Za-z_][A-Za-z0-9_]*$') NOT VALID, ADD CONSTRAINT nodes_status_chk - CHECK (status IN ('pending', 'running', 'completed', 'failed')) NOT VALID, + CHECK (status OPERATOR(pg_catalog.=) ANY (ARRAY['pending', 'running', 'completed', 'failed'])) NOT VALID, ADD CONSTRAINT nodes_result_status_chk - CHECK (result IS NULL OR status IN ('completed', 'failed')) NOT VALID, + CHECK (result IS NULL OR status OPERATOR(pg_catalog.=) ANY (ARRAY['completed', 'failed'])) NOT VALID, ADD CONSTRAINT nodes_structure_chk CHECK ( CASE - WHEN node_type IN ('SQL', 'SLEEP', 'WAIT_SCHEDULE', 'BREAK', 'HTTP', 'SIGNAL') + WHEN node_type OPERATOR(pg_catalog.=) ANY (ARRAY['SQL', 'SLEEP', 'WAIT_SCHEDULE', 'BREAK', 'HTTP', 'SIGNAL']) THEN left_node IS NULL AND right_node IS NULL AND query IS NOT NULL - WHEN node_type = 'THEN' + WHEN node_type OPERATOR(pg_catalog.=) 'THEN' THEN left_node IS NOT NULL AND right_node IS NOT NULL AND query IS NULL - WHEN node_type = 'IF' + WHEN node_type OPERATOR(pg_catalog.=) 'IF' THEN left_node IS NOT NULL AND right_node IS NOT NULL AND query IS NOT NULL - WHEN node_type = 'LOOP' + WHEN node_type OPERATOR(pg_catalog.=) 'LOOP' THEN left_node IS NOT NULL AND right_node IS NULL - WHEN node_type = 'JOIN' + WHEN node_type OPERATOR(pg_catalog.=) 'JOIN' THEN left_node IS NOT NULL AND right_node IS NOT NULL - WHEN node_type = 'RACE' + WHEN node_type OPERATOR(pg_catalog.=) 'RACE' THEN left_node IS NOT NULL AND right_node IS NOT NULL AND query IS NULL ELSE FALSE END @@ -302,24 +307,24 @@ ALTER TABLE df.instances ENABLE ROW LEVEL SECURITY; CREATE POLICY instances_user_isolation ON df.instances FOR ALL - USING (submitted_by = quote_ident(current_user)::regrole) - WITH CHECK (submitted_by = quote_ident(current_user)::regrole); + USING (submitted_by OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole) + WITH CHECK (submitted_by OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole); -- Enable RLS on df.nodes ALTER TABLE df.nodes ENABLE ROW LEVEL SECURITY; CREATE POLICY nodes_user_isolation ON df.nodes FOR ALL - USING (submitted_by = quote_ident(current_user)::regrole) - WITH CHECK (submitted_by = quote_ident(current_user)::regrole); + USING (submitted_by OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole) + WITH CHECK (submitted_by OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole); -- Enable RLS on df.vars (per-user variable isolation) ALTER TABLE df.vars ENABLE ROW LEVEL SECURITY; CREATE POLICY vars_user_isolation ON df.vars FOR ALL - USING (owner = quote_ident(current_user)::regrole) - WITH CHECK (owner = quote_ident(current_user)::regrole); + USING (owner OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole) + WITH CHECK (owner OPERATOR(pg_catalog.=) pg_catalog.quote_ident(current_user)::pg_catalog.regrole); -- No automatic PUBLIC grants. Admins must explicitly grant privileges -- to application roles after CREATE EXTENSION. @@ -525,12 +530,12 @@ DECLARE wrole TEXT; is_super BOOLEAN; BEGIN - wrole := current_setting('pg_durable.worker_role', true); - IF wrole IS NULL OR wrole = '' THEN + wrole := pg_catalog.current_setting('pg_durable.worker_role', true); + IF wrole IS NULL OR wrole OPERATOR(pg_catalog.=) '' THEN wrole := 'azuresu'; END IF; - SELECT rolsuper INTO is_super FROM pg_roles WHERE rolname = wrole; + SELECT rolsuper INTO is_super FROM pg_catalog.pg_roles WHERE rolname OPERATOR(pg_catalog.=) wrole; IF is_super IS NULL THEN RAISE WARNING 'pg_durable: worker role "%" does not exist. The background worker will not be able to process workflows. Create the role as a superuser before using pg_durable.', wrole; ELSIF NOT is_super THEN @@ -578,12 +583,12 @@ DECLARE target_db TEXT; BEGIN -- Get the current database - SELECT current_database() INTO current_db; + SELECT pg_catalog.current_database() INTO current_db; -- Get the target database that the background worker will connect to SELECT df.target_database() INTO target_db; - IF current_db != target_db THEN + IF current_db OPERATOR(pg_catalog.<>) target_db THEN RAISE EXCEPTION 'pg_durable extension must be created in database "%" (currently in "%"). The background worker only processes functions in the database specified by the pg_durable.database GUC (defaults to "postgres").', target_db, current_db USING HINT = 'Connect to the correct database and run: CREATE EXTENSION pg_durable;'; END IF;