From 64ff7f71626e8d76e91474da288168409c44f544 Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:25:27 -0700 Subject: [PATCH 1/4] Move DSL operators from public to df schema (#202) pgspot flagged the 7 DSL operators (~>, |=>, &, |, ?>, !>, @>) as created in the public schema (PS017). Move them into the df schema to avoid polluting public. Operators in df.start('a' ~> 'b') resolve in the caller's session before df.start()/df.explain() run, so df must be on the session search_path for the unqualified operator syntax. This is a documented, pre-1.0 behavior change. - Qualify CREATE OPERATOR as df. in src/lib.rs (fresh install) - Move operators in the 0.2.2->0.2.3 upgrade script (DROP public, CREATE df) - Add df to database search_path in E2E runners (local + docker) - Add search_path GUC to unit-test config (postgresql_conf_options) - Remove obsolete pgspot PS017 allowlist entry - Update docs, examples, CHANGELOG, and upgrade-testing notes --- .agents/skills/pg-durable-sql/SKILL.md | 1 + .github/copilot-instructions.md | 2 + CHANGELOG.md | 6 ++ README.md | 7 +- USER_GUIDE.md | 15 +++ docs/ARCHITECTURE.md | 17 ++-- docs/api-reference.md | 2 + docs/grammar.md | 2 + docs/upgrade-testing.md | 7 ++ .../azure-functions/sql/03_start_workflow.sql | 4 + .../services/cognitive-services/test.sql | 4 + .../services/cosmos-db/test.sql | 4 + .../services/function-app/test.sql | 4 + .../services/key-vault/test.sql | 4 + .../services/service-bus/test.sql | 4 + .../services/storage-account/test.sql | 4 + examples/invoice-approval/sql/04_explain.sql | 4 + .../sql/05_start_workflow.sql | 4 + .../01_autovacuum_blocked.sql | 4 + .../02_database_bloat.sql | 4 + .../03_wraparound_risk.sql | 4 + .../04_tables_not_vacuumed.sql | 4 + examples/operational-scenarios/README.md | 5 + scripts/run-pgspot.sh | 4 - scripts/test-e2e-docker.sh | 8 ++ scripts/test-e2e-local.sh | 9 ++ sql/pg_durable--0.2.2--0.2.3.sql | 96 +++++++++++++++++-- 
src/lib.rs | 26 +++-- 28 files changed, 230 insertions(+), 29 deletions(-) diff --git a/.agents/skills/pg-durable-sql/SKILL.md b/.agents/skills/pg-durable-sql/SKILL.md index f5adcbf4..e0547e03 100644 --- a/.agents/skills/pg-durable-sql/SKILL.md +++ b/.agents/skills/pg-durable-sql/SKILL.md @@ -15,6 +15,7 @@ Generate correct, idiomatic pg_durable durable function SQL using the `df.*` sch 4. **Operators are SQL-level custom operators.** They work on `TEXT` operands. Parentheses control grouping. 5. **`df.setvar()` must be called BEFORE `df.start()`.** Variables are captured at start time and are immutable during execution. 6. **Two variable syntaxes:** `{varname}` for durable function variables (from `df.setvar`), `$name` for result captures (from `|=>`). Do NOT mix them up. +7. **Operators live in the `df` schema.** They are resolved in the calling session, so `df` must be on the `search_path` (e.g. `SET search_path TO "$user", public, df;`) for the unqualified operator syntax to work. Functions (`df.seq`, `df.join`, …) are always schema-qualified and work regardless. ## Operators — Complete Reference diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index dffd867e..747ea7be 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -72,6 +72,8 @@ pub const NAME: &str = "pg_durable::activity::execute-sql"; ### DSL Creates Graph Nodes DSL functions like `df.sql()` insert rows into `df.nodes`. The `Durofut` struct represents a node reference passed between operators. +The DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) are created in the `df` schema (see `extension_sql!` in [src/lib.rs](../src/lib.rs)). They are resolved in the caller's session before `df.start()`/`df.explain()` run, so `df` must be on the session `search_path` for unqualified operator syntax. The E2E runners set this at the database level after `CREATE EXTENSION`. + ### E2E Test Structure Tests in `tests/e2e/sql/` follow this pattern: 1. 
Create temp state table, call `df.start()` diff --git a/CHANGELOG.md b/CHANGELOG.md index dc7f79a2..91453c53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ Pre-1.0 note: while `pg_durable` is in major version `0`, minor releases may include breaking changes. +## [0.2.3] - unreleased + +### Breaking Changes + +- **DSL operators moved from `public` to the `df` schema (#202):** The seven DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) are now created in the `df` schema instead of `public`, so they no longer pollute the public namespace (resolving a pgspot PS017 finding). Because operators are resolved in the calling session before `df.start()`/`df.explain()` run, the unqualified operator syntax now requires `df` on the session `search_path` (e.g. `SET search_path TO "$user", public, df;`, or `ALTER ROLE`/`ALTER DATABASE ... SET search_path`). The `df.*` function forms (`df.seq`, `df.join`, …) are unaffected. Existing installs move the operators when they run `ALTER EXTENSION pg_durable UPDATE`; an install that has not yet run the update (the new `.so` against a ≤0.2.2 schema) keeps the operators in `public` and continues to work unchanged. + ## [0.2.2] - 2026-05-28 First open-source release of `pg_durable` on GitHub under the PostgreSQL License. diff --git a/README.md b/README.md index e735f442..722ba5b9 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,9 @@ The model is intentionally SQL-shaped. If a step needs arbitrary code, a non-HTT ## Quick Example ```sql +-- Add the df schema to your search_path so the DSL operators resolve +SET search_path TO "$user", public, df; + -- A durable function that processes data in steps SELECT df.start( 'SELECT id FROM documents WHERE processed = false LIMIT 100' |=> 'batch' @@ -101,6 +104,8 @@ SELECT df.start( ); ``` +> The DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) live in the `df` schema. Add `df` to your `search_path` (as above) to use the unqualified syntax. See the [User Guide](USER_GUIDE.md#enable-the-extension) for details. 
+ ## Packages Tagged releases publish Debian packages for PostgreSQL 17 and 18 on amd64 from the GitHub release assets. Packages are named `pg-durable-postgresql-<pg_major>_<version>-1_<arch>.deb` and install the extension library, control file, and SQL upgrade files into the matching PostgreSQL installation directories. @@ -137,7 +142,7 @@ After installing a package, add `pg_durable` to `shared_preload_libraries`, rest CREATE EXTENSION pg_durable; ``` -The default pg_durable database is `postgres`; see [User Guide](USER_GUIDE.md) for background worker configuration and privilege setup. +The default pg_durable database is `postgres`; see [User Guide](USER_GUIDE.md) for background worker configuration and privilege setup. To use the unqualified DSL operator syntax, add the `df` schema to your `search_path` (e.g. `ALTER DATABASE postgres SET search_path = "$user", public, df;`). Each release also publishes source archives for building from source and a `SHA256SUMS` file for verifying downloaded assets. diff --git a/USER_GUIDE.md b/USER_GUIDE.md index 12efe7d9..bc1869b7 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -81,6 +81,19 @@ SELECT df.grant_usage('app_role'); After `CREATE EXTENSION`, the background worker initializes the engine schema asynchronously (normally within a few seconds). Until initialization completes, `df.*` functions will return: `"pg_durable background worker not yet initialized — try again in a moment"`. Simply retry after a short delay. +> ℹ️ **Using the DSL operators?** The DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) live in the `df` schema, and they are resolved in your session *before* `df.start()`/`df.explain()` run. 
To use the unqualified operator syntax, add `df` to your `search_path`: +> +> ```sql +> -- Per session +> SET search_path TO "$user", public, df; +> +> -- Or persist it for a role / database +> ALTER ROLE app_role SET search_path = "$user", public, df; +> ALTER DATABASE mydb SET search_path = "$user", public, df; +> ``` +> +> Alternatively, schema-qualify each operator with `OPERATOR(df.~>)` syntax — but adding `df` to `search_path` is far more ergonomic. Other `df.*` functions are always called schema-qualified and work without this. + > ⚠️ **Important**: If you include `pg_durable` in `shared_preload_libraries` but don't create the extension, the worker will remain idle and durable functions cannot execute. ### Your First Durable Function @@ -197,6 +210,8 @@ df.sql('SELECT 1') ~> df.sql('SELECT 2') ### Operators +> **Note:** Operators live in the `df` schema. Using the unqualified syntax below requires `df` on your `search_path` (see [Enable the Extension](#enable-the-extension)). Without it, use the explicit `OPERATOR(df.~>)` form. + | Operator | Name | Description | Example | |----------|------|-------------|---------| | `~>` | Sequence | Run left, then right | `'SELECT 1' ~> 'SELECT 2'` | diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 727aa13f..12ab2f11 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -303,38 +303,41 @@ This allows users to write `'SELECT 1' ~> 'SELECT 2'` instead of `df.sql('SELECT ### SQL Operators -Operators are syntactic sugar that call DSL functions: +Operators are syntactic sugar that call DSL functions. They are created in the +`df` schema (alongside the functions they wrap), so callers need `df` on their +`search_path` to use the unqualified operator syntax — operators are resolved in +the calling session before `df.start()`/`df.explain()` run: ```sql -- src/lib.rs (extension_sql!) 
-- Sequence: a ~> b calls df.seq(a, b) -CREATE OPERATOR ~> ( +CREATE OPERATOR df.~> ( FUNCTION = df.seq, LEFTARG = text, RIGHTARG = text ); -- Name result: a |=> 'name' calls df.as_op(a, name) -CREATE OPERATOR |=> ( +CREATE OPERATOR df.|=> ( FUNCTION = df.as_op, LEFTARG = text, RIGHTARG = text ); -- Parallel join: a & b calls df.join(a, b) -CREATE OPERATOR & ( +CREATE OPERATOR df.& ( FUNCTION = df.join, LEFTARG = text, RIGHTARG = text ); -- Conditional: cond ?> then !> else -CREATE OPERATOR ?> (FUNCTION = df.if_then_op, ...); -CREATE OPERATOR !> (FUNCTION = df.if_else_op, ...); +CREATE OPERATOR df.?> (FUNCTION = df.if_then_op, ...); +CREATE OPERATOR df.!> (FUNCTION = df.if_else_op, ...); -- Loop prefix: @> body calls df.loop(body) -CREATE OPERATOR @> (FUNCTION = df.loop_prefix_op, RIGHTARG = text); +CREATE OPERATOR df.@> (FUNCTION = df.loop_prefix_op, RIGHTARG = text); ``` ### Node Insertion diff --git a/docs/api-reference.md b/docs/api-reference.md index 05fb16ab..951c9be1 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -18,6 +18,8 @@ Parameters marked with ✅ **Auto-wrap** accept either: Parameters marked with ❌ **Literal** expect a literal value (not auto-wrapped). +> **Operators and `search_path`:** The operator forms documented below (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) are defined in the `df` schema and are resolved in the calling session. Add `df` to your `search_path` (e.g. `SET search_path TO "$user", public, df;`) to use them unqualified, or call the equivalent `df.*` function form (always schema-qualified). + --- ## Node Functions diff --git a/docs/grammar.md b/docs/grammar.md index 20411881..fb6cf246 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -94,6 +94,8 @@ From highest to lowest binding: | 5 | `?>` `!>` | right | Conditional (if-then-else) | | 6 (lowest) | `@>` | prefix | Loop (forever) | +> **Schema:** These operators are defined in the `df` schema. 
They are resolved in the calling session, so `df` must be on your `search_path` (e.g. `SET search_path TO "$user", public, df;`) to use the unqualified syntax shown below. The `df.*` function forms are always schema-qualified and need no `search_path` change. + ## Examples ### Sequence diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 51059edd..4aa30c0a 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -205,6 +205,13 @@ what the upgrade script handles, and any backward compatibility considerations. ### v0.2.2 → v0.2.3 +#### Move DSL operators from `public` to the `df` schema (#202) +- **DDL change (df schema):** Fresh 0.2.3 installs create the seven DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) as `CREATE OPERATOR df.<op>` in `src/lib.rs` (previously unqualified, i.e. in `public`). The upgrade script `sql/pg_durable--0.2.2--0.2.3.sql` runs `DROP OPERATOR IF EXISTS public.<op> (...)` followed by `CREATE OPERATOR df.<op> (...)` for all seven, pointing at the same `df` helper functions (`df.seq`, `df.as_op`, `df.join`, `df.race`, `df.if_then_op`, `df.if_else_op`, `df.loop_prefix_op`). The prefix loop operator uses signature `(none, text)`. +- **Behavior change (search_path):** Operators are resolved in the caller's session before `df.start()`/`df.explain()` execute, so the unqualified operator syntax now requires `df` on the session `search_path`. This is a documented, pre-1.0 breaking change; the `df.*` function forms are unaffected. The E2E runners (`scripts/test-e2e-local.sh`, `scripts/test-e2e-docker.sh`) set `search_path = "$user", public, df` at the database level after `CREATE EXTENSION` so the suite's unqualified operator usage resolves. +- **Scenario A considerations:** The Scenario A equivalence contract covers the `df` schema. 
After this change the operators are members of `df` on both the fresh-install and upgrade paths (fresh install creates them in `df`; the upgrade drops the `public` operators and recreates them in `df`), so the operator set in `df` matches. The now-removed pgspot PS017 allowlist entry in `scripts/run-pgspot.sh` is no longer needed because qualified `CREATE OPERATOR df.<op>` does not trigger PS017. +- **Scenario B1 considerations:** The new `.so` works against all previous schemas. No Rust code hard-references the operator schema; operators are pure SQL-level sugar resolved by `search_path`. Against a non-upgraded ≤0.2.2 schema the operators still live in `public` (on the caller's default `search_path`), so existing callers keep working unchanged. After `ALTER EXTENSION pg_durable UPDATE`, the operators move to `df` and callers must add `df` to `search_path`. +- **Scenario B2 considerations:** No data migration. The change only drops and recreates operator objects (catalog metadata); `df.instances`, `df.nodes`, `df.vars`, and the provider schema are untouched. + #### Rename duroxide provider schema to `_duroxide` for fresh installs - **DDL change (df schema):** Adds `df.duroxide_schema()`, an `IMMUTABLE`/`PARALLEL SAFE` SQL function that returns the name of the schema holding the duroxide provider objects. Fresh 0.2.3 installs create the function (in `src/lib.rs`) returning `'_duroxide'`; the upgrade script `sql/pg_durable--0.2.2--0.2.3.sql` creates the same function returning `'duroxide'` so pre-existing installs keep using the legacy schema. Both bodies set `search_path = pg_catalog, pg_temp` to satisfy the pgspot gate. - **DDL change (provider schema):** Fresh installs now run `CREATE SCHEMA _duroxide` (was `CREATE SCHEMA duroxide`). The upgrade script does **not** rename, drop, or move the existing `duroxide` schema — renaming an in-use provider schema would orphan the BGW's durable state. Upgraded installs therefore continue to use `duroxide`. 
diff --git a/examples/azure-functions/sql/03_start_workflow.sql b/examples/azure-functions/sql/03_start_workflow.sql index af028d7c..9eabe774 100644 --- a/examples/azure-functions/sql/03_start_workflow.sql +++ b/examples/azure-functions/sql/03_start_workflow.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. +SET search_path TO "$user", public, df; + -- Start one workflow instance that: -- 1) Reads one pending document -- 2) Calls Azure Function via df.http() diff --git a/examples/azure-http-domains/services/cognitive-services/test.sql b/examples/azure-http-domains/services/cognitive-services/test.sql index ebc298d5..4af24784 100644 --- a/examples/azure-http-domains/services/cognitive-services/test.sql +++ b/examples/azure-http-domains/services/cognitive-services/test.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. +SET search_path TO "$user", public, df; + -- Azure Cognitive Services domain test -- Covers: .cognitiveservices.azure.com -- diff --git a/examples/azure-http-domains/services/cosmos-db/test.sql b/examples/azure-http-domains/services/cosmos-db/test.sql index f84c1d4b..3779ac77 100644 --- a/examples/azure-http-domains/services/cosmos-db/test.sql +++ b/examples/azure-http-domains/services/cosmos-db/test.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. 
+SET search_path TO "$user", public, df; + -- Azure Cosmos DB domain test -- Covers: .documents.azure.com -- diff --git a/examples/azure-http-domains/services/function-app/test.sql b/examples/azure-http-domains/services/function-app/test.sql index 0c8c121b..936a4cc4 100644 --- a/examples/azure-http-domains/services/function-app/test.sql +++ b/examples/azure-http-domains/services/function-app/test.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. +SET search_path TO "$user", public, df; + -- Azure Function App domain test -- Covers: .azurewebsites.net -- diff --git a/examples/azure-http-domains/services/key-vault/test.sql b/examples/azure-http-domains/services/key-vault/test.sql index a819a56c..9e22e350 100644 --- a/examples/azure-http-domains/services/key-vault/test.sql +++ b/examples/azure-http-domains/services/key-vault/test.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. +SET search_path TO "$user", public, df; + -- Azure Key Vault domain test -- Covers: .vault.azure.net -- diff --git a/examples/azure-http-domains/services/service-bus/test.sql b/examples/azure-http-domains/services/service-bus/test.sql index a281b1a9..fbfe5f2e 100644 --- a/examples/azure-http-domains/services/service-bus/test.sql +++ b/examples/azure-http-domains/services/service-bus/test.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. 
+SET search_path TO "$user", public, df; + -- Azure Service Bus domain test -- Covers: .servicebus.windows.net -- diff --git a/examples/azure-http-domains/services/storage-account/test.sql b/examples/azure-http-domains/services/storage-account/test.sql index 0a50d140..3ebdd417 100644 --- a/examples/azure-http-domains/services/storage-account/test.sql +++ b/examples/azure-http-domains/services/storage-account/test.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. +SET search_path TO "$user", public, df; + -- Azure Storage Account domain tests -- -- Covers: .blob.core.windows.net, .blob.storage.azure.net, diff --git a/examples/invoice-approval/sql/04_explain.sql b/examples/invoice-approval/sql/04_explain.sql index 535f3958..136fd7a9 100644 --- a/examples/invoice-approval/sql/04_explain.sql +++ b/examples/invoice-approval/sql/04_explain.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. +SET search_path TO "$user", public, df; + -- Invoice Approval Pipeline — main durable function -- -- This starts an infinite-loop workflow that: diff --git a/examples/invoice-approval/sql/05_start_workflow.sql b/examples/invoice-approval/sql/05_start_workflow.sql index 11d75fb9..49f48149 100644 --- a/examples/invoice-approval/sql/05_start_workflow.sql +++ b/examples/invoice-approval/sql/05_start_workflow.sql @@ -1,6 +1,10 @@ -- Copyright (c) Microsoft Corporation. -- Licensed under the PostgreSQL License. +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema. Add df +-- to the session search_path so the unqualified operator syntax below resolves. 
+SET search_path TO "$user", public, df; + -- Invoice Approval Pipeline — start the workflow -- -- Prerequisites: diff --git a/examples/operational-scenarios/01_autovacuum_blocked.sql b/examples/operational-scenarios/01_autovacuum_blocked.sql index 86abff7d..1211b03f 100644 --- a/examples/operational-scenarios/01_autovacuum_blocked.sql +++ b/examples/operational-scenarios/01_autovacuum_blocked.sql @@ -58,6 +58,10 @@ CREATE TABLE autovacuum_remediation_log ( -- Start the durable function: detect → branch on blockers → remediate or vacuum directly CREATE TEMP TABLE _scenario1_state (instance_id TEXT); +-- The DSL sequence (~>) and conditional (?> / !>) operators live in the df +-- schema. Add df to the session search_path so the unqualified syntax resolves. +SET search_path TO "$user", public, df; + INSERT INTO _scenario1_state SELECT df.start( -- Step 1: Log all autovacuum blockers into the diagnostics table diff --git a/examples/operational-scenarios/02_database_bloat.sql b/examples/operational-scenarios/02_database_bloat.sql index 2c4c8ed1..0d26f554 100644 --- a/examples/operational-scenarios/02_database_bloat.sql +++ b/examples/operational-scenarios/02_database_bloat.sql @@ -37,6 +37,10 @@ CREATE TABLE bloat_remediation_log ( -- Start the durable function: detect bloat → log blockers → branch → remediate or vacuum CREATE TEMP TABLE _scenario2_state (instance_id TEXT); +-- The DSL sequence (~>) and conditional (?> / !>) operators live in the df +-- schema. Add df to the session search_path so the unqualified syntax resolves. 
+SET search_path TO "$user", public, df; + INSERT INTO _scenario2_state SELECT df.start( -- Step 1: Identify bloated tables (dead tuple ratio > 20% as proxy for bloat) diff --git a/examples/operational-scenarios/03_wraparound_risk.sql b/examples/operational-scenarios/03_wraparound_risk.sql index 24d82ba2..fe55672a 100644 --- a/examples/operational-scenarios/03_wraparound_risk.sql +++ b/examples/operational-scenarios/03_wraparound_risk.sql @@ -85,6 +85,10 @@ CREATE TABLE wraparound_action_log ( -- Start the durable function: detect DB risk → find tables → branch on blockers → freeze CREATE TEMP TABLE _scenario3_state (instance_id TEXT); +-- The DSL sequence (~>) and conditional (?> / !>) operators live in the df +-- schema. Add df to the session search_path so the unqualified syntax resolves. +SET search_path TO "$user", public, df; + INSERT INTO _scenario3_state SELECT df.start( -- Step 1: Log database-level transaction ages diff --git a/examples/operational-scenarios/04_tables_not_vacuumed.sql b/examples/operational-scenarios/04_tables_not_vacuumed.sql index 7f8c0b9b..ad052e8d 100644 --- a/examples/operational-scenarios/04_tables_not_vacuumed.sql +++ b/examples/operational-scenarios/04_tables_not_vacuumed.sql @@ -77,6 +77,10 @@ CREATE TABLE stale_vacuum_action_log ( -- Start the durable function: find stale tables → detect blockers → branch → vacuum CREATE TEMP TABLE _scenario4_state (instance_id TEXT); +-- The DSL sequence (~>) and conditional (?> / !>) operators live in the df +-- schema. Add df to the session search_path so the unqualified syntax resolves. 
+SET search_path TO "$user", public, df; + INSERT INTO _scenario4_state SELECT df.start( -- Step 1: Identify tables not vacuumed in the last 7 days diff --git a/examples/operational-scenarios/README.md b/examples/operational-scenarios/README.md index 623113b3..9dfddd21 100644 --- a/examples/operational-scenarios/README.md +++ b/examples/operational-scenarios/README.md @@ -22,6 +22,11 @@ Each scenario file is a standalone SQL script that can be run against a PostgreS # Connect to your database psql -h <host> -U <user> -d <database> +# The DSL operators (~>, ?>, !>) used below live in the df schema, so add it +# to your search_path for the current session (each scenario file also sets +# this itself): +SET search_path TO "$user", public, df; + # Run the common prerequisite to check for blockers \i examples/operational-scenarios/00_common_prerequisite.sql diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh index 74a75ed1..a69c3163 100755 --- a/scripts/run-pgspot.sh +++ b/scripts/run-pgspot.sh @@ -32,10 +32,6 @@ PGSPOT_ALLOW=( # EXISTS (what PS010 flags) isn't controllable from source. Only df is allowed; # any other PS010 still fails. Schemas we control omit IF NOT EXISTS. '^PS010: Unsafe schema creation: df at line [0-9]+$' - # pg_durable's DSL intentionally exposes unqualified custom operators (for - # example, `df.sql(...) ~> df.sql(...)`) so users do not need df in search_path. - # pgspot reports the generated CREATE OPERATOR name as an unqualified object. - '^PS017: Unqualified object reference: ~> at line [0-9]+$' ) # Whole codes to suppress globally (pgspot --ignore). Prefer PGSPOT_ALLOW. Empty. diff --git a/scripts/test-e2e-docker.sh b/scripts/test-e2e-docker.sh index 5be0f7ef..29e56f60 100755 --- a/scripts/test-e2e-docker.sh +++ b/scripts/test-e2e-docker.sh @@ -163,6 +163,14 @@ if ! 
docker exec "$CONTAINER_NAME" psql -U postgres -c "CREATE EXTENSION IF NOT exit 1 fi +# The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema and are +# resolved in the caller's session before df.start()/df.explain() run, so df +# must be on the database search_path for the unqualified operator syntax used +# throughout the E2E suite to resolve. Set it at the database level so every +# test session picks it up at connection time. +docker exec "$CONTAINER_NAME" psql -U postgres \ + -c "ALTER DATABASE postgres SET search_path = \"\$user\", public, df;" 2>&1 + # Show version echo -n "pg_durable version: " VERSION=$(docker exec "$CONTAINER_NAME" psql -U postgres -t -c "SELECT df.version();" 2>&1) diff --git a/scripts/test-e2e-local.sh b/scripts/test-e2e-local.sh index 6b51b0d2..e1624578 100755 --- a/scripts/test-e2e-local.sh +++ b/scripts/test-e2e-local.sh @@ -429,6 +429,15 @@ DROP EXTENSION IF EXISTS pg_durable CASCADE; CREATE EXTENSION pg_durable; COMMIT; SQL + + # The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema and + # are resolved in the caller's session before df.start()/df.explain() run, + # so df must be on the database search_path for the unqualified operator + # syntax used throughout the E2E suite to resolve. Set it at the database + # level so every test session (including isolated single-test runs that skip + # 00_setup_playground.sql) picks it up at connection time. + "$PSQL" -h localhost -p "$PG_PORT" -U "$PG_USER" -d "$PG_DB" \ + -c "ALTER DATABASE \"${PG_DB}\" SET search_path = \"\$user\", public, df" >/dev/null 2>&1 } configure_phase() { diff --git a/sql/pg_durable--0.2.2--0.2.3.sql b/sql/pg_durable--0.2.2--0.2.3.sql index 470d4d17..436c8c58 100644 --- a/sql/pg_durable--0.2.2--0.2.3.sql +++ b/sql/pg_durable--0.2.2--0.2.3.sql @@ -3,19 +3,95 @@ -- pg_durable upgrade: 0.2.2 → 0.2.3 -- --- Introduces df.duroxide_schema(), a helper that reports which schema holds the --- duroxide provider objects for this install. 
Fresh 0.2.3 installs create the --- provider objects in the '_duroxide' schema (see lib.rs). Installs upgrading --- from <= 0.2.2 already have their provider objects in the legacy 'duroxide' --- schema and must keep using it — renaming an in-use schema would orphan the --- background worker's durable state. This upgrade therefore defines --- df.duroxide_schema() to return 'duroxide' for pre-existing installs. +-- 1. Introduces df.duroxide_schema(), a helper that reports which schema holds +-- the duroxide provider objects for this install. Fresh 0.2.3 installs create +-- the provider objects in the '_duroxide' schema (see lib.rs). Installs +-- upgrading from <= 0.2.2 already have their provider objects in the legacy +-- 'duroxide' schema and must keep using it — renaming an in-use schema would +-- orphan the background worker's durable state. This upgrade therefore defines +-- df.duroxide_schema() to return 'duroxide' for pre-existing installs. -- --- Backend sessions and the background worker call df.duroxide_schema() to learn --- which schema to use, falling back to 'duroxide' when the helper is absent --- (installs predating it). No schema rename, drop, or data movement occurs. +-- Backend sessions and the background worker call df.duroxide_schema() to learn +-- which schema to use, falling back to 'duroxide' when the helper is absent +-- (installs predating it). No schema rename, drop, or data movement occurs. +-- +-- 2. Moves the seven DSL operators from public into df (issue #202). See the +-- operator block below for the rationale and the search_path implication. CREATE FUNCTION df.duroxide_schema() RETURNS text LANGUAGE sql IMMUTABLE PARALLEL SAFE SET search_path = pg_catalog, pg_temp AS $$ SELECT 'duroxide'::text $$; + +-- --------------------------------------------------------------------------- +-- Move the DSL operators from the public schema into df (issue #202). 
+-- +-- pg_durable <= 0.2.2 created its seven DSL operators in the public schema, +-- polluting the public namespace (and flagged by pgspot). Fresh 0.2.3 installs +-- create them in df (see src/lib.rs); this block relocates them for installs +-- upgrading from <= 0.2.2. +-- +-- The helper functions the operators bind to (df.as_op, df.if_then_op, +-- df.if_else_op, df.loop_prefix_op) already live in df from earlier versions, +-- so only the operators themselves move. +-- +-- Behavior change: because an expression like `'a' ~> 'b'` is resolved in the +-- caller's session before df.start()/df.explain() see it, the unqualified +-- operator syntax now requires `df` on the session search_path (for example, +-- `SET search_path = "$user", public, df;`). The schema-qualified df.*() +-- functions (df.seq, df.as, df.join, df.race, df.if, df.loop) are unaffected. +-- --------------------------------------------------------------------------- +DROP OPERATOR IF EXISTS public.~> (text, text); +DROP OPERATOR IF EXISTS public.|=> (text, text); +DROP OPERATOR IF EXISTS public.& (text, text); +DROP OPERATOR IF EXISTS public.| (text, text); +DROP OPERATOR IF EXISTS public.?> (text, text); +DROP OPERATOR IF EXISTS public.!> (text, text); +DROP OPERATOR IF EXISTS public.@> (none, text); + +-- Sequencing: a ~> b means "run a, then run b" +CREATE OPERATOR df.~> ( + FUNCTION = df.seq, + LEFTARG = text, + RIGHTARG = text +); + +-- Naming: fut |=> 'name' means "name this result as $name" +CREATE OPERATOR df.|=> ( + FUNCTION = df.as_op, + LEFTARG = text, + RIGHTARG = text +); + +-- Parallel join: a & b means "run a and b in parallel, wait for both" +CREATE OPERATOR df.& ( + FUNCTION = df.join, + LEFTARG = text, + RIGHTARG = text +); + +-- Race: a | b means "run a and b in parallel, first wins" +CREATE OPERATOR df.| ( + FUNCTION = df.race, + LEFTARG = text, + RIGHTARG = text +); + +-- If-then / if-else: cond ?> then_branch !> else_branch +CREATE OPERATOR df.?> ( + FUNCTION = df.if_then_op, + 
LEFTARG = text, + RIGHTARG = text +); + +CREATE OPERATOR df.!> ( + FUNCTION = df.if_else_op, + LEFTARG = text, + RIGHTARG = text +); + +-- Loop (prefix): @> body means "repeat body forever" +CREATE OPERATOR df.@> ( + FUNCTION = df.loop_prefix_op, + RIGHTARG = text +); diff --git a/src/lib.rs b/src/lib.rs index 72ddad44..e0af3dc7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -648,11 +648,19 @@ CREATE FUNCTION df.duroxide_schema() RETURNS text // ============================================================================ // SQL Operators // ============================================================================ +// +// The DSL operators are created in the `df` schema (not `public`) so the +// extension does not pollute the public namespace (issue #202). Because an +// expression such as `'a' ~> 'b'` is resolved in the *caller's* session before +// df.start()/df.explain() ever see it, the unqualified operator syntax only +// resolves when `df` is on the session search_path. Users add df to their +// search_path (e.g. `SET search_path = "$user", public, df;`) or invoke the +// equivalent df.*() functions, which are always schema-qualified. 
extension_sql!( r#" -- Operator ~> for sequencing: a ~> b means "run a, then run b" -CREATE OPERATOR ~> ( +CREATE OPERATOR df.~> ( FUNCTION = df.seq, LEFTARG = text, RIGHTARG = text @@ -663,21 +671,21 @@ CREATE OR REPLACE FUNCTION df.as_op(fut text, name text) RETURNS text AS $$ SELECT df.as(fut, name); $$ LANGUAGE SQL IMMUTABLE SET search_path = pg_catalog, df, pg_temp; -CREATE OPERATOR |=> ( +CREATE OPERATOR df.|=> ( FUNCTION = df.as_op, LEFTARG = text, RIGHTARG = text ); -- Operator & for parallel join: a & b means "run a and b in parallel, wait for both" -CREATE OPERATOR & ( +CREATE OPERATOR df.& ( FUNCTION = df.join, LEFTARG = text, RIGHTARG = text ); -- Operator | for race: a | b means "run a and b in parallel, first wins" -CREATE OPERATOR | ( +CREATE OPERATOR df.| ( FUNCTION = df.race, LEFTARG = text, RIGHTARG = text @@ -765,13 +773,13 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE SET search_path = pg_catalog, df, pg_temp; -CREATE OPERATOR ?> ( +CREATE OPERATOR df.?> ( FUNCTION = df.if_then_op, LEFTARG = text, RIGHTARG = text ); -CREATE OPERATOR !> ( +CREATE OPERATOR df.!> ( FUNCTION = df.if_else_op, LEFTARG = text, RIGHTARG = text @@ -783,7 +791,7 @@ CREATE OR REPLACE FUNCTION df.loop_prefix_op(body text) RETURNS text AS $$ SELECT df.loop(body); $$ LANGUAGE SQL IMMUTABLE SET search_path = pg_catalog, df, pg_temp; -CREATE OPERATOR @> ( +CREATE OPERATOR df.@> ( FUNCTION = df.loop_prefix_op, RIGHTARG = text ); @@ -2565,6 +2573,10 @@ pub mod pg_test { "pg_durable.worker_role = 'postgres'", "pg_durable.database = 'postgres'", "pg_durable.enable_superuser_instances = on", + // DSL operators live in the df schema (issue #202); add df to the + // default search_path so unqualified operator syntax (e.g. `a ~> b`) + // and df.explain()'s SPI evaluation resolve them in tests. 
+ "search_path = '\"$user\", public, df'", ] } } From 851a66ca7c1c9f935493c912f378b7c968f084ab Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Wed, 10 Jun 2026 22:38:32 -0700 Subject: [PATCH 2/4] Manage role search_path in df.grant_usage()/df.revoke_usage() The DSL operators now live in df (#202), so df must be on a role's search_path for the unqualified operator syntax to resolve. Add a fourth optional parameter to df.grant_usage(), set_search_path boolean DEFAULT true, that adds df to the target role's search_path via ALTER ROLE during onboarding (append-only and idempotent; opt out with set_search_path => false). df.revoke_usage(text) keeps its signature and now removes the df entry again. Both ALTER ROLE blocks tolerate insufficient_privilege with a NOTICE so the grant/revoke otherwise succeeds. The 0.2.2->0.2.3 upgrade script drops and recreates grant_usage for the new signature and re-revokes PUBLIC EXECUTE. It includes CREATE SCHEMA IF NOT EXISTS df (a runtime no-op) so the recreated function bodies stay byte-identical to src/lib.rs and pass the pgspot gate; the resulting PS010 finding is allowlisted in run-pgspot.sh. Add E2E test 50_grant_usage_search_path.sql (catalog-based assertions on pg_db_role_setting) and update USER_GUIDE, README, api-reference, upgrade-testing, and CHANGELOG. 
--- CHANGELOG.md | 4 + README.md | 4 +- USER_GUIDE.md | 15 +- docs/api-reference.md | 10 +- docs/upgrade-testing.md | 17 ++ sql/pg_durable--0.2.2--0.2.3.sql | 281 +++++++++++++++++++ src/lib.rs | 89 +++++- tests/e2e/sql/12_extension_lifecycle.sql | 8 +- tests/e2e/sql/18_delegated_grants.sql | 6 +- tests/e2e/sql/50_grant_usage_search_path.sql | 148 ++++++++++ 10 files changed, 563 insertions(+), 19 deletions(-) create mode 100644 tests/e2e/sql/50_grant_usage_search_path.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 91453c53..fe0078cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ Pre-1.0 note: while `pg_durable` is in major version `0`, minor releases may inc - **DSL operators moved from `public` to the `df` schema (#202):** The seven DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) are now created in the `df` schema instead of `public`, so they no longer pollute the public namespace (resolving a pgspot PS017 finding). Because operators are resolved in the calling session before `df.start()`/`df.explain()` run, the unqualified operator syntax now requires `df` on the session `search_path` (e.g. `SET search_path TO "$user", public, df;`, or `ALTER ROLE`/`ALTER DATABASE ... SET search_path`). The `df.*` function forms (`df.seq`, `df.join`, …) are unaffected. Existing installs move the operators when they run `ALTER EXTENSION pg_durable UPDATE`; a non-upgraded `.so` keeps the operators in `public` and continues to work unchanged. +### Changed + +- **`df.grant_usage()` now manages `search_path` (#202):** `df.grant_usage('role')` adds `df` to the target role's `search_path` (via `ALTER ROLE`) by default, so the unqualified DSL operators resolve without manual setup. This adds a fourth optional parameter, `set_search_path boolean DEFAULT true` (signature is now `df.grant_usage(text, boolean, boolean, boolean)`); pass `set_search_path => false` to opt out. 
`df.revoke_usage('role')` removes that `df` entry again (idempotent; other `search_path` entries are preserved). If the caller lacks privilege to alter the role, a `NOTICE` is raised and the grant/revoke otherwise succeeds. + ## [0.2.2] - 2026-05-28 First open-source release of `pg_durable` on GitHub under the PostgreSQL License. diff --git a/README.md b/README.md index 722ba5b9..dfed1620 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ SELECT df.start( ); ``` -> The DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) live in the `df` schema. Add `df` to your `search_path` (as above) to use the unqualified syntax. See the [User Guide](USER_GUIDE.md#enable-the-extension) for details. +> The DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) live in the `df` schema and resolve in your session, so `df` must be on your `search_path`. `df.grant_usage('role')` adds it for a role automatically (the `SET` above is for an ad-hoc session). See the [User Guide](USER_GUIDE.md#enable-the-extension) for details. ## Packages @@ -142,7 +142,7 @@ After installing a package, add `pg_durable` to `shared_preload_libraries`, rest CREATE EXTENSION pg_durable; ``` -The default pg_durable database is `postgres`; see [User Guide](USER_GUIDE.md) for background worker configuration and privilege setup. To use the unqualified DSL operator syntax, add the `df` schema to your `search_path` (e.g. `ALTER DATABASE postgres SET search_path = "$user", public, df;`). +The default pg_durable database is `postgres`; see [User Guide](USER_GUIDE.md) for background worker configuration and privilege setup. The unqualified DSL operator syntax needs the `df` schema on your `search_path` — `df.grant_usage('role')` adds it per role automatically, or set it yourself (e.g. `ALTER DATABASE postgres SET search_path = "$user", public, df;`). Each release also publishes source archives for building from source and a `SHA256SUMS` file for verifying downloaded assets. 
diff --git a/USER_GUIDE.md b/USER_GUIDE.md index bc1869b7..1e924a5c 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -81,7 +81,9 @@ SELECT df.grant_usage('app_role'); After `CREATE EXTENSION`, the background worker initializes the engine schema asynchronously (normally within a few seconds). Until initialization completes, `df.*` functions will return: `"pg_durable background worker not yet initialized — try again in a moment"`. Simply retry after a short delay. -> ℹ️ **Using the DSL operators?** The DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) live in the `df` schema, and they are resolved in your session *before* `df.start()`/`df.explain()` run. To use the unqualified operator syntax, add `df` to your `search_path`: +> ℹ️ **Using the DSL operators?** The DSL operators (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) live in the `df` schema, and they are resolved in your session *before* `df.start()`/`df.explain()` run, so `df` must be on your `search_path`. **`df.grant_usage('app_role')` handles this for you by default** — it adds `df` to the role's `search_path` (via `ALTER ROLE`) during onboarding, so the operators resolve the next time that role connects. Pass `set_search_path => false` to opt out and manage `search_path` yourself. +> +> To set it manually (or for the current session, before reconnecting): > > ```sql > -- Per session @@ -92,7 +94,7 @@ After `CREATE EXTENSION`, the background worker initializes the engine schema as > ALTER DATABASE mydb SET search_path = "$user", public, df; > ``` > -> Alternatively, schema-qualify each operator with `OPERATOR(df.~>)` syntax — but adding `df` to `search_path` is far more ergonomic. Other `df.*` functions are always called schema-qualified and work without this. +> Alternatively, schema-qualify each operator with `OPERATOR(df.~>)` syntax. Other `df.*` functions are always called schema-qualified and work without this. 
> ⚠️ **Important**: If you include `pg_durable` in `shared_preload_libraries` but don't create the extension, the worker will remain idle and durable functions cannot execute. @@ -1650,6 +1652,8 @@ SELECT df.grant_usage('admin_role', include_http => true, with_grant => true); This function is purely additive — it never issues REVOKE. To downgrade a role's privileges (e.g., remove HTTP access), call `df.revoke_usage()` first, then `df.grant_usage()` with the desired options. +By default it also adds `df` to the role's `search_path` (see `set_search_path` below) so the unqualified DSL operators resolve — the only change it makes outside of privilege grants. + **Parameters:** | Parameter | Default | Description | @@ -1657,6 +1661,7 @@ This function is purely additive — it never issues REVOKE. To downgrade a role | `p_role` | *(required)* | Target role name | | `include_http` | `false` | Grant EXECUTE on `df.http()` (opt-in — makes outbound network requests) | | `with_grant` | `false` | Grant all privileges WITH GRANT OPTION and allow the role to call `df.grant_usage()` / `df.revoke_usage()` to manage other roles' access. The caller must hold each underlying privilege WITH GRANT OPTION (automatically true for superusers and delegated admins). | +| `set_search_path` | `true` | Add `df` to the role's `search_path` (via `ALTER ROLE`) so the unqualified DSL operators resolve without manual setup. Takes effect the next time the role connects; append-only and idempotent. Pass `false` to manage `search_path` yourself. If the caller lacks privilege to alter the role, a `NOTICE` is raised and the grant otherwise succeeds. |
Equivalent manual grants (for reference) @@ -1710,6 +1715,10 @@ GRANT SELECT ON df.nodes TO app_role; GRANT INSERT (id, label, root_node, submitted_by, database) ON df.instances TO app_role; GRANT INSERT (id, instance_id, node_type, query, result_name, left_node, right_node, submitted_by, database) ON df.nodes TO app_role; GRANT SELECT, INSERT, UPDATE, DELETE ON df.vars TO app_role; + +-- Add df to the role's search_path so the unqualified DSL operators resolve +-- (df.grant_usage does this too, unless set_search_path => false) +ALTER ROLE app_role SET search_path = "$user", public, df; ```
@@ -1759,7 +1768,7 @@ To remove a role's access to pg_durable: SELECT df.revoke_usage('app_role'); ``` -This revokes all privileges previously granted by `df.grant_usage()`. As a safety measure, `df.revoke_usage()` prevents revoking privileges from a role the caller is a member of (including the caller's own role). +This revokes all privileges previously granted by `df.grant_usage()`, and also removes the `df` entry that `df.grant_usage()` added to the role's `search_path` (idempotent — a no-op if `df` was never added or was already removed; any other entries are preserved). As a safety measure, `df.revoke_usage()` prevents revoking privileges from a role the caller is a member of (including the caller's own role). For non-superusers, `df.revoke_usage()` is still subject to PostgreSQL's normal grantor rules because it is a SECURITY INVOKER helper. In practice, that means a delegated admin can only revoke the privileges that delegated admin granted; removing grants made by another role requires the original grantor or a superuser. diff --git a/docs/api-reference.md b/docs/api-reference.md index 951c9be1..c7b643d3 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -18,7 +18,7 @@ Parameters marked with ✅ **Auto-wrap** accept either: Parameters marked with ❌ **Literal** expect a literal value (not auto-wrapped). -> **Operators and `search_path`:** The operator forms documented below (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) are defined in the `df` schema and are resolved in the calling session. Add `df` to your `search_path` (e.g. `SET search_path TO "$user", public, df;`) to use them unqualified, or call the equivalent `df.*` function form (always schema-qualified). +> **Operators and `search_path`:** The operator forms documented below (`~>`, `|=>`, `&`, `|`, `?>`, `!>`, `@>`) are defined in the `df` schema and are resolved in the calling session, so `df` must be on your `search_path`. 
`df.grant_usage('role')` adds it per role automatically; you can also set it yourself (e.g. `SET search_path TO "$user", public, df;`) or call the equivalent `df.*` function form (always schema-qualified). --- @@ -420,9 +420,9 @@ SELECT df.clearvars(); ## Administration Functions -### df.grant_usage(role_name [, include_http] [, with_grant]) +### df.grant_usage(role_name [, include_http] [, with_grant] [, set_search_path]) -Grants the privileges a role needs to use pg_durable. By default this grants general `df` usage but does not grant `EXECUTE` on `df.http()`. Pass `include_http => true` to opt a role into HTTP access. Pass `with_grant => true` to allow the role to delegate access to others. +Grants the privileges a role needs to use pg_durable. By default this grants general `df` usage but does not grant `EXECUTE` on `df.http()`. Pass `include_http => true` to opt a role into HTTP access. Pass `with_grant => true` to allow the role to delegate access to others. By default it also adds `df` to the role's `search_path` so the unqualified DSL operators resolve; pass `set_search_path => false` to opt out. Authorization is enforced by PostgreSQL’s native mechanisms: EXECUTE on this function is revoked from PUBLIC (so only roles explicitly granted access can call it), and the inner GRANT statements run as the caller via SECURITY INVOKER, so the caller must hold the underlying privileges WITH GRANT OPTION. 
@@ -431,16 +431,18 @@ Authorization is enforced by PostgreSQL’s native mechanisms: EXECUTE on this f | `role_name` | TEXT | The role to grant privileges to | | `include_http` | BOOLEAN | Optional, defaults to `false`; when `true`, also grants `EXECUTE` on `df.http(text, text, text, jsonb, integer)` | | `with_grant` | BOOLEAN | Optional, defaults to `false`; when `true`, grants all privileges WITH GRANT OPTION and retains EXECUTE on `df.grant_usage` / `df.revoke_usage` | +| `set_search_path` | BOOLEAN | Optional, defaults to `true`; when `true`, adds `df` to the role's `search_path` (via `ALTER ROLE`) so the unqualified DSL operators resolve. Takes effect on the role's next connection; append-only and idempotent. A `NOTICE` is raised (and the grant otherwise succeeds) if the caller cannot alter the role | ```sql SELECT df.grant_usage('app_role'); SELECT df.grant_usage('app_role', include_http => true); SELECT df.grant_usage('admin_role', with_grant => true); +SELECT df.grant_usage('app_role', set_search_path => false); -- manage search_path yourself ``` ### df.revoke_usage(role_name) -Revokes all privileges previously granted by `df.grant_usage()`, including any `df.http()` access. Authorization is enforced the same way as `df.grant_usage()` — EXECUTE is revoked from PUBLIC, and the inner REVOKE statements run as the caller. On upgraded installs, revoking `df.http()` from `PUBLIC` is still a separate manual step. +Revokes all privileges previously granted by `df.grant_usage()`, including any `df.http()` access, and removes the `df` entry `df.grant_usage()` added to the role's `search_path` (idempotent; other entries are preserved). Authorization is enforced the same way as `df.grant_usage()` — EXECUTE is revoked from PUBLIC, and the inner REVOKE statements run as the caller. On upgraded installs, revoking `df.http()` from `PUBLIC` is still a separate manual step. 
| Parameter | Type | Description | |-----------|------|-------------| diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 4aa30c0a..dc77afbd 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -196,6 +196,15 @@ released). Every new upgrade script is gated and must pass — keep its DDL schema-qualified (see step 3 above). Scripts written after qualification pass the gate, so they never need to be added to the exclude list. +One narrow exception: when an upgrade script recreates a function whose body is +shared verbatim with `src/lib.rs` and relies on a `SET search_path = pg_catalog, +df, pg_temp` (secure in the install SQL only because pgrx emits `CREATE SCHEMA IF +NOT EXISTS df`), the script may include its own `CREATE SCHEMA IF NOT EXISTS df;` +instead of hand-qualifying the body. This keeps the body byte-identical to +`src/lib.rs` — avoiding a forked, untested copy — and is a runtime no-op since the +schema already exists. The resulting `PS010` finding is allowlisted in +`scripts/run-pgspot.sh`. See the v0.2.2 → v0.2.3 `df.grant_usage()` notes below. + --- ## Version-Specific Changes @@ -212,6 +221,14 @@ what the upgrade script handles, and any backward compatibility considerations. - **Scenario B1 considerations:** The new `.so` works against all previous schemas. No Rust code hard-references the operator schema; operators are pure SQL-level sugar resolved by `search_path`. Against a non-upgraded ≤0.2.2 schema the operators still live in `public` (on the caller's default `search_path`), so existing callers keep working unchanged. After `ALTER EXTENSION pg_durable UPDATE`, the operators move to `df` and callers must add `df` to `search_path`. - **Scenario B2 considerations:** No data migration. The change only drops and recreates operator objects (catalog metadata); `df.instances`, `df.nodes`, `df.vars`, and the provider schema are untouched. 
+#### `df.grant_usage()` adds `df` to the role's `search_path`; signature 3→4 args (#202) +- **DDL change (df schema):** `df.grant_usage` gains a fourth optional parameter, `set_search_path boolean DEFAULT true`, so its signature changes from `df.grant_usage(text, boolean, boolean)` to `df.grant_usage(text, boolean, boolean, boolean)`. When `true` (default) the function adds `df` to the target role's `search_path` via `ALTER ROLE ... SET search_path` (append-only and idempotent: it appends `, df` to an existing per-role setting, or sets `"$user", public, df` when none exists). `df.revoke_usage(text)` keeps its existing signature and now also removes the `df` entry from the role's `search_path` (idempotent; other entries preserved, `RESET` when nothing remains). Both `ALTER ROLE` blocks are wrapped in `EXCEPTION WHEN insufficient_privilege` so a caller who cannot alter the role gets a `NOTICE` and the grant/revoke otherwise succeeds. +- **Upgrade script handling:** Because the parameter list changes, the upgrade script `sql/pg_durable--0.2.2--0.2.3.sql` runs `DROP FUNCTION IF EXISTS df.grant_usage(text, boolean, boolean)` then `CREATE` the 4-arg version, re-runs `REVOKE EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean, boolean) FROM PUBLIC` (a freshly created function defaults to PUBLIC EXECUTE), and `CREATE OR REPLACE`s `df.revoke_usage(text)` with the new body. The `DROP` discards any delegated-admin `WITH GRANT OPTION` grants on `grant_usage`; superusers re-establish them by re-calling `df.grant_usage(..., with_grant => true)`. +- **pgspot gate:** Both functions are recreated with `SET search_path = pg_catalog, df, pg_temp` and unqualified `pg_catalog` references in their bodies (matching `src/lib.rs`). pgspot only treats that search_path as secure when `df` is a *created* schema in the scanned file, which the generated install SQL satisfies (pgrx emits `CREATE SCHEMA IF NOT EXISTS df`) but an upgrade script normally does not. 
Rather than hand-qualify every reference only in the upgrade script (which would fork the bodies from `src/lib.rs` on a path no automated test exercises), the script includes a `CREATE SCHEMA IF NOT EXISTS df;` — a runtime no-op (the schema already exists) that lets the bodies stay byte-identical to `src/lib.rs` and pass the gate. The resulting `PS010: Unsafe schema creation: df` finding is allowlisted in `scripts/run-pgspot.sh`. +- **Scenario A considerations:** Schema comparison covers function name + arguments. After the upgrade `df.grant_usage` has the same 4-arg signature and defaults as a fresh install, and `df.revoke_usage(text)` is unchanged, so Scenario A passes. (PUBLIC grant rows are filtered from the ACL comparison, so the `REVOKE ... FROM PUBLIC` is for real security correctness rather than to satisfy the diff.) +- **Scenario B1 considerations:** The new `.so` does not hard-reference `grant_usage`/`revoke_usage`; these are SQL-level admin helpers invoked by operators, not by the runtime. Against a non-upgraded ≤0.2.2 schema the 3-arg `grant_usage` and old `revoke_usage` remain callable exactly as before. The new behavior (search_path management, 4th arg) is only available after `ALTER EXTENSION pg_durable UPDATE`. +- **Scenario B2 considerations:** No data migration. The change only redefines two functions; `df.instances`, `df.nodes`, `df.vars`, and the provider schema are untouched. Per-role `search_path` entries are managed only when an admin explicitly calls `grant_usage`/`revoke_usage`. + #### Rename duroxide provider schema to `_duroxide` for fresh installs - **DDL change (df schema):** Adds `df.duroxide_schema()`, an `IMMUTABLE`/`PARALLEL SAFE` SQL function that returns the name of the schema holding the duroxide provider objects. 
Fresh 0.2.3 installs create the function (in `src/lib.rs`) returning `'_duroxide'`; the upgrade script `sql/pg_durable--0.2.2--0.2.3.sql` creates the same function returning `'duroxide'` so pre-existing installs keep using the legacy schema. Both bodies set `search_path = pg_catalog, pg_temp` to satisfy the pgspot gate. - **DDL change (provider schema):** Fresh installs now run `CREATE SCHEMA _duroxide` (was `CREATE SCHEMA duroxide`). The upgrade script does **not** rename, drop, or move the existing `duroxide` schema — renaming an in-use provider schema would orphan the BGW's durable state. Upgraded installs therefore continue to use `duroxide`. diff --git a/sql/pg_durable--0.2.2--0.2.3.sql b/sql/pg_durable--0.2.2--0.2.3.sql index 436c8c58..2fb05e3f 100644 --- a/sql/pg_durable--0.2.2--0.2.3.sql +++ b/sql/pg_durable--0.2.2--0.2.3.sql @@ -17,6 +17,17 @@ -- -- 2. Moves the seven DSL operators from public into df (issue #202). See the -- operator block below for the rationale and the search_path implication. +-- +-- 3. Redefines df.grant_usage() / df.revoke_usage() to manage the role's +-- search_path. Because the operators move into df (item 2), df must be on a +-- role's search_path for the unqualified operator syntax to resolve. +-- df.grant_usage() gains a set_search_path argument (default true) that adds +-- df to the role's search_path during onboarding, and df.revoke_usage() +-- removes it again — so existing installs get the ergonomic syntax without +-- every user editing search_path by hand. grant_usage's signature changes +-- (a fourth argument is appended), so it is dropped and recreated; any +-- EXECUTE grants on the old signature are reissued the next time an admin +-- calls df.grant_usage(..., with_grant => true). 
CREATE FUNCTION df.duroxide_schema() RETURNS text LANGUAGE sql IMMUTABLE PARALLEL SAFE @@ -95,3 +106,273 @@ CREATE OPERATOR df.@> ( FUNCTION = df.loop_prefix_op, RIGHTARG = text ); + +-- --------------------------------------------------------------------------- +-- Redefine df.grant_usage() / df.revoke_usage() to manage search_path +-- (issue #202 follow-up). +-- +-- Now that the DSL operators live in df (above), df must be on a role's +-- search_path for the unqualified operator syntax to resolve. df.grant_usage() +-- gains a set_search_path argument (default true) that adds df to the role's +-- search_path during onboarding, and df.revoke_usage() removes it again — so +-- existing installs get the ergonomic syntax without every user editing +-- search_path by hand. +-- +-- These definitions are kept in sync with src/lib.rs (the upgrade test compares +-- function signatures and non-PUBLIC ACLs, not bodies). grant_usage gains a +-- fourth argument, so it must be dropped and recreated: DROP also discards the +-- old function's EXECUTE grants (including any delegated-admin WITH GRANT OPTION +-- grants), so the REVOKE below re-secures the recreated function against PUBLIC +-- and superusers re-grant delegated admins by calling df.grant_usage(..., +-- with_grant => true) again. df.revoke_usage() keeps its signature, so +-- CREATE OR REPLACE preserves its existing ACL. +-- +-- The CREATE SCHEMA IF NOT EXISTS below is a runtime no-op (df already exists, +-- created by the original install) — it is present so the pgspot security gate +-- recognises df as a created schema and treats the functions' "SET search_path = +-- pg_catalog, df, pg_temp" as secure, exactly as it does for the generated +-- fresh-install SQL (pgrx emits CREATE SCHEMA IF NOT EXISTS df there). This lets +-- the function bodies stay byte-identical to their src/lib.rs definitions instead +-- of being hand-qualified only here. The resulting PS010 finding is allowlisted +-- in scripts/run-pgspot.sh. 
+-- --------------------------------------------------------------------------- +CREATE SCHEMA IF NOT EXISTS df; + +DROP FUNCTION IF EXISTS df.grant_usage(text, boolean, boolean); + +CREATE OR REPLACE FUNCTION df.grant_usage( + p_role TEXT, + include_http boolean DEFAULT false, + with_grant boolean DEFAULT false, + set_search_path boolean DEFAULT true +) +RETURNS VOID +LANGUAGE plpgsql +SET search_path = pg_catalog, df, pg_temp +AS $fn$ +DECLARE + grant_opt TEXT := ''; + func_sig TEXT; + -- Explicit list of df.* functions to grant. Sensitive functions + -- (df.http, df.grant_usage, df.revoke_usage) are excluded from this + -- list and granted conditionally below. + func_sigs TEXT[] := ARRAY[ + -- DSL functions + 'df.sql(text)', + 'df.seq(text, text)', + 'df.as(text, text)', + 'df.sleep(bigint)', + 'df.wait_for_schedule(text)', + 'df.loop(text, text)', + 'df.break(text)', + 'df.if(text, text, text)', + 'df.if_rows(text, text, text)', + 'df.join(text, text)', + 'df.join3(text, text, text)', + 'df.race(text, text)', + 'df.wait_for_signal(text, integer)', + 'df.signal(text, text, text)', + 'df.start(text, text, text)', + 'df.setvar(text, text)', + 'df.getvar(text)', + 'df.unsetvar(text)', + 'df.clearvars()', + -- Monitoring functions + 'df.status(text)', + 'df.result(text)', + 'df.cancel(text, text)', + 'df.wait_for_completion(text, integer)', + 'df.run(text)', + 'df.list_instances(text, integer)', + 'df.instance_info(text)', + 'df.instance_nodes(text, integer)', + 'df.instance_executions(text, integer)', + 'df.metrics()', + -- Internal helpers (operators, version, etc.) 
+ 'df.as_op(text, text)', + 'df.if_then_op(text, text)', + 'df.if_else_op(text, text)', + 'df.ensure_durofut(text)', + 'df.loop_prefix_op(text)', + 'df.version()', + 'df.debug_connection()', + 'df.explain(text)', + 'df.target_database()' + ]; +BEGIN + -- Validate the role exists + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = p_role) THEN + RAISE EXCEPTION 'role "%" does not exist', p_role; + END IF; + + IF with_grant THEN + grant_opt := ' WITH GRANT OPTION'; + END IF; + + -- Schema access + EXECUTE format('GRANT USAGE ON SCHEMA df TO %I', p_role) || grant_opt; + + -- Grant EXECUTE on each standard function explicitly. + FOREACH func_sig IN ARRAY func_sigs LOOP + EXECUTE format('GRANT EXECUTE ON FUNCTION %s TO %I', func_sig, p_role) || grant_opt; + END LOOP; + + -- df.http() — opt-in because it makes outbound network requests. + IF include_http THEN + EXECUTE format('GRANT EXECUTE ON FUNCTION df.http(text, text, text, jsonb, integer) TO %I', p_role) || grant_opt; + END IF; + + -- Admin helpers — only for delegated administrators. 
+ IF with_grant THEN + EXECUTE format('GRANT EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean, boolean) TO %I', p_role) || grant_opt; + EXECUTE format('GRANT EXECUTE ON FUNCTION df.revoke_usage(text) TO %I', p_role) || grant_opt; + END IF; + + -- Table privileges + EXECUTE format('GRANT SELECT ON df.instances TO %I', p_role) || grant_opt; + EXECUTE format('GRANT UPDATE (status, updated_at) ON df.instances TO %I', p_role) || grant_opt; + EXECUTE format('GRANT SELECT ON df.nodes TO %I', p_role) || grant_opt; + EXECUTE format('GRANT INSERT (id, label, root_node, submitted_by, database) ON df.instances TO %I', p_role) || grant_opt; + EXECUTE format('GRANT INSERT (id, instance_id, node_type, query, result_name, left_node, right_node, submitted_by, database) ON df.nodes TO %I', p_role) || grant_opt; + EXECUTE format('GRANT SELECT, INSERT, UPDATE, DELETE ON df.vars TO %I', p_role) || grant_opt; + + -- Ensure df is on the role's search_path so the unqualified DSL operators + -- (which live in df and are resolved in the caller's session) work without + -- each user setting search_path by hand. Opt out with + -- set_search_path => false. Append-only and idempotent: df is added at the + -- end (lowest precedence) and only when not already present. + IF set_search_path THEN + DECLARE + v_path text; + BEGIN + SELECT substring(opt FROM 13) -- strip leading 'search_path=' + INTO v_path + FROM pg_db_role_setting s + JOIN pg_roles r ON r.oid = s.setrole + CROSS JOIN LATERAL unnest(s.setconfig) AS o(opt) + WHERE r.rolname = p_role + AND s.setdatabase = 0 + AND opt LIKE 'search_path=%' + LIMIT 1; + + IF v_path IS NULL THEN + -- No per-role search_path yet: set the standard default plus df. 
+ EXECUTE format('ALTER ROLE %I SET search_path = %s', p_role, '"$user", public, df'); + RAISE NOTICE 'pg_durable: set search_path for "%" to "$user", public, df', p_role; + ELSIF NOT EXISTS ( + SELECT 1 FROM unnest(string_to_array(v_path, ',')) AS t(tok) + WHERE lower(btrim(tok, ' "')) = 'df' + ) THEN + EXECUTE format('ALTER ROLE %I SET search_path = %s', p_role, v_path || ', df'); + RAISE NOTICE 'pg_durable: added df to search_path for "%"', p_role; + END IF; + EXCEPTION WHEN insufficient_privilege THEN + RAISE NOTICE 'pg_durable: could not set search_path for "%" (insufficient privilege); add df to search_path manually', p_role; + END; + END IF; + + RAISE NOTICE 'pg_durable: granted df usage privileges to "%"', p_role; +END; +$fn$; + +-- df.grant_usage() is an admin-only helper: revoke PUBLIC's default EXECUTE on +-- the recreated function (DROP above also dropped the prior REVOKE entry). +REVOKE EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean, boolean) FROM PUBLIC; + +CREATE OR REPLACE FUNCTION df.revoke_usage(p_role TEXT) +RETURNS VOID +LANGUAGE plpgsql +SET search_path = pg_catalog, df, pg_temp +AS $fn$ +DECLARE + func_oid oid; +BEGIN + -- Validate the role exists + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = p_role) THEN + RAISE EXCEPTION 'role "%" does not exist', p_role; + END IF; + + -- Prevent accidentally revoking your own access. pg_has_role checks + -- both direct identity (current_user = p_role) and inherited membership + -- (current_user is a member of p_role), so revoking a parent role that + -- the caller depends on is also caught. + -- Superusers are exempt: pg_has_role returns true for all roles when the + -- caller is a superuser, and superusers can always re-grant themselves. 
+ IF NOT EXISTS ( + SELECT 1 + FROM pg_roles + WHERE rolname = current_user + AND rolsuper + ) + AND pg_has_role(current_user, p_role, 'MEMBER') THEN + RAISE EXCEPTION 'cannot revoke df privileges from "%" because the current role ("%") is a member of it — use a different administrator', p_role, current_user; + END IF; + + -- CASCADE: if the target role granted sub-grants (via WITH GRANT OPTION), + -- CASCADE ensures those dependent privileges are also revoked. + -- Column-level revokes must match the column-level grants from grant_usage(). + EXECUTE format('REVOKE SELECT, INSERT, UPDATE, DELETE ON df.vars FROM %I CASCADE', p_role); + EXECUTE format('REVOKE INSERT (id, instance_id, node_type, query, result_name, left_node, right_node, submitted_by, database) ON df.nodes FROM %I CASCADE', p_role); + EXECUTE format('REVOKE SELECT ON df.nodes FROM %I CASCADE', p_role); + EXECUTE format('REVOKE INSERT (id, label, root_node, submitted_by, database) ON df.instances FROM %I CASCADE', p_role); + EXECUTE format('REVOKE UPDATE (status, updated_at) ON df.instances FROM %I CASCADE', p_role); + EXECUTE format('REVOKE SELECT ON df.instances FROM %I CASCADE', p_role); + + -- Revoke EXECUTE per-function rather than using the blanket + -- REVOKE ON ALL FUNCTIONS. A delegated admin may lack privilege on + -- some functions (e.g. df.http); per-function revokes let us skip those. + FOR func_oid IN + SELECT p.oid FROM pg_proc p + JOIN pg_namespace n ON p.pronamespace = n.oid + WHERE n.nspname = 'df' + LOOP + BEGIN + EXECUTE format('REVOKE EXECUTE ON FUNCTION %s FROM %I CASCADE', func_oid::regprocedure, p_role); + EXCEPTION WHEN insufficient_privilege THEN + NULL; + END; + END LOOP; + + EXECUTE format('REVOKE USAGE ON SCHEMA df FROM %I CASCADE', p_role); + + -- Mirror df.grant_usage()'s search_path setup: remove the df entry this + -- extension manages from the role's search_path. 
Idempotent (a no-op when + -- df is absent) and gracefully skipped if the caller lacks privilege to + -- ALTER the role. + DECLARE + v_path text; + v_newpath text; + BEGIN + SELECT substring(opt FROM 13) -- strip leading 'search_path=' + INTO v_path + FROM pg_db_role_setting s + JOIN pg_roles r ON r.oid = s.setrole + CROSS JOIN LATERAL unnest(s.setconfig) AS o(opt) + WHERE r.rolname = p_role + AND s.setdatabase = 0 + AND opt LIKE 'search_path=%' + LIMIT 1; + + IF v_path IS NOT NULL AND EXISTS ( + SELECT 1 FROM unnest(string_to_array(v_path, ',')) AS t(tok) + WHERE lower(btrim(tok, ' "')) = 'df' + ) THEN + SELECT string_agg(btrim(tok), ', ') + INTO v_newpath + FROM unnest(string_to_array(v_path, ',')) AS t(tok) + WHERE lower(btrim(tok, ' "')) <> 'df'; + + IF v_newpath IS NULL OR btrim(v_newpath) = '' THEN + EXECUTE format('ALTER ROLE %I RESET search_path', p_role); + ELSE + EXECUTE format('ALTER ROLE %I SET search_path = %s', p_role, v_newpath); + END IF; + RAISE NOTICE 'pg_durable: removed df from search_path for "%"', p_role; + END IF; + EXCEPTION WHEN insufficient_privilege THEN + RAISE NOTICE 'pg_durable: could not adjust search_path for "%" (insufficient privilege)', p_role; + END; + + RAISE NOTICE 'pg_durable: revoked df usage privileges granted by "%" from "%"', current_user, p_role; +END; +$fn$; diff --git a/src/lib.rs b/src/lib.rs index e0af3dc7..18cbad58 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -355,12 +355,22 @@ CREATE POLICY vars_user_isolation ON df.vars -- privilege WITH GRANT OPTION, which is automatically true for superusers -- and for delegated admins granted via with_grant => true. -- +-- set_search_path (default true) ensures df is on the role's search_path (via +-- ALTER ROLE) so the unqualified DSL operators (~>, |=>, &, |, ?>, !>, @>) — +-- which live in the df schema and are resolved in the caller's session before +-- df.start() runs — work without each user adding df by hand. 
It appends df +-- at the end (lowest precedence) only when absent and never reorders an +-- existing path. Pass false to manage search_path yourself. If the caller +-- lacks privilege to ALTER the role, a NOTICE is raised and the grant +-- otherwise succeeds. +-- -- MAINTENANCE: when adding a new df.* function, add it to the func_sigs -- array below. Functions NOT in this list are deny-by-default. CREATE OR REPLACE FUNCTION df.grant_usage( p_role TEXT, include_http boolean DEFAULT false, - with_grant boolean DEFAULT false + with_grant boolean DEFAULT false, + set_search_path boolean DEFAULT true ) RETURNS VOID LANGUAGE plpgsql @@ -440,7 +450,7 @@ BEGIN -- Admin helpers — only for delegated administrators. IF with_grant THEN - EXECUTE format('GRANT EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean) TO %I', p_role) || grant_opt; + EXECUTE format('GRANT EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean, boolean) TO %I', p_role) || grant_opt; EXECUTE format('GRANT EXECUTE ON FUNCTION df.revoke_usage(text) TO %I', p_role) || grant_opt; END IF; @@ -452,6 +462,41 @@ BEGIN EXECUTE format('GRANT INSERT (id, instance_id, node_type, query, result_name, left_node, right_node, submitted_by, database) ON df.nodes TO %I', p_role) || grant_opt; EXECUTE format('GRANT SELECT, INSERT, UPDATE, DELETE ON df.vars TO %I', p_role) || grant_opt; + -- Ensure df is on the role's search_path so the unqualified DSL operators + -- (which live in df and are resolved in the caller's session) work without + -- each user setting search_path by hand. Opt out with + -- set_search_path => false. Append-only and idempotent: df is added at the + -- end (lowest precedence) and only when not already present. 
+ IF set_search_path THEN + DECLARE + v_path text; + BEGIN + SELECT substring(opt FROM 13) -- strip leading 'search_path=' + INTO v_path + FROM pg_db_role_setting s + JOIN pg_roles r ON r.oid = s.setrole + CROSS JOIN LATERAL unnest(s.setconfig) AS o(opt) + WHERE r.rolname = p_role + AND s.setdatabase = 0 + AND opt LIKE 'search_path=%' + LIMIT 1; + + IF v_path IS NULL THEN + -- No per-role search_path yet: set the standard default plus df. + EXECUTE format('ALTER ROLE %I SET search_path = %s', p_role, '"$user", public, df'); + RAISE NOTICE 'pg_durable: set search_path for "%" to "$user", public, df', p_role; + ELSIF NOT EXISTS ( + SELECT 1 FROM unnest(string_to_array(v_path, ',')) AS t(tok) + WHERE lower(btrim(tok, ' "')) = 'df' + ) THEN + EXECUTE format('ALTER ROLE %I SET search_path = %s', p_role, v_path || ', df'); + RAISE NOTICE 'pg_durable: added df to search_path for "%"', p_role; + END IF; + EXCEPTION WHEN insufficient_privilege THEN + RAISE NOTICE 'pg_durable: could not set search_path for "%" (insufficient privilege); add df to search_path manually', p_role; + END; + END IF; + RAISE NOTICE 'pg_durable: granted df usage privileges to "%"', p_role; END; $fn$; @@ -517,6 +562,44 @@ BEGIN EXECUTE format('REVOKE USAGE ON SCHEMA df FROM %I CASCADE', p_role); + -- Mirror df.grant_usage()'s search_path setup: remove the df entry this + -- extension manages from the role's search_path. Idempotent (a no-op when + -- df is absent) and gracefully skipped if the caller lacks privilege to + -- ALTER the role. 
+ DECLARE + v_path text; + v_newpath text; + BEGIN + SELECT substring(opt FROM 13) -- strip leading 'search_path=' + INTO v_path + FROM pg_db_role_setting s + JOIN pg_roles r ON r.oid = s.setrole + CROSS JOIN LATERAL unnest(s.setconfig) AS o(opt) + WHERE r.rolname = p_role + AND s.setdatabase = 0 + AND opt LIKE 'search_path=%' + LIMIT 1; + + IF v_path IS NOT NULL AND EXISTS ( + SELECT 1 FROM unnest(string_to_array(v_path, ',')) AS t(tok) + WHERE lower(btrim(tok, ' "')) = 'df' + ) THEN + SELECT string_agg(btrim(tok), ', ') + INTO v_newpath + FROM unnest(string_to_array(v_path, ',')) AS t(tok) + WHERE lower(btrim(tok, ' "')) <> 'df'; + + IF v_newpath IS NULL OR btrim(v_newpath) = '' THEN + EXECUTE format('ALTER ROLE %I RESET search_path', p_role); + ELSE + EXECUTE format('ALTER ROLE %I SET search_path = %s', p_role, v_newpath); + END IF; + RAISE NOTICE 'pg_durable: removed df from search_path for "%"', p_role; + END IF; + EXCEPTION WHEN insufficient_privilege THEN + RAISE NOTICE 'pg_durable: could not adjust search_path for "%" (insufficient privilege)', p_role; + END; + RAISE NOTICE 'pg_durable: revoked df usage privileges granted by "%" from "%"', current_user, p_role; END; $fn$; @@ -554,7 +637,7 @@ REVOKE EXECUTE ON FUNCTION df.http(text, text, text, jsonb, integer) FROM PUBLIC -- Revoke PUBLIC's default EXECUTE privilege so that only roles explicitly -- granted access (via with_grant => true or a direct superuser GRANT) can -- manage other roles' df privileges. 
-REVOKE EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean) FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean, boolean) FROM PUBLIC; REVOKE EXECUTE ON FUNCTION df.revoke_usage(text) FROM PUBLIC; "#, name = "rls_and_grants", diff --git a/tests/e2e/sql/12_extension_lifecycle.sql b/tests/e2e/sql/12_extension_lifecycle.sql index d66635bf..8c35cc91 100644 --- a/tests/e2e/sql/12_extension_lifecycle.sql +++ b/tests/e2e/sql/12_extension_lifecycle.sql @@ -214,7 +214,7 @@ DECLARE BEGIN SELECT has_function_privilege( 'df_e2e_user', - 'df.grant_usage(text, boolean, boolean)', + 'df.grant_usage(text, boolean, boolean, boolean)', 'EXECUTE' ) INTO can_grant_usage; @@ -246,7 +246,7 @@ DECLARE BEGIN SELECT has_function_privilege( 'df_e2e_user', - 'df.grant_usage(text, boolean, boolean)', + 'df.grant_usage(text, boolean, boolean, boolean)', 'EXECUTE' ) INTO can_grant_usage; @@ -274,7 +274,7 @@ DROP ROLE IF EXISTS test_helper_grantee; CREATE ROLE test_helper_grantee LOGIN; GRANT USAGE ON SCHEMA df TO test_helper_grantee; -GRANT EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean) TO test_helper_grantee; +GRANT EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean, boolean) TO test_helper_grantee; GRANT EXECUTE ON FUNCTION df.revoke_usage(text) TO test_helper_grantee; SET ROLE test_helper_grantee; @@ -312,7 +312,7 @@ END $$; RESET ROLE; -REVOKE EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean) FROM test_helper_grantee; +REVOKE EXECUTE ON FUNCTION df.grant_usage(text, boolean, boolean, boolean) FROM test_helper_grantee; REVOKE EXECUTE ON FUNCTION df.revoke_usage(text) FROM test_helper_grantee; REVOKE USAGE ON SCHEMA df FROM test_helper_grantee; DROP ROLE test_helper_grantee; diff --git a/tests/e2e/sql/18_delegated_grants.sql b/tests/e2e/sql/18_delegated_grants.sql index 6393951c..e240d299 100644 --- a/tests/e2e/sql/18_delegated_grants.sql +++ b/tests/e2e/sql/18_delegated_grants.sql @@ -63,7 +63,7 @@ DECLARE BEGIN SELECT 
has_function_privilege( 'dg_admin', - 'df.grant_usage(text, boolean, boolean)', + 'df.grant_usage(text, boolean, boolean, boolean)', 'EXECUTE' ) INTO can_grant; @@ -129,7 +129,7 @@ DECLARE BEGIN SELECT has_function_privilege( 'dg_delegate_target', - 'df.grant_usage(text, boolean, boolean)', + 'df.grant_usage(text, boolean, boolean, boolean)', 'EXECUTE' ) INTO can_grant; @@ -201,7 +201,7 @@ DECLARE BEGIN SELECT has_function_privilege( 'dg_app', - 'df.grant_usage(text, boolean, boolean)', + 'df.grant_usage(text, boolean, boolean, boolean)', 'EXECUTE' ) INTO can_grant; diff --git a/tests/e2e/sql/50_grant_usage_search_path.sql b/tests/e2e/sql/50_grant_usage_search_path.sql new file mode 100644 index 00000000..061aa2eb --- /dev/null +++ b/tests/e2e/sql/50_grant_usage_search_path.sql @@ -0,0 +1,148 @@ +-- Copyright (c) Microsoft Corporation. +-- Licensed under the PostgreSQL License. + +-- df.grant_usage() adds df to the target role's search_path by default (so the +-- unqualified DSL operators resolve), set_search_path => false opts out, and +-- df.revoke_usage() removes the df entry again. These are verified through the +-- pg_db_role_setting catalog rather than effective resolution, because a +-- role-level search_path only takes effect on the role's next connection. + +-- Helpers: read the role's per-role search_path setting and test for tokens. 
+CREATE OR REPLACE FUNCTION pg_temp._gsp_path(p_role text) RETURNS text +LANGUAGE sql STABLE AS $$ + SELECT substring(opt FROM 13) -- strip leading 'search_path=' + FROM pg_db_role_setting s + JOIN pg_roles r ON r.oid = s.setrole + CROSS JOIN LATERAL unnest(s.setconfig) AS o(opt) + WHERE r.rolname = p_role + AND s.setdatabase = 0 + AND opt LIKE 'search_path=%' + LIMIT 1; +$$; + +CREATE OR REPLACE FUNCTION pg_temp._gsp_df_count(p_role text) RETURNS int +LANGUAGE sql STABLE AS $$ + SELECT count(*)::int + FROM unnest(string_to_array(coalesce(pg_temp._gsp_path(p_role), ''), ',')) AS t(tok) + WHERE lower(btrim(tok, ' "')) = 'df'; +$$; + +CREATE OR REPLACE FUNCTION pg_temp._gsp_has(p_role text, p_tok text) RETURNS boolean +LANGUAGE sql STABLE AS $$ + SELECT EXISTS ( + SELECT 1 + FROM unnest(string_to_array(coalesce(pg_temp._gsp_path(p_role), ''), ',')) AS t(tok) + WHERE lower(btrim(tok, ' "')) = lower(p_tok) + ); +$$; + +-- Setup: four fresh roles exercising the distinct code paths. +DO $setup$ +DECLARE + role_name TEXT; +BEGIN + FOREACH role_name IN ARRAY ARRAY['gsp_default', 'gsp_optout', 'gsp_existing', 'gsp_idem'] + LOOP + BEGIN + EXECUTE format('DROP OWNED BY %I', role_name); + EXCEPTION + WHEN undefined_object THEN NULL; + END; + EXECUTE format('DROP ROLE IF EXISTS %I', role_name); + EXECUTE format('CREATE ROLE %I', role_name); + END LOOP; + + -- gsp_existing already has a custom per-role search_path (no df). + ALTER ROLE gsp_existing SET search_path = "$user", myschema; +END $setup$; + +-- Grant: default adds df; opt-out does not; existing path is appended to; +-- repeated grants stay idempotent. 
+SELECT df.grant_usage('gsp_default'); +SELECT df.grant_usage('gsp_optout', set_search_path => false); +SELECT df.grant_usage('gsp_existing'); +SELECT df.grant_usage('gsp_idem'); +SELECT df.grant_usage('gsp_idem'); -- second call must not duplicate df + +DO $assert_grant$ +BEGIN + -- gsp_default: no prior path -> "$user", public, df + IF pg_temp._gsp_path('gsp_default') IS NULL THEN + RAISE EXCEPTION 'TEST FAILED (gsp_default): expected a search_path setting, found none'; + END IF; + IF pg_temp._gsp_df_count('gsp_default') <> 1 THEN + RAISE EXCEPTION 'TEST FAILED (gsp_default): expected exactly one df entry, path = %', pg_temp._gsp_path('gsp_default'); + END IF; + IF NOT pg_temp._gsp_has('gsp_default', '$user') OR NOT pg_temp._gsp_has('gsp_default', 'public') THEN + RAISE EXCEPTION 'TEST FAILED (gsp_default): expected "$user" and public preserved, path = %', pg_temp._gsp_path('gsp_default'); + END IF; + + -- gsp_optout: set_search_path => false leaves no per-role setting + IF pg_temp._gsp_path('gsp_optout') IS NOT NULL THEN + RAISE EXCEPTION 'TEST FAILED (gsp_optout): expected no search_path setting, found %', pg_temp._gsp_path('gsp_optout'); + END IF; + + -- gsp_existing: df appended, original entries preserved + IF pg_temp._gsp_df_count('gsp_existing') <> 1 THEN + RAISE EXCEPTION 'TEST FAILED (gsp_existing): expected exactly one df entry, path = %', pg_temp._gsp_path('gsp_existing'); + END IF; + IF NOT pg_temp._gsp_has('gsp_existing', 'myschema') OR NOT pg_temp._gsp_has('gsp_existing', '$user') THEN + RAISE EXCEPTION 'TEST FAILED (gsp_existing): expected original entries preserved, path = %', pg_temp._gsp_path('gsp_existing'); + END IF; + + -- gsp_idem: granting twice must not add df twice + IF pg_temp._gsp_df_count('gsp_idem') <> 1 THEN + RAISE EXCEPTION 'TEST FAILED (gsp_idem): expected exactly one df entry after two grants, path = %', pg_temp._gsp_path('gsp_idem'); + END IF; +END $assert_grant$; + +-- Revoke: removes df, preserves the rest; opt-out role is an 
idempotent no-op. +SELECT df.revoke_usage('gsp_default'); +SELECT df.revoke_usage('gsp_existing'); +SELECT df.revoke_usage('gsp_optout'); -- never had df: must not error + +DO $assert_revoke$ +BEGIN + -- gsp_default: df removed, "$user"/public remain + IF pg_temp._gsp_df_count('gsp_default') <> 0 THEN + RAISE EXCEPTION 'TEST FAILED (gsp_default revoke): expected df removed, path = %', pg_temp._gsp_path('gsp_default'); + END IF; + IF NOT pg_temp._gsp_has('gsp_default', 'public') OR NOT pg_temp._gsp_has('gsp_default', '$user') THEN + RAISE EXCEPTION 'TEST FAILED (gsp_default revoke): expected "$user"/public preserved, path = %', pg_temp._gsp_path('gsp_default'); + END IF; + + -- gsp_existing: df removed, original entries preserved + IF pg_temp._gsp_df_count('gsp_existing') <> 0 THEN + RAISE EXCEPTION 'TEST FAILED (gsp_existing revoke): expected df removed, path = %', pg_temp._gsp_path('gsp_existing'); + END IF; + IF NOT pg_temp._gsp_has('gsp_existing', 'myschema') OR NOT pg_temp._gsp_has('gsp_existing', '$user') THEN + RAISE EXCEPTION 'TEST FAILED (gsp_existing revoke): expected original entries preserved, path = %', pg_temp._gsp_path('gsp_existing'); + END IF; + + -- gsp_optout: still no per-role setting after a no-op revoke + IF pg_temp._gsp_path('gsp_optout') IS NOT NULL THEN + RAISE EXCEPTION 'TEST FAILED (gsp_optout revoke): expected no search_path setting, found %', pg_temp._gsp_path('gsp_optout'); + END IF; +END $assert_revoke$; + +-- Cleanup +DO $cleanup$ +DECLARE + role_name TEXT; +BEGIN + FOREACH role_name IN ARRAY ARRAY['gsp_default', 'gsp_optout', 'gsp_existing', 'gsp_idem'] + LOOP + BEGIN + EXECUTE format('DROP OWNED BY %I', role_name); + EXCEPTION + WHEN undefined_object THEN NULL; + END; + EXECUTE format('DROP ROLE IF EXISTS %I', role_name); + END LOOP; +END $cleanup$; + +DROP FUNCTION pg_temp._gsp_has(text, text); +DROP FUNCTION pg_temp._gsp_df_count(text); +DROP FUNCTION pg_temp._gsp_path(text); + +SELECT 'TEST PASSED' AS result; From 
a03d07e94a64f884bb97dab7be8a88311e31b1de Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Thu, 11 Jun 2026 05:42:45 -0700 Subject: [PATCH 3/4] Fix pg_regress tests for operators moved to df schema Add df to the database-level search_path in 00_init so the unqualified operator syntax (~>, |=>, &, |, ?>, !>, @>) resolves in the regression tests after the operators moved from public to df. Use ALTER DATABASE current_database() in a DO/format() block so it works for both the contrib_regression and postgres databases used by the CI jobs. A per-role setting is not sufficient because the tests use SET ROLE, which does not reload role-level search_path. Update expected/00_init.out for the new df.grant_usage search_path NOTICE and the appended search_path setup block. --- expected/00_init.out | 12 ++++++++++++ sql/00_init.sql | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/expected/00_init.out b/expected/00_init.out index d7980f4b..58671b6b 100644 --- a/expected/00_init.out +++ b/expected/00_init.out @@ -65,6 +65,7 @@ BEGIN END IF; END $$; SELECT df.grant_usage('df_regress_user'); +NOTICE: pg_durable: set search_path for "df_regress_user" to "$user", public, df NOTICE: pg_durable: granted df usage privileges to "df_regress_user" grant_usage ------------- @@ -72,3 +73,14 @@ NOTICE: pg_durable: granted df usage privileges to "df_regress_user" (1 row) GRANT CREATE ON SCHEMA public TO df_regress_user; +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema and are +-- resolved in the caller's session before df.start()/df.explain() run, so df +-- must be on the database search_path for the unqualified operator syntax used +-- by the operator tests below to resolve. Set it at the database level so each +-- pg_regress test connection picks it up. A per-role setting (e.g. 
from +-- df.grant_usage) is not sufficient here because the tests use SET ROLE, which +-- does not reload role-level search_path settings. +DO $$ +BEGIN + EXECUTE format('ALTER DATABASE %I SET search_path = "$user", public, df', current_database()); +END $$; diff --git a/sql/00_init.sql b/sql/00_init.sql index 379f21c0..0b9bb104 100644 --- a/sql/00_init.sql +++ b/sql/00_init.sql @@ -60,3 +60,15 @@ BEGIN END $$; SELECT df.grant_usage('df_regress_user'); GRANT CREATE ON SCHEMA public TO df_regress_user; + +-- The DSL operators (~>, |=>, &, |, ?>, !>, @>) live in the df schema and are +-- resolved in the caller's session before df.start()/df.explain() run, so df +-- must be on the database search_path for the unqualified operator syntax used +-- by the operator tests below to resolve. Set it at the database level so each +-- pg_regress test connection picks it up. A per-role setting (e.g. from +-- df.grant_usage) is not sufficient here because the tests use SET ROLE, which +-- does not reload role-level search_path settings. +DO $$ +BEGIN + EXECUTE format('ALTER DATABASE %I SET search_path = "$user", public, df', current_database()); +END $$; From 9126be149338ff2fbfe2637036e9376de2ba0921 Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Thu, 11 Jun 2026 06:12:55 -0700 Subject: [PATCH 4/4] Re-trigger CI (flaky external httpbin.org test in 06_http_and_ssrf)