From 7eafacf60422a3dcdaacb3c386cec8c7aa4278da Mon Sep 17 00:00:00 2001 From: pinodeca Date: Fri, 19 Jun 2026 18:03:30 +0000 Subject: [PATCH] Anchor extension to pg_catalog; create df via fail-loud bootstrap Switch the control file to schema = pg_catalog and create the df and _duroxide schemas in a pgrx bootstrap block using plain CREATE SCHEMA (no IF NOT EXISTS), so a pre-existing/squatted df schema makes CREATE EXTENSION fail loudly instead of being silently adopted. - Move df.duroxide_schema() into the bootstrap block alongside the CREATE SCHEMA statements and drop the separate create_duroxide_schema block. - Remove IF NOT EXISTS from the install-script tables (df.nodes, df.instances, df.vars, df._worker_epoch) and indexes so install fails loud on conflicts. - Keep #[pg_schema] mod df {} (required for df-qualified externs); its redundant CREATE SCHEMA IF NOT EXISTS df is now a no-op. Update the pgspot PS010 allowlist comment to explain why it remains. Tests: unit (183), 12_extension_lifecycle anti-squat, fmt, clippy, pgspot gate all pass. --- pg_durable.control | 15 +++------ scripts/run-pgspot.sh | 12 +++++-- src/lib.rs | 73 +++++++++++++++++++++---------------------- 3 files changed, 48 insertions(+), 52 deletions(-) diff --git a/pg_durable.control b/pg_durable.control index 3640796b..fee94ed5 100644 --- a/pg_durable.control +++ b/pg_durable.control @@ -4,15 +4,8 @@ comment = 'pg_durable: SQL-native durable orchestrations for PostgreSQL' default_version = '@CARGO_VERSION@' module_pathname = 'pg_durable' +schema = pg_catalog +# pgrx's control-file parser requires `relocatable` and `superuser` to be +# present explicitly (they are otherwise PostgreSQL defaults). relocatable = false -superuser = true -trusted = false -# Note: 'schema' is intentionally omitted. This extension manages two schemas -# (df and the duroxide provider schema), and PostgreSQL's control file only -# supports a single schema directive. The df schema is created by pgrx -# (#[pg_schema]); the duroxide provider schema is created by the -# create_duroxide_schema extension_sql block in src/lib.rs (named '_duroxide' -# on fresh installs, or the legacy 'duroxide' on installs upgraded from -# <= 0.2.2). relocatable = false prevents schema relocation attacks. - - +superuser = true \ No newline at end of file diff --git a/scripts/run-pgspot.sh b/scripts/run-pgspot.sh index 6ae6f6fe..cd15e5d1 100755 --- a/scripts/run-pgspot.sh +++ b/scripts/run-pgspot.sh @@ -28,9 +28,15 @@ PGSPOT_VENV="${PGSPOT_VENV:-${XDG_CACHE_HOME:-$HOME/.cache}/pg_durable/pgspot-ve # future unsafe instance of the same code still fails. Anything unmatched -- plus # unknowns, fatals, and unexplained non-zero exits -- fails the gate. PGSPOT_ALLOW=( - # pgrx emits `CREATE SCHEMA IF NOT EXISTS df` from #[pg_schema]; the IF NOT - # EXISTS (what PS010 flags) isn't controllable from source. Only df is allowed; - # any other PS010 still fails. Schemas we control omit IF NOT EXISTS. + # The real df schema is created by our `bootstrap` block as a plain + # `CREATE SCHEMA df` (no IF NOT EXISTS), so squatting fails loud. pgrx then + # additionally emits a redundant `CREATE SCHEMA IF NOT EXISTS df` from + # #[pg_schema] (required so df-qualified externs resolve); that line is a pure + # no-op since bootstrap already created df. The IF NOT EXISTS string isn't + # controllable from source and the line can't be removed without + # post-processing the generated SQL, so we allow this single PS010 finding. + # Only df is allowed; any other PS010 still fails. Schemas we control omit + # IF NOT EXISTS. '^PS010: Unsafe schema creation: df at line [0-9]+$' # pg_durable's DSL intentionally exposes unqualified custom operators (for # example, `df.sql(...) ~> df.sql(...)`) so users do not need df in search_path. diff --git a/src/lib.rs b/src/lib.rs index edb0a657..76b26a32 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -142,7 +142,35 @@ pub extern "C-unwind" fn _PG_init() { // Schema Declaration // ============================================================================ -/// The 'df' schema contains all pg_durable functions (df = durable functions) +// Create both extension-owned schemas as the very first statements of the +// install script. `bootstrap` guarantees this runs before every other extension +// object, including the redundant `CREATE SCHEMA IF NOT EXISTS df` that pgrx +// emits for the `#[pg_schema] mod df` entity below. +extension_sql!( + r#" +CREATE SCHEMA df; +CREATE SCHEMA _duroxide; + +-- Returns the name of the duroxide provider schema selected for this install. +-- Fresh installs return '_duroxide'. The body is version-specific: the upgrade +-- script pg_durable--0.2.2--0.2.3.sql replaces it to return 'duroxide' for +-- installs that originated on pg_durable <= 0.2.2 (which keep the legacy +-- 'duroxide' schema). Both backend sessions and the background worker call +-- df.duroxide_schema() to discover which schema to use, falling back to +-- 'duroxide' when the helper is absent (installs predating it). +CREATE FUNCTION df.duroxide_schema() RETURNS text + LANGUAGE sql IMMUTABLE PARALLEL SAFE + SET search_path = pg_catalog, pg_temp + AS $$ SELECT '_duroxide'::text $$; +"#, + name = "bootstrap_schemas", + bootstrap +); + +/// The 'df' schema contains all pg_durable functions (df = durable functions). +/// pgrx requires this entity so that `#[pg_extern(schema = "df")]` functions can +/// resolve their target schema. It emits a redundant `CREATE SCHEMA IF NOT +/// EXISTS df` that no-ops after the bootstrap block above has already created df. #[pg_schema] mod df {} @@ -153,7 +181,7 @@ mod df {} extension_sql!( r#" -- Table to store function nodes (SQL steps, THEN chains, etc.) -CREATE TABLE IF NOT EXISTS df.nodes ( +CREATE TABLE df.nodes ( id VARCHAR(8) PRIMARY KEY, instance_id VARCHAR(8), node_type TEXT NOT NULL, @@ -174,7 +202,7 @@ COMMENT ON COLUMN df.nodes.submitted_by IS 'Effective role (current_user) at df.start() time - used for connection authentication and SQL execution'; -- Table to store function instances -CREATE TABLE IF NOT EXISTS df.instances ( +CREATE TABLE df.instances ( id VARCHAR(8) PRIMARY KEY, label TEXT, root_node VARCHAR(8) NOT NULL, @@ -190,14 +218,14 @@ COMMENT ON COLUMN df.instances.submitted_by IS 'Effective role (current_user) at df.start() time - used for connection authentication and SQL execution'; -- Index for finding pending instances -CREATE INDEX IF NOT EXISTS idx_instances_status ON df.instances(status); +CREATE INDEX idx_instances_status ON df.instances(status); -- Index for finding nodes by instance -CREATE INDEX IF NOT EXISTS idx_nodes_instance ON df.nodes(instance_id); +CREATE INDEX idx_nodes_instance ON df.nodes(instance_id); -- Table to store workflow variables (captured at df.start()) -- Per-user scoping: each user has their own variable namespace. -CREATE TABLE IF NOT EXISTS df.vars ( +CREATE TABLE df.vars ( name TEXT NOT NULL, value TEXT, owner REGROLE NOT NULL DEFAULT pg_catalog.quote_ident(current_user)::pg_catalog.regrole, @@ -208,7 +236,7 @@ CREATE TABLE IF NOT EXISTS df.vars ( -- initialising. If the extension is DROP-ed and re-CREATEd between -- two poll ticks the epoch row disappears, so the worker detects the -- recreation even though the extension is always "present" in pg_extension. -CREATE TABLE IF NOT EXISTS df._worker_epoch ( +CREATE TABLE df._worker_epoch ( epoch_id UUID PRIMARY KEY, started_at TIMESTAMPTZ DEFAULT pg_catalog.now(), last_seen_at TIMESTAMPTZ DEFAULT pg_catalog.now() @@ -519,37 +547,6 @@ END $$; requires = [df] ); -// ============================================================================ -// Duroxide Schema -// ============================================================================ - -extension_sql!( - r#" --- The duroxide provider schema is created here so the extension owns it. --- No IF NOT EXISTS: fails loudly if a _duroxide schema already exists, --- preventing adoption of a potentially attacker-crafted schema. --- The background worker populates this schema at startup via ApplyAll. --- --- Fresh installs use the '_duroxide' schema. Installs that originated on --- pg_durable <= 0.2.2 keep the legacy 'duroxide' schema; the upgrade script --- pg_durable--0.2.2--0.2.3.sql defines df.duroxide_schema() to return --- 'duroxide' for those installs. Both backend sessions and the background --- worker call df.duroxide_schema() to discover which schema to use, falling --- back to 'duroxide' when the helper is absent (installs predating it). -CREATE SCHEMA _duroxide; - --- Returns the name of the duroxide provider schema selected for this install. --- Fresh installs return '_duroxide'. The body is version-specific: the upgrade --- script for pre-existing installs replaces it to return 'duroxide'. -CREATE FUNCTION df.duroxide_schema() RETURNS text - LANGUAGE sql IMMUTABLE PARALLEL SAFE - SET search_path = pg_catalog, pg_temp - AS $$ SELECT '_duroxide'::text $$; -"#, - name = "create_duroxide_schema", - requires = ["validate_database"] -); - // ============================================================================ // SQL Operators // ============================================================================