From 12b50e4b196b1d9cf3db6a8e849cccc876ba32b2 Mon Sep 17 00:00:00 2001 From: crprashant <5108573+crprashant@users.noreply.github.com> Date: Mon, 15 Jun 2026 18:16:11 -0700 Subject: [PATCH] Implement full UUID instance/node IDs (#129) Replace the collision-prone 8-character short IDs (32 bits of entropy, ~50% PK-collision probability near 65k instances) with full canonical UUIDs (122 bits) for df.instances and df.nodes IDs. - types.rs: add full_uuid(); retain short_id() for the not-yet-upgraded pre-0.2.3 schema - dsl.rs: gate ID format on full_uuid_ids_enabled() (extversion >= 0.2.3) and thread the choice through start()/insert_nodes() so node IDs and node references also become UUIDs - lib.rs: widen the six id columns to TEXT and relax the *_format_chk regexes to accept a legacy 8-char id or a canonical UUID; add a node-id format test - explain.rs: classify both legacy 8-char and canonical 36-char UUID instance IDs - load_function_graph.rs: cast the id columns to text in the BGW's cached SELECTs so an ALTER EXTENSION UPDATE that widens them to TEXT under a live pooled connection cannot trip "cached plan must not change result type" - fold the UUID-widening DDL (drop FKs/checks, ALTER COLUMN TYPE text, re-add checks/FKs NOT VALID) into the existing, still-unreleased sql/pg_durable--0.2.2--0.2.3.sql upgrade script instead of minting a new version; no Cargo version bump - docs: USER_GUIDE, ARCHITECTURE, upgrade-testing (incl. an operator note on running VALIDATE CONSTRAINT later) and a CHANGELOG entry Backward compatible: the 0.2.3 .so keeps emitting 8-char IDs against the not-yet-upgraded 0.2.2 schema until ALTER EXTENSION UPDATE runs (B1); existing 8-char rows stay valid and coexist with UUIDs (B2). Verified locally: unit (171), e2e, and upgrade (Scenario A + B1 + B2) suites pass; pgspot clean. --- CHANGELOG.md | 6 + USER_GUIDE.md | 17 ++- docs/ARCHITECTURE.md | 8 +- docs/upgrade-testing.md | 22 +++ sql/pg_durable--0.2.2--0.2.3.sql | 127 ++++++++++++++++++ src/activities/load_function_graph.rs | 16 ++- src/dsl.rs | 41 +++++- src/explain.rs | 13 +- src/lib.rs | 74 +++++++--- src/types.rs | 19 ++- tests/e2e/sql/45_connection_limit_timeout.sql | 16 ++- 11 files changed, 319 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc7f79a2..f2e64b14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ Pre-1.0 note: while `pg_durable` is in major version `0`, minor releases may include breaking changes. +## [0.2.3] - Unreleased + +### Changed + +- **Full UUID instance/node IDs:** `df.start()` now generates full 36-character canonical UUIDs (122 bits of entropy) for instance and node IDs instead of the legacy 8-character form (32 bits, ~50% birthday-paradox collision chance near 65k instances on the `df.instances.id` primary key). The six id columns widen from `VARCHAR(8)` to `TEXT` and their `*_format_chk` CHECK constraints relax to accept a legacy 8-character id or a full UUID, so rows created before an in-place upgrade stay valid. A version gate keeps a new `.so` emitting 8-character ids against not-yet-upgraded schemas (Scenario B1). Upgrade DDL is in `sql/pg_durable--0.2.2--0.2.3.sql` (#129). + ## [0.2.2] - 2026-05-28 First open-source release of `pg_durable` on GitHub under the PostgreSQL License. diff --git a/USER_GUIDE.md b/USER_GUIDE.md index 5a86fade..b26e96bc 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -88,7 +88,7 @@ After `CREATE EXTENSION`, the background worker initializes the engine schema as ```sql -- Execute a simple SQL query as a durable function SELECT df.start('SELECT ''Hello, durable world!'''); --- Returns: a1b2c3d4 (8-character instance ID) +-- Returns: 2f1c4d8e-9a3b-4c7d-8e1f-0a1b2c3d4e5f (UUID instance ID) ``` ### Check the Result @@ -97,8 +97,8 @@ SELECT df.start('SELECT ''Hello, durable world!'''); -- List all functions SELECT * FROM df.list_instances(); --- Get result of a specific instance -SELECT df.result('a1b2c3d4'); +-- Get result of a specific instance (use the instance_id returned above) +SELECT df.result('2f1c4d8e-9a3b-4c7d-8e1f-0a1b2c3d4e5f'); ``` --- @@ -127,10 +127,13 @@ SELECT df.result('a1b2c3d4'); ### Instance IDs -Every durable function gets a unique 8-character hex ID (e.g., `a1b2c3d4`). Use this ID to: -- Check status: `SELECT df.status('a1b2c3d4')` -- Get result: `SELECT df.result('a1b2c3d4')` -- Cancel: `SELECT df.cancel('a1b2c3d4')` +Every durable function gets a unique UUID (e.g., `2f1c4d8e-9a3b-4c7d-8e1f-0a1b2c3d4e5f`). Use this ID to: +- Check status: `SELECT df.status('2f1c4d8e-9a3b-4c7d-8e1f-0a1b2c3d4e5f')` +- Get result: `SELECT df.result('2f1c4d8e-9a3b-4c7d-8e1f-0a1b2c3d4e5f')` +- Cancel: `SELECT df.cancel('2f1c4d8e-9a3b-4c7d-8e1f-0a1b2c3d4e5f')` + +> **Note:** Instances created before upgrading to 0.2.3 keep their original +> 8-character IDs (e.g., `a1b2c3d4`); both formats remain valid. ### Durability diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index ea397c88..e2d47771 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -220,13 +220,13 @@ When serialized to JSON: ```sql CREATE TABLE df.nodes ( - id VARCHAR(8) PRIMARY KEY, - instance_id VARCHAR(8), -- Set by df.start() + id TEXT PRIMARY KEY, -- Full UUID (0.2.3+); legacy 8-char rows accepted after upgrade + instance_id TEXT, -- Set by df.start() node_type TEXT NOT NULL, -- SQL, THEN, IF, JOIN, LOOP, etc. query TEXT, -- SQL query or config JSON result_name TEXT, -- Named result for $variable substitution - left_node VARCHAR(8), -- Left child ID - right_node VARCHAR(8), -- Right child ID + left_node TEXT, -- Left child ID + right_node TEXT, -- Right child ID status TEXT DEFAULT 'pending', result JSONB, created_at TIMESTAMPTZ DEFAULT now() diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 51059edd..fa09c5dd 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -205,6 +205,28 @@ what the upgrade script handles, and any backward compatibility considerations. ### v0.2.2 → v0.2.3 +#### #129 Full UUID instance/node IDs +- **DDL change:** The six id columns — `df.instances.id`, `df.instances.root_node`, `df.nodes.id`, `df.nodes.instance_id`, `df.nodes.left_node`, `df.nodes.right_node` — widen from `VARCHAR(8)` to `TEXT`, and their six `*_format_chk` CHECK constraints relax from `^[0-9a-f]{8}$` to `^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$`, which accepts a legacy 8-character id **or** a full canonical UUID. The legacy short id carried only 32 bits of entropy (~50% PK-collision chance near 65k instances); 0.2.3 generates full UUIDs (122 bits). The fresh-install DDL lives in `src/lib.rs`; the upgrade script is `sql/pg_durable--0.2.2--0.2.3.sql`. +- **Upgrade mechanics:** The id columns participate in four composite foreign keys (`nodes_instance_identity_fkey`, `nodes_left_node_same_instance_fkey`, `nodes_right_node_same_instance_fkey`, `instances_root_node_same_instance_fkey`), so the upgrade drops those FKs and the six format checks, runs `ALTER COLUMN ... TYPE text` (binary-coercible — the PRIMARY KEY / UNIQUE indexes are rebuilt in place, not dropped), then re-adds the format checks (new regex) and the FKs. All re-added constraints use the exact names, `NOT VALID`, and `DEFERRABLE INITIALLY DEFERRED` of a fresh install. +- **Scenario A considerations:** The upgrade reproduces the fresh-install constraints byte-for-byte (same names, same `NOT VALID` state, same FK definitions) and `varchar(8) → text` leaves `atttypmod = -1` just like a fresh `TEXT` column, so the `df` schema snapshot matches on both paths. +- **Scenario B1 considerations:** A 0.2.3 `.so` must keep emitting 8-character ids against pre-0.2.3 schemas (whose `VARCHAR(8)` columns and `^[0-9a-f]{8}$` checks would reject a UUID). `df.start()` gates id generation on `full_uuid_ids_enabled()` (`src/dsl.rs`), which returns true only when the installed extension version is `>= 0.2.3`. On older schemas it returns false and `short_id()` is used; on fresh/upgraded 0.2.3 schemas `full_uuid()` is used. This mirrors the existing `owner_scoped_vars_enabled()` / `legacy_login_role_schema()` version gates. +- **Scenario B2 considerations:** No data migration. Rows created before the upgrade keep their 8-character ids, which still satisfy the relaxed CHECK regex, so the new and old id formats coexist and in-flight BGW state stays valid. The `NOT VALID` re-add means existing rows are not rescanned. +- **Operational note — validating constraints later:** Because the relaxed CHECK regex also matches every legacy 8-character id, all pre-existing rows already conform. An operator who wants to drop the `NOT VALID` marker (so the constraints show as validated in the catalog) can run `ALTER TABLE df.instances VALIDATE CONSTRAINT instances_id_format_chk` (and the analogous statements for the other five `*_format_chk` constraints) at any later time; the validating scan cannot fail because every existing id already matches. This is optional — `NOT VALID` constraints are still enforced for new rows. +- **Operational note — lock window:** `ALTER COLUMN ... TYPE text` takes an `ACCESS EXCLUSIVE` lock on `df.nodes` and `df.instances`. The `varchar(8) → text` conversion is binary-coercible so there is no full table rewrite, but the `PRIMARY KEY`/`UNIQUE` indexes are rebuilt in place and the dropped/re-added foreign keys briefly lock both tables. The work scales with retained history (`df.nodes`/`df.instances` row counts), so run `ALTER EXTENSION pg_durable UPDATE` in a maintenance window and consider pausing `df.start()` traffic on installations with very large `df.nodes` tables. +- **Operational note — dependent objects:** Re-typing the six id columns will fail if a user-created view, rule, generated column, or SQL function depends on those columns. Check for dependencies before upgrading and drop/recreate them around the update: + ```sql + SELECT DISTINCT dependent_ns.nspname AS schema, dependent.relname AS object + FROM pg_depend d + JOIN pg_rewrite r ON r.oid = d.objid + JOIN pg_class dependent ON dependent.oid = r.ev_class + JOIN pg_namespace dependent_ns ON dependent_ns.oid = dependent.relnamespace + JOIN pg_class src ON src.oid = d.refobjid + JOIN pg_namespace src_ns ON src_ns.oid = src.relnamespace + WHERE src_ns.nspname = 'df' AND src.relname IN ('nodes', 'instances') + AND dependent.relname NOT IN ('nodes', 'instances'); + ``` +- **Constraint-drift note:** The id-format CHECK regex is maintained byte-identically in two places — the fresh-install DDL (`src/lib.rs`) and the upgrade script — and is validated by `pgspot` plus the functional B1/B2 tests, which insert real 8-character and UUID ids and assert accept/reject behavior. The `df` schema snapshot in `scripts/test-upgrade.sh` compares column `data_type` and constraint names but not CHECK bodies or FK deferrability flags; pinning those via `pg_get_constraintdef()`/`convalidated` is a possible future hardening of the schema comparison. + #### Rename duroxide provider schema to `_duroxide` for fresh installs - **DDL change (df schema):** Adds `df.duroxide_schema()`, an `IMMUTABLE`/`PARALLEL SAFE` SQL function that returns the name of the schema holding the duroxide provider objects. Fresh 0.2.3 installs create the function (in `src/lib.rs`) returning `'_duroxide'`; the upgrade script `sql/pg_durable--0.2.2--0.2.3.sql` creates the same function returning `'duroxide'` so pre-existing installs keep using the legacy schema. Both bodies set `search_path = pg_catalog, pg_temp` to satisfy the pgspot gate. - **DDL change (provider schema):** Fresh installs now run `CREATE SCHEMA _duroxide` (was `CREATE SCHEMA duroxide`). The upgrade script does **not** rename, drop, or move the existing `duroxide` schema — renaming an in-use provider schema would orphan the BGW's durable state. Upgraded installs therefore continue to use `duroxide`. diff --git a/sql/pg_durable--0.2.2--0.2.3.sql b/sql/pg_durable--0.2.2--0.2.3.sql index 470d4d17..485fefbd 100644 --- a/sql/pg_durable--0.2.2--0.2.3.sql +++ b/sql/pg_durable--0.2.2--0.2.3.sql @@ -3,6 +3,18 @@ -- pg_durable upgrade: 0.2.2 → 0.2.3 -- +-- This upgrade carries two independent schema changes: +-- 1. df.duroxide_schema() — reports which schema holds the duroxide provider +-- objects for this install (provider schema rename for fresh installs). +-- 2. Full UUID instance/node ids (#129) — widens the id columns to TEXT and +-- relaxes their format CHECKs to accept a full canonical UUID. +-- The two changes touch disjoint objects (a df function vs. the df.nodes / +-- df.instances columns), so the order below does not matter. + +-- ============================================================================ +-- Change 1: df.duroxide_schema() +-- ============================================================================ +-- -- Introduces df.duroxide_schema(), a helper that reports which schema holds the -- duroxide provider objects for this install. Fresh 0.2.3 installs create the -- provider objects in the '_duroxide' schema (see lib.rs). Installs upgrading @@ -19,3 +31,118 @@ CREATE FUNCTION df.duroxide_schema() RETURNS text LANGUAGE sql IMMUTABLE PARALLEL SAFE SET search_path = pg_catalog, pg_temp AS $$ SELECT 'duroxide'::text $$; + +-- ============================================================================ +-- Change 2: Full UUID instance/node ids (#129) +-- ============================================================================ +-- +-- Widens the instance/node id columns from VARCHAR(8) to TEXT and relaxes the +-- id-format CHECK constraints so they accept a full canonical UUID in addition +-- to the legacy 8-character form. See issue #129: the old 8-hex ids carry only +-- 32 bits of entropy, giving a ~50% birthday-paradox collision chance around +-- 65k instances on the df.instances.id primary key. From 0.2.3 the extension +-- generates full UUIDs (122 bits) for new instances and nodes. +-- +-- Backward compatibility: rows created before this upgrade keep their 8-char +-- ids, which still satisfy the new CHECK regex, so no data is rewritten and the +-- background worker's in-flight state stays valid. The new .so only emits full +-- UUIDs once the installed extension version reports >= 0.2.3 (see +-- full_uuid_ids_enabled() in src/dsl.rs), so a .so upgraded ahead of this +-- script keeps producing 8-char ids that the old VARCHAR(8) columns accept. +-- +-- The id columns participate in composite foreign keys, so the constraints are +-- dropped before the type change and re-added afterwards. varchar(8) -> text is +-- binary-coercible, so the supporting PRIMARY KEY / UNIQUE indexes are rebuilt +-- in place by ALTER COLUMN TYPE and do not need to be dropped. The format CHECK +-- and FOREIGN KEY constraints are re-created exactly as a fresh 0.2.3 install +-- defines them (same names, NOT VALID, DEFERRABLE INITIALLY DEFERRED) so an +-- in-place upgrade yields a schema identical to a fresh install (Scenario A). +-- +-- Operators are schema-qualified (OPERATOR(pg_catalog.~)) so name resolution +-- never depends on the session search_path, closing the CVE-2018-1058 vector. +-- Enforced by the pgspot CI gate (scripts/pgspot-gate.sh). + +-- ---------------------------------------------------------------------------- +-- 1. Drop the composite foreign keys that span the id columns being widened. +-- ---------------------------------------------------------------------------- +ALTER TABLE df.nodes + DROP CONSTRAINT IF EXISTS nodes_instance_identity_fkey, + DROP CONSTRAINT IF EXISTS nodes_left_node_same_instance_fkey, + DROP CONSTRAINT IF EXISTS nodes_right_node_same_instance_fkey; + +ALTER TABLE df.instances + DROP CONSTRAINT IF EXISTS instances_root_node_same_instance_fkey; + +-- ---------------------------------------------------------------------------- +-- 2. Drop the old 8-hex-only id-format CHECK constraints. +-- ---------------------------------------------------------------------------- +ALTER TABLE df.instances + DROP CONSTRAINT IF EXISTS instances_id_format_chk, + DROP CONSTRAINT IF EXISTS instances_root_node_format_chk; + +ALTER TABLE df.nodes + DROP CONSTRAINT IF EXISTS nodes_id_format_chk, + DROP CONSTRAINT IF EXISTS nodes_instance_id_format_chk, + DROP CONSTRAINT IF EXISTS nodes_left_node_format_chk, + DROP CONSTRAINT IF EXISTS nodes_right_node_format_chk; + +-- ---------------------------------------------------------------------------- +-- 3. Widen the id columns to TEXT (binary-coercible; indexes rebuilt in place). +-- ---------------------------------------------------------------------------- +ALTER TABLE df.instances + ALTER COLUMN id TYPE text, + ALTER COLUMN root_node TYPE text; + +ALTER TABLE df.nodes + ALTER COLUMN id TYPE text, + ALTER COLUMN instance_id TYPE text, + ALTER COLUMN left_node TYPE text, + ALTER COLUMN right_node TYPE text; + +-- ---------------------------------------------------------------------------- +-- 4. Re-add the id-format CHECK constraints, now accepting a legacy 8-char id +-- OR a full canonical UUID. NOT VALID so existing rows are not rescanned; +-- new rows are still validated. Existing rows already conform (their 8-char +-- ids match the relaxed regex), so an operator may later run +-- `ALTER TABLE df.instances VALIDATE CONSTRAINT instances_id_format_chk` (and +-- the other five constraints) in a maintenance window to drop the NOT VALID +-- marker; the scan cannot fail because every existing id already matches. +-- ---------------------------------------------------------------------------- +ALTER TABLE df.instances + ADD CONSTRAINT instances_id_format_chk + CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, + ADD CONSTRAINT instances_root_node_format_chk + CHECK (root_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID; + +ALTER TABLE df.nodes + ADD CONSTRAINT nodes_id_format_chk + CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, + ADD CONSTRAINT nodes_instance_id_format_chk + CHECK (instance_id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, + ADD CONSTRAINT nodes_left_node_format_chk + CHECK (left_node IS NULL OR left_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, + ADD CONSTRAINT nodes_right_node_format_chk + CHECK (right_node IS NULL OR right_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID; + +-- ---------------------------------------------------------------------------- +-- 5. Re-add the composite foreign keys, identical to a fresh 0.2.3 install. +-- ---------------------------------------------------------------------------- +ALTER TABLE df.nodes + ADD CONSTRAINT nodes_instance_identity_fkey + FOREIGN KEY (instance_id, submitted_by) + REFERENCES df.instances (id, submitted_by) + DEFERRABLE INITIALLY DEFERRED NOT VALID, + ADD CONSTRAINT nodes_left_node_same_instance_fkey + FOREIGN KEY (instance_id, left_node) + REFERENCES df.nodes (instance_id, id) + DEFERRABLE INITIALLY DEFERRED NOT VALID, + ADD CONSTRAINT nodes_right_node_same_instance_fkey + FOREIGN KEY (instance_id, right_node) + REFERENCES df.nodes (instance_id, id) + DEFERRABLE INITIALLY DEFERRED NOT VALID; + +ALTER TABLE df.instances + ADD CONSTRAINT instances_root_node_same_instance_fkey + FOREIGN KEY (id, root_node) + REFERENCES df.nodes (instance_id, id) + DEFERRABLE INITIALLY DEFERRED NOT VALID; diff --git a/src/activities/load_function_graph.rs b/src/activities/load_function_graph.rs index 14d2239e..f0e06d59 100644 --- a/src/activities/load_function_graph.rs +++ b/src/activities/load_function_graph.rs @@ -31,7 +31,15 @@ pub async fn execute( "Loading function graph for instance: {instance_id}" )); - let instance_query = "SELECT root_node, r.rolname AS submitted_by + // Cast the id-typed columns (root_node here; id/left_node/right_node in the + // nodes query below) to text so this prepared statement's result descriptor + // is identical whether the columns are still VARCHAR(8) (a pre-0.2.3 schema + // or a not-yet-upgraded instance) or TEXT (0.2.3+). The BGW holds long-lived + // pooled connections with cached prepared statements; without the cast, an + // ALTER EXTENSION UPDATE that widens these columns to TEXT under a live + // connection makes the next execute fail with "cached plan must not change + // result type". See sql/pg_durable--0.2.2--0.2.3.sql. + let instance_query = "SELECT root_node::text AS root_node, r.rolname AS submitted_by FROM df.instances i LEFT JOIN pg_catalog.pg_roles r ON r.oid = i.submitted_by::oid WHERE i.id = $1"; @@ -102,8 +110,10 @@ pub async fn execute( } } - let nodes_query = r#"SELECT n.id, n.node_type, n.query, n.result_name, - n.left_node, n.right_node, + // id/left_node/right_node cast to text for the same cached-plan-stability + // reason documented on instance_query above. + let nodes_query = r#"SELECT n.id::text AS id, n.node_type, n.query, n.result_name, + n.left_node::text AS left_node, n.right_node::text AS right_node, r.rolname AS submitted_by, n.database FROM df.nodes n diff --git a/src/dsl.rs b/src/dsl.rs index 9e2c95e2..7f42ef29 100644 --- a/src/dsl.rs +++ b/src/dsl.rs @@ -14,7 +14,7 @@ use std::time::Instant; use crate::client::start_durable_function; use crate::types::{ - mark_non_future_helper_call, short_id, validate_result_name, Durofut, FunctionInput, + full_uuid, mark_non_future_helper_call, short_id, validate_result_name, Durofut, FunctionInput, }; /// Check if we're running inside a workflow context (background worker connection). @@ -118,6 +118,33 @@ fn legacy_login_role_schema() -> bool { !owner_scoped_vars_enabled() } +/// Returns true when the installed schema can hold full UUID instance/node IDs +/// (0.2.3+). Earlier schemas declare the id columns as `VARCHAR(8)` with an +/// `^[0-9a-f]{8}$` CHECK, so the new `.so` must keep generating 8-character IDs +/// for them (Scenario B1) until the customer runs `ALTER EXTENSION UPDATE`. +/// See #129 — full UUIDs make the 32-bit collision risk of short IDs negligible. +fn full_uuid_ids_enabled() -> bool { + let extversion = installed_extension_version(); + let ext_semver = parse_semver(&extversion).unwrap_or_else(|| { + pgrx::error!( + "Unsupported pg_durable extension version format: {}", + extversion + ) + }); + + ext_semver >= (0, 2, 3) +} + +/// Generate an instance/node ID in the format the installed schema accepts: +/// a full UUID on 0.2.3+ schemas, or a legacy 8-character ID on older ones. +fn new_id(full_uuid_ids: bool) -> String { + if full_uuid_ids { + full_uuid() + } else { + short_id() + } +} + /// Sets a workflow variable. Must be called BEFORE df.start(), not inside a workflow. /// Variables are captured at df.start() and remain immutable during execution. /// Each user has their own variable namespace (owner = current_user). @@ -642,7 +669,10 @@ pub fn start( if let Err(e) = durofut.validate_recursive() { pgrx::error!("Invalid durable function graph: {}", e); } - let instance_id = short_id(); + // Choose the ID format the installed schema accepts: full UUIDs on 0.2.3+ + // schemas, legacy 8-character IDs on older ones (Scenario B1). See #129. + let full_uuid_ids = full_uuid_ids_enabled(); + let instance_id = new_id(full_uuid_ids); // Validate that the target database exists (if specified) if let Some(db) = database { @@ -721,6 +751,7 @@ pub fn start( current_user_oid: pgrx::pg_sys::Oid, database: Option<&str>, legacy_login_role: bool, + full_uuid_ids: bool, node_count: &mut usize, ) -> String { *node_count += 1; @@ -731,7 +762,7 @@ pub fn start( crate::types::MAX_GRAPH_NODES ); } - let node_id = short_id(); + let node_id = new_id(full_uuid_ids); // Recursively insert children FIRST to get their IDs let left_id = node.left_node.as_ref().map(|n| { @@ -741,6 +772,7 @@ pub fn start( current_user_oid, database, legacy_login_role, + full_uuid_ids, node_count, ) }); @@ -751,6 +783,7 @@ pub fn start( current_user_oid, database, legacy_login_role, + full_uuid_ids, node_count, ) }); @@ -763,6 +796,7 @@ pub fn start( current_user_oid, database, legacy_login_role, + full_uuid_ids, node_count, )) }) { @@ -845,6 +879,7 @@ pub fn start( current_user_oid, database, legacy_login_role, + full_uuid_ids, &mut node_count, ); diff --git a/src/explain.rs b/src/explain.rs index 2ac8c3b6..efd5a05d 100644 --- a/src/explain.rs +++ b/src/explain.rs @@ -37,8 +37,17 @@ struct ExplainNode { pub fn explain(input: &str) -> String { let trimmed = input.trim(); - // Detect if input is an instance_id (8 hex chars) or a DSL expression - let is_instance_id = trimmed.len() == 8 && trimmed.chars().all(|c| c.is_ascii_hexdigit()); + // Detect whether the input is an instance_id or a DSL expression. Instance + // IDs are either a legacy 8-char hex string or a full canonical UUID (#129); + // DSL expressions contain `df.` calls and punctuation that never parse as + // either, so this stays unambiguous. + let is_legacy_id = trimmed.len() == 8 && trimmed.chars().all(|c| c.is_ascii_hexdigit()); + // Require the canonical 36-char hyphenated form. uuid::Uuid::parse_str also + // accepts simple (32-char), braced, and URN spellings; restricting to + // length 36 keeps a malformed id on the DSL-parse path (clear error) rather + // than a silent instance-lookup miss. df.start() only ever emits this form. + let is_canonical_uuid = trimmed.len() == 36 && uuid::Uuid::parse_str(trimmed).is_ok(); + let is_instance_id = is_legacy_id || is_canonical_uuid; if is_instance_id { explain_instance(trimmed) diff --git a/src/lib.rs b/src/lib.rs index 681b88d4..e44a87cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -153,14 +153,17 @@ mod df {} extension_sql!( r#" -- Table to store function nodes (SQL steps, THEN chains, etc.) +-- IDs are full UUIDs (36-char canonical text) as of 0.2.3 (#129); the legacy +-- 8-character format remains accepted by the format CHECKs below so rows +-- created before an in-place upgrade stay valid. CREATE TABLE IF NOT EXISTS df.nodes ( - id VARCHAR(8) PRIMARY KEY, - instance_id VARCHAR(8), + id TEXT PRIMARY KEY, + instance_id TEXT, node_type TEXT NOT NULL, query TEXT, result_name TEXT, - left_node VARCHAR(8), - right_node VARCHAR(8), + left_node TEXT, + right_node TEXT, status TEXT DEFAULT 'pending', result JSONB, error TEXT, @@ -175,9 +178,9 @@ COMMENT ON COLUMN df.nodes.submitted_by IS -- Table to store function instances CREATE TABLE IF NOT EXISTS df.instances ( - id VARCHAR(8) PRIMARY KEY, + id TEXT PRIMARY KEY, label TEXT, - root_node VARCHAR(8) NOT NULL, + root_node TEXT NOT NULL, status TEXT DEFAULT 'pending', submitted_by REGROLE NOT NULL, database TEXT, @@ -221,9 +224,12 @@ ALTER TABLE df.instances -- depends on the session search_path -- closing the CVE-2018-1058 vector -- (a malicious schema shadowing `=`, `~`, etc.). Enforced by the pgspot CI -- gate (scripts/pgspot-gate.sh). - CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, + -- The regex accepts a legacy 8-character id OR a full 36-char canonical + -- UUID (#129) so databases upgraded in place keep pre-existing short ids + -- valid alongside newly generated UUIDs. + CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, ADD CONSTRAINT instances_root_node_format_chk - CHECK (root_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, + CHECK (root_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, ADD CONSTRAINT instances_status_chk CHECK (status OPERATOR(pg_catalog.=) ANY (ARRAY['pending', 'running', 'completed', 'failed', 'cancelled'])) NOT VALID, -- Supports the composite FK from df.nodes that ties node identity to the instance row. @@ -236,13 +242,13 @@ ALTER TABLE df.nodes ADD CONSTRAINT nodes_submitted_by_present_chk CHECK (submitted_by IS NOT NULL) NOT VALID, ADD CONSTRAINT nodes_id_format_chk - CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, + CHECK (id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, ADD CONSTRAINT nodes_instance_id_format_chk - CHECK (instance_id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, + CHECK (instance_id OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, ADD CONSTRAINT nodes_left_node_format_chk - CHECK (left_node IS NULL OR left_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, + CHECK (left_node IS NULL OR left_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, ADD CONSTRAINT nodes_right_node_format_chk - CHECK (right_node IS NULL OR right_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}$') NOT VALID, + CHECK (right_node IS NULL OR right_node OPERATOR(pg_catalog.~) '^[0-9a-f]{8}(-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$') NOT VALID, ADD CONSTRAINT nodes_node_type_chk CHECK (node_type OPERATOR(pg_catalog.=) ANY (ARRAY['SQL', 'THEN', 'IF', 'JOIN', 'LOOP', 'BREAK', 'RACE', 'SLEEP', 'WAIT_SCHEDULE', 'HTTP', 'SIGNAL'])) NOT VALID, ADD CONSTRAINT nodes_result_name_chk @@ -1386,15 +1392,50 @@ mod tests { fn test_start_returns_instance_id() { let fut = crate::dsl::sql("SELECT 1"); let instance_id = crate::dsl::start(&fut, None, None); - assert_eq!(instance_id.len(), 8); - assert!(instance_id.chars().all(|c| c.is_ascii_hexdigit())); + assert_eq!(instance_id.len(), 36); + assert!(uuid::Uuid::parse_str(&instance_id).is_ok()); } #[pg_test] fn test_start_with_label() { let fut = crate::dsl::sql("SELECT 1"); let instance_id = crate::dsl::start(&fut, Some("my-test-function"), None); - assert_eq!(instance_id.len(), 8); + assert_eq!(instance_id.len(), 36); + assert!(uuid::Uuid::parse_str(&instance_id).is_ok()); + } + + #[pg_test] + fn test_start_node_ids_are_uuids() { + // #129: df.start() must generate full UUIDs for every node id and node + // reference, not only the returned instance id. A two-step sequence makes + // the root THEN node carry both a left_node and a right_node id. + let seq = crate::dsl::then_fn("SELECT 1", "SELECT 2"); + let instance_id = crate::dsl::start(&seq, Some("test-node-id-format"), None); + + let non_uuid: i64 = Spi::get_one(&format!( + "SELECT COUNT(*) FROM ( + SELECT unnest(ARRAY[id, instance_id, left_node, right_node]) AS v + FROM df.nodes WHERE instance_id = '{instance_id}' + ) s + WHERE v IS NOT NULL + AND v !~ '^[0-9a-f]{{8}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{4}}-[0-9a-f]{{12}}$'" + )) + .unwrap() + .unwrap_or(-1); + assert_eq!( + non_uuid, 0, + "All df.nodes ids and references for a new 0.2.3 instance should be full UUIDs" + ); + + let node_count: i64 = Spi::get_one(&format!( + "SELECT COUNT(*) FROM df.nodes WHERE instance_id = '{instance_id}'" + )) + .unwrap() + .unwrap_or(0); + assert!( + node_count >= 3, + "Expected a root THEN node plus two child nodes, got {node_count}" + ); } #[pg_test] @@ -1796,7 +1837,8 @@ mod tests { fn test_autowrap_start_plain_sql() { // Start with plain SQL - simplest possible durable function let instance_id = crate::dsl::start("SELECT 42", Some("autowrap-test"), None); - assert_eq!(instance_id.len(), 8); + assert_eq!(instance_id.len(), 36); + assert!(uuid::Uuid::parse_str(&instance_id).is_ok()); // Verify instance was created let count = Spi::get_one::(&format!( diff --git a/src/types.rs b/src/types.rs index 28aacbeb..efef692f 100644 --- a/src/types.rs +++ b/src/types.rs @@ -99,7 +99,24 @@ pub const MAX_GRAPH_DEPTH: usize = 256; /// unbounded INSERTs and memory exhaustion from extremely large graphs. pub const MAX_GRAPH_NODES: usize = 10_000; -/// Generate a short 8-character instance ID from a UUID +/// Generate a full canonical UUID v4 string (36 chars, lowercase hex with +/// hyphens), e.g. `2f1c4d8e-9a3b-4c7d-8e1f-0a1b2c3d4e5f`. +/// +/// This is the ID format used by df.instances.id and df.nodes.id on 0.2.3+ +/// schemas. A full UUID provides 122 bits of entropy, reducing the +/// birthday-paradox collision risk of the legacy 8-character IDs to a +/// negligible level (see #129). +pub fn full_uuid() -> String { + Uuid::new_v4().to_string() +} + +/// Generate a short 8-character instance ID from a UUID. +/// +/// Retained for backward compatibility: when the new `.so` runs against a +/// pre-0.2.3 schema (VARCHAR(8) id columns with an `^[0-9a-f]{8}$` CHECK) that +/// has not yet run `ALTER EXTENSION pg_durable UPDATE`, IDs must still be 8 +/// hex characters so INSERTs satisfy the older column type and constraints. +/// New 0.2.3+ schemas use [`full_uuid`] instead. pub fn short_id() -> String { let uuid = Uuid::new_v4(); uuid.to_string() diff --git a/tests/e2e/sql/45_connection_limit_timeout.sql b/tests/e2e/sql/45_connection_limit_timeout.sql index 79097f51..85beb24c 100644 --- a/tests/e2e/sql/45_connection_limit_timeout.sql +++ b/tests/e2e/sql/45_connection_limit_timeout.sql @@ -44,12 +44,20 @@ BEGIN RAISE EXCEPTION 'TEST FAILED: victim status = %, expected failed', victim_status; END IF; - -- Verify the error message mentions connection limit - SELECT output INTO victim_output - FROM df.instance_info(victim_id); + -- df.instances.status is flipped to 'failed' by the update-instance-status activity, + -- which commits *before* the orchestration returns its terminal error to duroxide. + -- df.instance_info().output is sourced from duroxide, so there is a brief window where + -- wait_for_completion() already reports 'failed' but the output column has not yet been + -- populated. Poll for the error message to materialize rather than reading it once. + FOR i IN 1..100 LOOP + SELECT output INTO victim_output + FROM df.instance_info(victim_id); + EXIT WHEN victim_output IS NOT NULL; + PERFORM pg_sleep(0.1); + END LOOP; IF victim_output IS NULL THEN - RAISE EXCEPTION 'TEST FAILED: victim output is NULL'; + RAISE EXCEPTION 'TEST FAILED: victim output is NULL (error message did not materialize within 10s of failure)'; END IF; IF victim_output NOT LIKE '%connection limit reached%' THEN