From f9157443f07a999b4526014a853760e4501c1c69 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 13 Mar 2026 05:41:07 +0000
Subject: [PATCH 1/3] Initial plan


From d0771c7dce49b41e29b0b9c8c13d1ac43f10c6f5 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 13 Mar 2026 06:11:19 +0000
Subject: [PATCH 2/3] Add 13 resilience E2E tests covering infinite loops,
 truthiness bugs, empty SQL, recursive start, crafted JSON, concurrency, and
 error edge cases

Co-authored-by: pinodeca <32303022+pinodeca@users.noreply.github.com>
---
 tests/e2e/sql/38_infinite_loop.sql         |  79 +++++++++++++
 tests/e2e/sql/39_truthiness_edge_cases.sql | 117 ++++++++++++++++++++
 tests/e2e/sql/40_empty_dml_results.sql     |  82 ++++++++++++++
 tests/e2e/sql/41_break_outside_loop.sql    |  35 ++++++
 tests/e2e/sql/42_recursive_start.sql       |  52 +++++++++
 tests/e2e/sql/43_empty_sql.sql             |  84 ++++++++++++++
 tests/e2e/sql/44_crafted_json.sql          | 122 +++++++++++++++++++++
 tests/e2e/sql/45_concurrent_instances.sql  |  64 +++++++++++
 tests/e2e/sql/46_deep_nesting.sql          |  34 ++++++
 tests/e2e/sql/47_rapid_cancel.sql          |  67 +++++++++++
 tests/e2e/sql/48_race_both_fail.sql        |  65 +++++++++++
 tests/e2e/sql/49_join_one_fails.sql        |  76 +++++++++++++
 tests/e2e/sql/50_signal_edge_cases.sql     |  93 ++++++++++++++++
 13 files changed, 970 insertions(+)
 create mode 100644 tests/e2e/sql/38_infinite_loop.sql
 create mode 100644 tests/e2e/sql/39_truthiness_edge_cases.sql
 create mode 100644 tests/e2e/sql/40_empty_dml_results.sql
 create mode 100644 tests/e2e/sql/41_break_outside_loop.sql
 create mode 100644 tests/e2e/sql/42_recursive_start.sql
 create mode 100644 tests/e2e/sql/43_empty_sql.sql
 create mode 100644 tests/e2e/sql/44_crafted_json.sql
 create mode 100644 tests/e2e/sql/45_concurrent_instances.sql
 create mode 100644 tests/e2e/sql/46_deep_nesting.sql
 create mode 100644 tests/e2e/sql/47_rapid_cancel.sql
 create mode 100644 tests/e2e/sql/48_race_both_fail.sql
 create mode 100644 tests/e2e/sql/49_join_one_fails.sql
 create mode 100644 tests/e2e/sql/50_signal_edge_cases.sql

diff --git a/tests/e2e/sql/38_infinite_loop.sql b/tests/e2e/sql/38_infinite_loop.sql
new file mode 100644
index 00000000..5f12dfcc
--- /dev/null
+++ b/tests/e2e/sql/38_infinite_loop.sql
@@ -0,0 +1,79 @@
+-- Test: Infinite loop cancellation (B1 / B2)
+-- Demonstrates: df.loop() with always-true condition and unconditional loop
+-- Expected:
+--   - Loops run indefinitely; df.cancel() successfully stops them
+--   - Instance ends in canceled/failed state, not stuck in running
+
+DROP TABLE IF EXISTS test_infinite_log;
+CREATE TABLE test_infinite_log (id SERIAL, variant TEXT, ts TIMESTAMP DEFAULT now());  -- loop bodies insert here, tagged by variant
+
+CREATE TEMP TABLE _inf_state (instance_id TEXT, variant TEXT);  -- pairs each df.start() result with its variant tag
+
+-- B1: loop whose while-condition ('SELECT true') is always true
+INSERT INTO _inf_state
+SELECT df.start(
+    df.loop(
+        'INSERT INTO test_infinite_log (variant) VALUES (''while_true'')',
+        'SELECT true'   -- condition never becomes false
+    ),
+    'test-infinite-while-true'
+), 'while_true';
+
+-- B2: unconditional loop (df.loop() called with a body only, no condition argument)
+INSERT INTO _inf_state
+SELECT df.start(
+    df.loop(
+        'INSERT INTO test_infinite_log (variant) VALUES (''unconditional'')'
+    ),
+    'test-infinite-unconditional'
+), 'unconditional';
+
+DO $$
+DECLARE
+    rec RECORD;
+    cnt INT;
+    status TEXT;
+    attempts INT;
+BEGIN
+    FOR rec IN SELECT instance_id, variant FROM _inf_state LOOP
+        RAISE NOTICE 'Testing infinite loop [%]: %', rec.variant, rec.instance_id;
+
+        -- Poll every 100 ms until the loop has logged at least 2 iterations (proof it is running)
+        attempts := 0;
+        LOOP
+            SELECT COUNT(*) INTO cnt FROM test_infinite_log WHERE variant = rec.variant;
+            EXIT WHEN cnt >= 2 OR attempts > 200;
+            PERFORM pg_sleep(0.1);
+            attempts := attempts + 1;
+        END LOOP;
+
+        IF cnt < 2 THEN
+            RAISE EXCEPTION 'TEST FAILED [%]: expected >= 2 iterations before cancel, got %',
+                rec.variant, cnt;
+        END IF;
+
+        -- Cancel the running loop
+        PERFORM df.cancel(rec.instance_id, 'test-cancel');
+
+        -- Poll every 200 ms until the instance reports a terminal status or the budget runs out
+        attempts := 0;
+        LOOP
+            SELECT s INTO status FROM df.status(rec.instance_id) s;
+            EXIT WHEN lower(status) IN ('canceled', 'cancelled', 'failed') OR attempts > 100;
+            PERFORM pg_sleep(0.2);
+            attempts := attempts + 1;
+        END LOOP;
+        -- NULL-safe: with a NULL status, NOT IN yields NULL and IF would silently skip the failure
+        IF status IS NULL OR lower(status) NOT IN ('canceled', 'cancelled', 'failed') THEN
+            RAISE EXCEPTION 'TEST FAILED [%]: expected canceled/failed after cancel, got %',
+                rec.variant, status;
+        END IF;
+
+        RAISE NOTICE 'PASSED [%]: ran % iterations, then canceled (status=%)',
+            rec.variant, cnt, status;
+    END LOOP;
+END $$;
+
+DROP TABLE _inf_state;
+DROP TABLE test_infinite_log;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/39_truthiness_edge_cases.sql b/tests/e2e/sql/39_truthiness_edge_cases.sql
new file mode 100644
index 00000000..64b79584
--- /dev/null
+++ b/tests/e2e/sql/39_truthiness_edge_cases.sql
@@ -0,0 +1,117 @@
+-- Test: Loop condition truthiness edge cases (B3)
+-- Demonstrates: evaluate_condition / is_truthy behavior for ambiguous values
+-- Expected: Documents and verifies the actual truthiness semantics for:
+--   NULL, integer 0, float 0.0, empty string, string "false", string "no",
+--   empty JSON array, empty JSON object
+
+-- Each sub-test starts a df.loop(body, condition) and checks whether the loop
+-- stops (condition is falsy) or runs at least 2 iterations before cancel
+-- (condition is truthy).
+
+DROP TABLE IF EXISTS test_truth_log;
+CREATE TABLE test_truth_log (id SERIAL, variant TEXT, ts TIMESTAMP DEFAULT now());
+
+-- Helper: run df.loop(body, condition) and return 'falsy' if it stops on its own or
+-- 'truthy' if it reaches 2 iterations; raises if neither is observed within ~30s.
+CREATE OR REPLACE FUNCTION _run_truth_test(
+    p_variant TEXT,
+    p_condition_sql TEXT
+) RETURNS TEXT
+LANGUAGE plpgsql AS $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    cnt INT;
+    attempts INT := 0;
+    terminal CONSTANT TEXT[] := ARRAY['completed', 'failed', 'canceled', 'cancelled'];
+BEGIN
+    inst_id := df.start(
+        df.loop(
+            format('INSERT INTO test_truth_log (variant) VALUES (%L)', p_variant),
+            p_condition_sql
+        ),
+        format('truth-%s', p_variant)
+    );
+
+    -- Poll every 100 ms for a positive signal: a terminal status or 2 logged iterations.
+    LOOP
+        SELECT s INTO status FROM df.status(inst_id) s;
+        SELECT COUNT(*) INTO cnt FROM test_truth_log WHERE variant = p_variant;
+        EXIT WHEN lower(status) = ANY (terminal) OR cnt >= 2 OR attempts > 300;
+        PERFORM pg_sleep(0.1);
+        attempts := attempts + 1;
+    END LOOP;
+
+    IF lower(status) = ANY (terminal) THEN
+        RETURN 'falsy';  -- loop stopped by itself → condition was falsy
+    END IF;
+
+    -- Not terminal: cancel before classifying so the instance is not left running.
+    PERFORM df.cancel(inst_id, 'truth-test-done');
+    attempts := 0;
+    LOOP
+        SELECT s INTO status FROM df.status(inst_id) s;
+        EXIT WHEN lower(status) = ANY (terminal) OR attempts > 50;
+        PERFORM pg_sleep(0.1);
+        attempts := attempts + 1;
+    END LOOP;
+
+    IF cnt < 2 THEN  -- poll timed out without either signal: do not guess a verdict
+        RAISE EXCEPTION 'TEST FAILED [%]: inconclusive, % iteration(s) and no terminal status before timeout', p_variant, cnt;
+    END IF;
+    RETURN 'truthy';  -- kept iterating → condition was truthy
+END $$;
+
+-- NOTE on known behavior quirks:
+-- String "false" and "no" are treated as TRUTHY by is_truthy() because they are
+-- non-empty strings that don't parse as integers. A user writing
+-- `df.loop(..., 'SELECT ''false''')` may expect the loop to stop but it will not.
+-- The correct way to return a falsy condition is `SELECT false` (boolean) or `SELECT 0`.
+
+DO $$
+DECLARE
+    -- Rows are (variant, condition_sql, expected_result). The expectations pin
+    -- down what the implementation does today, not what would be ideal.
+    -- Rows preceded by [KNOWN QUIRK] may surprise users.
+    scenarios TEXT[][] := ARRAY[
+        ARRAY['null_val',    'SELECT NULL',          'falsy'],
+        ARRAY['int_zero',    'SELECT 0',             'falsy'],
+        ARRAY['int_one',     'SELECT 1',             'truthy'],
+        ARRAY['bool_false',  'SELECT false',         'falsy'],
+        ARRAY['bool_true',   'SELECT true',          'truthy'],
+        -- [KNOWN QUIRK] A non-empty string other than "true"/"t"/"yes"/"1" that does
+        -- not parse as a non-zero integer ends up at !s.is_empty(), i.e. true.
+        ARRAY['str_false',   'SELECT ''false''',     'truthy'],
+        ARRAY['str_no',      'SELECT ''no''',        'truthy'],
+        ARRAY['empty_str',   'SELECT ''''',          'falsy'],
+        ARRAY['float_zero',  'SELECT 0.0',           'falsy'],
+        ARRAY['empty_array', 'SELECT ''[]''::jsonb', 'falsy'],
+        ARRAY['empty_obj',   'SELECT ''{}''::jsonb', 'falsy']
+    ];
+    tc TEXT[];
+    actual TEXT;
+    wanted TEXT;
+    mismatches INT := 0;
+BEGIN
+    FOREACH tc SLICE 1 IN ARRAY scenarios LOOP
+        wanted := tc[3];
+        actual := _run_truth_test(tc[1], tc[2]);
+        RAISE NOTICE 'Truthiness [%]: condition=% → %', tc[1], tc[2], actual;
+        IF actual <> wanted THEN
+            mismatches := mismatches + 1;
+            RAISE WARNING 'REGRESSION [%]: got % expected %', tc[1], actual, wanted;
+        END IF;
+    END LOOP;
+
+    -- Surface the quirks in the test output regardless of the outcome above
+    RAISE NOTICE 'KNOWN QUIRK: SELECT ''false'' and SELECT ''no'' are truthy in loop conditions. '
+        'Use SELECT false (boolean) or SELECT 0 to stop a loop.';
+
+    IF mismatches > 0 THEN
+        RAISE EXCEPTION 'TEST FAILED: % truthiness regression(s) — see WARNINGs above', mismatches;
+    END IF;
+END $$;
+
+DROP FUNCTION _run_truth_test(TEXT, TEXT);
+DROP TABLE test_truth_log;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/40_empty_dml_results.sql b/tests/e2e/sql/40_empty_dml_results.sql
new file mode 100644
index 00000000..d0dabe2f
--- /dev/null
+++ b/tests/e2e/sql/40_empty_dml_results.sql
@@ -0,0 +1,82 @@
+-- Test: SQL nodes returning 0 rows or DML without RETURNING (B5 / B6)
+-- Demonstrates: How empty result sets and DML results flow through |=> and $var
+-- Expected: Both patterns complete successfully; documents the JSON result shape
+
+DROP TABLE IF EXISTS test_dml_target;
+CREATE TABLE test_dml_target (id SERIAL, val TEXT);
+
+-- ============================================================================
+-- B5: SQL node that returns 0 rows, result used in next node
+-- ============================================================================
+CREATE TEMP TABLE _b5_state AS
+SELECT df.start(
+    'SELECT 1 WHERE false' |=> 'empty_result'
+    ~> 'SELECT $empty_result',   -- uses the empty result JSON
+    'test-empty-result'
+) AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    res TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _b5_state;
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- IS DISTINCT FROM is NULL-safe: a NULL status must fail the test, not slip past "!="
+    IF status IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'TEST FAILED [B5]: expected Completed, got %', status;
+    END IF;
+
+    SELECT r INTO res FROM df.result(inst_id) r;
+    RAISE NOTICE 'B5 result (empty result passed as $var): %', res;
+    RAISE NOTICE 'PASSED [B5]: zero-row SQL result flows through |=> correctly';
+END $$;
+
+DROP TABLE _b5_state;
+
+-- ============================================================================
+-- B6: DML node without RETURNING, result used in next node
+-- ============================================================================
+INSERT INTO test_dml_target (val) VALUES ('initial');
+
+CREATE TEMP TABLE _b6_state AS
+SELECT df.start(
+    'UPDATE test_dml_target SET val = ''updated''' |=> 'update_result'
+    ~> 'SELECT $update_result',  -- uses the DML result JSON (0 rows, row_count > 0)
+    'test-dml-result'
+) AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    res TEXT;
+    updated_val TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _b6_state;
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- IS DISTINCT FROM is NULL-safe: a NULL status must fail the test, not slip past "!="
+    IF status IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'TEST FAILED [B6]: expected Completed, got %', status;
+    END IF;
+
+    SELECT r INTO res FROM df.result(inst_id) r;
+    RAISE NOTICE 'B6 result (DML result passed as $var): %', res;
+
+    -- Verify the DML actually ran (NULL-safe: a missing row or NULL val also fails)
+    SELECT val INTO updated_val FROM test_dml_target LIMIT 1;
+    IF updated_val IS DISTINCT FROM 'updated' THEN
+        RAISE EXCEPTION 'TEST FAILED [B6]: DML did not execute, val = %', updated_val;
+    END IF;
+
+    RAISE NOTICE 'PASSED [B6]: DML result flows through |=> correctly';
+END $$;
+
+DROP TABLE _b6_state;
+
+-- ============================================================================
+-- Cleanup
+-- ============================================================================
+DROP TABLE test_dml_target;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/41_break_outside_loop.sql b/tests/e2e/sql/41_break_outside_loop.sql
new file mode 100644
index 00000000..d1223bf4
--- /dev/null
+++ b/tests/e2e/sql/41_break_outside_loop.sql
@@ -0,0 +1,35 @@
+-- Test: df.break() used at the top level outside any loop (B10)
+-- Demonstrates: Break sentinel propagated as final instance result
+-- Expected: Instance completes (the break sentinel becomes the result),
+--           does NOT hang or crash.
+
+CREATE TEMP TABLE _b10_state AS
+SELECT df.start(
+    df.break('{"reason": "top-level-break"}'),
+    'test-break-outside-loop'
+) AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    res TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _b10_state;
+
+    -- A top-level break has no enclosing loop to consume it, so the break
+    -- sentinel propagates as the final result.  The instance should complete
+    -- rather than hang or fail with an error.
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- IS DISTINCT FROM is NULL-safe: a NULL status must fail the test, not slip past "!="
+    IF status IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'TEST FAILED [B10]: expected Completed for top-level break, got %', status;
+    END IF;
+
+    SELECT r INTO res FROM df.result(inst_id) r;
+    RAISE NOTICE 'B10 result (top-level break value): %', res;
+    RAISE NOTICE 'PASSED [B10]: df.break() at top level completes gracefully';
+END $$;
+
+DROP TABLE _b10_state;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/42_recursive_start.sql b/tests/e2e/sql/42_recursive_start.sql
new file mode 100644
index 00000000..0d6fb682
--- /dev/null
+++ b/tests/e2e/sql/42_recursive_start.sql
@@ -0,0 +1,52 @@
+-- Test: Calling df.start() from inside a workflow SQL node (B11)
+-- Demonstrates: df.start() is not guarded by is_in_workflow_context().
+--               A SQL node can spawn child instances, which the background
+--               worker picks up independently.
+-- Expected: Outer instance completes; child instance is created and completes.
+
+DROP TABLE IF EXISTS test_recursive_log;
+CREATE TABLE test_recursive_log (id SERIAL, spawned_id TEXT, ts TIMESTAMP DEFAULT now());
+
+CREATE TEMP TABLE _b11_outer AS
+SELECT df.start(
+    -- This SQL node calls df.start() to spawn a child instance and records the ID.
+    'INSERT INTO test_recursive_log (spawned_id)
+     SELECT df.start(df.sql(''SELECT 1''), ''child-from-workflow'')',
+    'test-recursive-start-outer'
+) AS instance_id;
+
+DO $$
+DECLARE
+    outer_id TEXT;
+    child_id TEXT;
+    status TEXT;
+BEGIN
+    SELECT instance_id INTO outer_id FROM _b11_outer;
+    RAISE NOTICE 'Outer instance: %', outer_id;
+
+    -- Wait for the outer instance to complete (NULL-safe check: NULL status fails too)
+    SELECT df.wait_for_completion(outer_id, 30) INTO status;
+    IF status IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'TEST FAILED [B11]: outer instance expected Completed, got %', status;
+    END IF;
+
+    -- Verify that a child instance was spawned
+    SELECT spawned_id INTO child_id FROM test_recursive_log LIMIT 1;
+    IF child_id IS NULL THEN
+        RAISE EXCEPTION 'TEST FAILED [B11]: expected a child instance to be spawned';
+    END IF;
+    RAISE NOTICE 'Child instance spawned: %', child_id;
+
+    -- Wait for the child instance to complete (NULL-safe check: NULL status fails too)
+    SELECT df.wait_for_completion(child_id, 30) INTO status;
+    IF status IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'TEST FAILED [B11]: child instance expected Completed, got %', status;
+    END IF;
+
+    RAISE NOTICE 'PASSED [B11]: df.start() inside a workflow spawns a running child instance';
+    RAISE NOTICE 'NOTE: No recursion guard exists — unbounded spawning is possible if used carelessly';
+END $$;
+
+DROP TABLE _b11_outer;
+DROP TABLE test_recursive_log;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/43_empty_sql.sql b/tests/e2e/sql/43_empty_sql.sql
new file mode 100644
index 00000000..01dd87b2
--- /dev/null
+++ b/tests/e2e/sql/43_empty_sql.sql
@@ -0,0 +1,84 @@
+-- Test: Empty and whitespace-only SQL strings (C1)
+-- Demonstrates: df.sql('') and df.sql('   ') pass DSL validation but fail at execution
+-- Expected: df.start() succeeds (validation doesn't reject empty queries),
+--           but the instance transitions to Failed when worker executes the empty query.
+
+-- ============================================================================
+-- C1a: Empty string SQL
+-- ============================================================================
+CREATE TEMP TABLE _c1a_state AS
+SELECT df.start(df.sql(''), 'test-empty-sql') AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _c1a_state;
+    RAISE NOTICE 'C1a: Testing empty SQL, instance: %', inst_id;
+
+    -- Empty query will fail at execution time (PostgreSQL rejects empty statement)
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- NULL-safe: with a NULL status, NOT IN yields NULL and IF would silently skip the failure
+    IF status IS NULL OR lower(status) NOT IN ('failed', 'completed') THEN
+        RAISE EXCEPTION 'TEST FAILED [C1a]: expected Failed or Completed for empty SQL, got %', status;
+    END IF;
+
+    RAISE NOTICE 'C1a: empty SQL result status = % (expected Failed)', status;
+    RAISE NOTICE 'PASSED [C1a]: empty SQL handled gracefully (no crash)';
+END $$;
+
+DROP TABLE _c1a_state;
+
+-- ============================================================================
+-- C1b: Whitespace-only SQL
+-- ============================================================================
+CREATE TEMP TABLE _c1b_state AS
+SELECT df.start(df.sql('   '), 'test-whitespace-sql') AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _c1b_state;
+    RAISE NOTICE 'C1b: Testing whitespace SQL, instance: %', inst_id;
+
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- NULL-safe: with a NULL status, NOT IN yields NULL and IF would silently skip the failure
+    IF status IS NULL OR lower(status) NOT IN ('failed', 'completed') THEN
+        RAISE EXCEPTION 'TEST FAILED [C1b]: expected Failed or Completed for whitespace SQL, got %', status;
+    END IF;
+
+    RAISE NOTICE 'C1b: whitespace SQL result status = % (expected Failed)', status;
+    RAISE NOTICE 'PASSED [C1b]: whitespace SQL handled gracefully (no crash)';
+END $$;
+
+DROP TABLE _c1b_state;
+
+-- ============================================================================
+-- C1c: Non-SQL text
+-- ============================================================================
+CREATE TEMP TABLE _c1c_state AS
+SELECT df.start(df.sql('this is not valid sql at all'), 'test-nonsql') AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _c1c_state;
+    RAISE NOTICE 'C1c: Testing non-SQL text, instance: %', inst_id;
+
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- NULL-safe: with a NULL status, NOT IN yields NULL and IF would silently skip the failure
+    IF status IS NULL OR lower(status) NOT IN ('failed', 'completed') THEN
+        RAISE EXCEPTION 'TEST FAILED [C1c]: expected Failed for non-SQL text, got %', status;
+    END IF;
+
+    RAISE NOTICE 'C1c: non-SQL text result status = % (expected Failed)', status;
+    RAISE NOTICE 'PASSED [C1c]: non-SQL text handled gracefully (no crash)';
+END $$;
+
+DROP TABLE _c1c_state;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/44_crafted_json.sql b/tests/e2e/sql/44_crafted_json.sql
new file mode 100644
index 00000000..64f0fc86
--- /dev/null
+++ b/tests/e2e/sql/44_crafted_json.sql
@@ -0,0 +1,122 @@
+-- Test: Manually crafted JSON inputs bypassing the DSL (C7)
+-- Extends tests 32 (invalid node_type) and 33 (malformed condition_node)
+-- Demonstrates: Additional raw JSON edge cases and unknown-field handling
+
+-- ============================================================================
+-- C7a: Valid node type, unknown extra field (should be ignored or rejected)
+-- ============================================================================
+DO $body$
+DECLARE
+    started_id TEXT;
+    final_status TEXT;
+BEGIN
+    -- Observational probe: either outcome (accepted or rejected) is only logged.
+    started_id := df.start('{"node_type":"SQL","query":"SELECT 1","evil_field":"pwned"}');
+    -- Reaching here means the extra "evil_field" key did not make df.start() raise
+    RAISE NOTICE 'C7a: df.start accepted unknown field (serde ignores unknowns)';
+    final_status := df.wait_for_completion(started_id, 30);
+    RAISE NOTICE 'C7a: status = %', final_status;
+EXCEPTION WHEN OTHERS THEN
+    -- Any error raised above, from either call, is reported with this message
+    RAISE NOTICE 'C7a: df.start rejected unknown field: %', SQLERRM;
+END $body$;
+
+-- ============================================================================
+-- C7b: THEN node with non-object left_node (string instead of object)
+-- ============================================================================
+DO $body$
+BEGIN
+    PERFORM df.start('{"node_type":"THEN","left_node":"not an object","right_node":{"node_type":"SQL","query":"SELECT 2"}}');
+    -- assert_failure is exempt from WHEN OTHERS, so this failure is not swallowed by the handler below.
+    RAISE EXCEPTION 'TEST FAILED [C7b]: df.start should have rejected non-object left_node'
+        USING ERRCODE = 'assert_failure';
+EXCEPTION WHEN OTHERS THEN
+    RAISE NOTICE 'C7b: Caught expected error for non-object left_node: %', SQLERRM;
+END $body$;
+
+-- ============================================================================
+-- C7c: THEN node with null left_node (accepted by serde, may fail at runtime)
+-- ============================================================================
+DO $body$
+DECLARE
+    started_id TEXT;
+    final_status TEXT;
+BEGIN
+    -- Observational probe: nothing here can fail the test; outcomes are logged only.
+    started_id := df.start('{"node_type":"THEN","left_node":null,"right_node":{"node_type":"SQL","query":"SELECT 2"}}');
+    -- Reaching here means df.start() accepted the null left_node; the instance
+    -- may still fail at runtime if the orchestration finds no left node.
+    RAISE NOTICE 'C7c: df.start accepted null left_node (serde treats null as None)';
+    final_status := df.wait_for_completion(started_id, 30);
+    RAISE NOTICE 'C7c: null left_node instance status = %', final_status;
+EXCEPTION WHEN OTHERS THEN
+    -- Errors from either call above land here and are logged as a rejection
+    RAISE NOTICE 'C7c: df.start rejected null left_node: %', SQLERRM;
+END $body$;
+
+-- ============================================================================
+-- C7d: SQL node with null query (accepted by serde, fails at execution time)
+-- ============================================================================
+DO $body$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    BEGIN
+        inst_id := df.start('{"node_type":"SQL","query":null}');
+        -- Reaching here means the null query was accepted; the instance is expected to fail at execution.
+        -- The assertion below uses assert_failure, which WHEN OTHERS does not trap, so it cannot be swallowed.
+        RAISE NOTICE 'C7d: df.start accepted null query (inserted with NULL query column)';
+        SELECT df.wait_for_completion(inst_id, 30) INTO status;
+        IF status IS NULL OR lower(status) NOT IN ('failed', 'completed') THEN
+            RAISE EXCEPTION 'TEST FAILED [C7d]: expected Failed for null query, got %', status USING ERRCODE = 'assert_failure';
+        END IF;
+        RAISE NOTICE 'C7d: null query instance status = % (expected Failed)', status;
+    EXCEPTION WHEN OTHERS THEN
+        RAISE NOTICE 'C7d: df.start rejected null query: %', SQLERRM;
+    END;
+END $body$;
+
+-- ============================================================================
+-- C7e: LOOP node with left_node missing (no body)
+-- ============================================================================
+DO $body$
+BEGIN
+    PERFORM df.start('{"node_type":"LOOP"}');
+    -- assert_failure is exempt from WHEN OTHERS, so this failure is not swallowed by the handler below.
+    RAISE EXCEPTION 'TEST FAILED [C7e]: df.start should have rejected LOOP without body'
+        USING ERRCODE = 'assert_failure';
+EXCEPTION WHEN OTHERS THEN
+    RAISE NOTICE 'C7e: Caught expected error for LOOP without body: %', SQLERRM;
+END $body$;
+
+-- ============================================================================
+-- C7f: Completely empty JSON object
+-- ============================================================================
+DO $body$
+BEGIN
+    PERFORM df.start('{}');
+    -- assert_failure is exempt from WHEN OTHERS, so this failure is not swallowed by the handler below.
+    RAISE EXCEPTION 'TEST FAILED [C7f]: df.start should have rejected empty JSON object'
+        USING ERRCODE = 'assert_failure';
+EXCEPTION WHEN OTHERS THEN
+    RAISE NOTICE 'C7f: Caught expected error for empty JSON: %', SQLERRM;
+END $body$;
+
+-- ============================================================================
+-- C7g: Plain string (auto-wrapped as SQL node) — should succeed
+-- ============================================================================
+DO $body$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    inst_id := df.start('SELECT 1', 'test-plain-string-c7g');
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    IF status IS DISTINCT FROM 'completed' THEN  -- NULL-safe: a NULL status must fail too
+        RAISE EXCEPTION 'TEST FAILED [C7g]: plain string auto-wrap expected Completed, got %', status;
+    END IF;
+    RAISE NOTICE 'C7g: plain string auto-wrapped as SQL node and completed successfully';
+END $body$;
+
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/45_concurrent_instances.sql b/tests/e2e/sql/45_concurrent_instances.sql
new file mode 100644
index 00000000..161eeb96
--- /dev/null
+++ b/tests/e2e/sql/45_concurrent_instances.sql
@@ -0,0 +1,64 @@
+-- Test: Many concurrent instances (A1)
+-- Demonstrates: Background worker handles a burst of simultaneous instances
+-- Expected: All 20 instances complete within 60 seconds; none stuck in pending/running
+
+DROP TABLE IF EXISTS test_burst_instances;
+CREATE TABLE test_burst_instances (id SERIAL, instance_id TEXT);
+
+DO $$
+DECLARE
+    n INT;
+    burst_size INT := 20;
+BEGIN
+    -- Fire off burst_size trivial instances back to back, remembering each id
+    FOR n IN 1..burst_size LOOP
+        INSERT INTO test_burst_instances (instance_id)
+        VALUES (df.start(df.sql('SELECT 1'), 'burst-' || n));
+    END LOOP;
+    RAISE NOTICE 'Started % instances', burst_size;
+END $$;
+
+-- Wait for all burst instances to complete
+DO $$
+DECLARE
+    completed_count INT;
+    attempts INT := 0;
+    total INT := 20;
+BEGIN
+    LOOP
+        -- Count burst instances in a terminal state; both spellings of "canceled" are accepted
+        SELECT COUNT(*) INTO completed_count
+        FROM test_burst_instances b
+        JOIN df.instances i ON i.id = b.instance_id
+        WHERE lower(i.status) IN ('completed', 'failed', 'canceled', 'cancelled');
+        EXIT WHEN completed_count >= total OR attempts > 600;  -- 60s timeout
+        PERFORM pg_sleep(0.1);
+        attempts := attempts + 1;
+    END LOOP;
+
+    IF completed_count < total THEN
+        RAISE EXCEPTION 'TEST FAILED [A1]: only %/% instances completed within timeout', completed_count, total;
+    END IF;
+
+    RAISE NOTICE 'PASSED [A1]: all % concurrent instances completed', total;
+END $$;
+
+-- Verify no instances are stuck in pending or running
+DO $$
+DECLARE
+    lingering INT;  -- burst instances whose status is still pending/running
+BEGIN
+    lingering := (
+        SELECT COUNT(*)
+        FROM df.instances AS inst
+        INNER JOIN test_burst_instances AS burst ON burst.instance_id = inst.id
+        WHERE lower(inst.status) IN ('pending', 'running')
+    );
+    IF lingering > 0 THEN
+        RAISE EXCEPTION 'TEST FAILED [A1]: % instances stuck in pending/running', lingering;
+    END IF;
+    RAISE NOTICE 'PASSED [A1]: no instances stuck after burst';
+END $$;
+
+DROP TABLE test_burst_instances;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/46_deep_nesting.sql b/tests/e2e/sql/46_deep_nesting.sql
new file mode 100644
index 00000000..7b96114f
--- /dev/null
+++ b/tests/e2e/sql/46_deep_nesting.sql
@@ -0,0 +1,34 @@
+-- Test: Deep graph nesting — 50-level sequential chain (A2)
+-- Demonstrates: execute_function_node_with_vars handles deeply nested THEN nodes
+--               without stack overflow.
+-- Expected: Instance completes successfully.
+
+-- Build a 50-step sequential chain using a DO block, then start it
+DO $$
+DECLARE
+    chain TEXT;
+    i INT;
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    -- Start with a single SQL node
+    chain := df.sql('SELECT 1');
+
+    -- Append 49 more steps: 50 SQL steps in total, chained by 49 nested df.seq() calls
+    FOR i IN 2..50 LOOP
+        chain := df.seq(chain, format('SELECT %s', i));
+    END LOOP;
+
+    inst_id := df.start(chain, 'test-deep-nesting-50');
+    RAISE NOTICE 'Deep nesting test started: %', inst_id;
+
+    SELECT df.wait_for_completion(inst_id, 60) INTO status;
+    -- IS DISTINCT FROM is NULL-safe: a NULL status must fail the test, not slip past "!="
+    IF status IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'TEST FAILED [A2]: 50-level deep chain expected Completed, got %', status;
+    END IF;
+
+    RAISE NOTICE 'PASSED [A2]: 50-level sequential chain completed successfully';
+END $$;
+
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/47_rapid_cancel.sql b/tests/e2e/sql/47_rapid_cancel.sql
new file mode 100644
index 00000000..17b431d4
--- /dev/null
+++ b/tests/e2e/sql/47_rapid_cancel.sql
@@ -0,0 +1,67 @@
+-- Test: Rapid start/cancel cycles (A7)
+-- Demonstrates: Race between worker pickup and cancel signal;
+--               verifies no instances are stuck after repeated start+cancel.
+-- Expected: All instances end up in a terminal state (canceled, failed, or completed).
+
+DROP TABLE IF EXISTS test_rapid_cancel_instances;
+CREATE TABLE test_rapid_cancel_instances (id SERIAL, instance_id TEXT);
+
+DO $$
+DECLARE
+    seq_no INT;
+    victim_id TEXT;
+    batch_size INT := 20;
+BEGIN
+    FOR seq_no IN 1..batch_size LOOP
+        -- Launch a df.sleep(60) instance, record its id, and cancel it straight away
+        INSERT INTO test_rapid_cancel_instances (instance_id)
+        VALUES (df.start(df.sleep(60), 'rapid-cancel-' || seq_no)) RETURNING instance_id INTO victim_id;
+        PERFORM df.cancel(victim_id, 'rapid-cancel-test');
+    END LOOP;
+    RAISE NOTICE 'Started and canceled % instances', batch_size;
+END $$;
+
+-- Wait for all to settle into a terminal state
+DO $$
+DECLARE
+    terminal_count INT;
+    polls INT := 0;
+    expected_count INT := 20;
+BEGIN
+    -- Poll every 100 ms until every instance is terminal or the poll budget runs out
+    LOOP
+        terminal_count := (
+            SELECT COUNT(*)
+            FROM df.instances AS inst
+            INNER JOIN test_rapid_cancel_instances AS rc ON rc.instance_id = inst.id
+            WHERE lower(inst.status) IN ('completed', 'failed', 'canceled', 'cancelled')
+        );
+        EXIT WHEN NOT (terminal_count < expected_count AND polls <= 300);  -- 30s timeout
+        PERFORM pg_sleep(0.1);
+        polls := polls + 1;
+    END LOOP;
+    IF terminal_count < expected_count THEN
+        RAISE EXCEPTION 'TEST FAILED [A7]: only %/% instances settled within timeout', terminal_count, expected_count;
+    END IF;
+    RAISE NOTICE 'PASSED [A7]: all % rapid-cancel instances settled', expected_count;
+END $$;
+
+-- Verify no instances stuck
+DO $$
+DECLARE
+    lingering INT;  -- rapid-cancel instances whose status is still pending/running
+BEGIN
+    lingering := (
+        SELECT COUNT(*)
+        FROM df.instances AS inst
+        INNER JOIN test_rapid_cancel_instances AS rc ON rc.instance_id = inst.id
+        WHERE lower(inst.status) IN ('pending', 'running')
+    );
+    IF lingering > 0 THEN
+        RAISE EXCEPTION 'TEST FAILED [A7]: % instances stuck after rapid cancel', lingering;
+    END IF;
+    RAISE NOTICE 'PASSED [A7]: no instances stuck after rapid start/cancel';
+END $$;
+
+DROP TABLE test_rapid_cancel_instances;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/48_race_both_fail.sql b/tests/e2e/sql/48_race_both_fail.sql
new file mode 100644
index 00000000..cabb22fb
--- /dev/null
+++ b/tests/e2e/sql/48_race_both_fail.sql
@@ -0,0 +1,65 @@
+-- Test: RACE where both branches fail (B8)
+-- Demonstrates: ctx.select2 behavior when both branches of a RACE node error
+-- Expected: Instance transitions to Failed (not stuck in Running)
+
+-- ============================================================================
+-- B8a: df.race() function — both branches fail
+-- ============================================================================
+CREATE TEMP TABLE _b8a_state AS
+SELECT df.start(
+    df.race(
+        'SELECT 1/0',   -- division by zero
+        'SELECT 2/0'    -- division by zero
+    ),
+    'test-race-both-fail-func'
+) AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _b8a_state;
+    RAISE NOTICE 'B8a: Testing race(both-fail) func: %', inst_id;
+    -- Wait for a final status (the 30 is presumably a timeout in seconds; confirm)
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- A NULL status would slip through NOT IN (IF treats NULL as false), so reject it explicitly
+    IF status IS NULL OR lower(status) NOT IN ('failed', 'completed') THEN
+        RAISE EXCEPTION 'TEST FAILED [B8a]: expected Failed for race(both-fail), got %', status;
+    END IF;
+
+    RAISE NOTICE 'B8a: race(both-fail) status = %', status;
+    RAISE NOTICE 'PASSED [B8a]: race with both branches failing is handled gracefully';
+END $$;
+
+DROP TABLE _b8a_state;
+
+-- ============================================================================
+-- B8b: | operator — both branches fail
+-- ============================================================================
+CREATE TEMP TABLE _b8b_state AS
+SELECT df.start(
+    'SELECT 1/0' | 'SELECT 2/0',
+    'test-race-both-fail-op'
+) AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _b8b_state;
+    RAISE NOTICE 'B8b: Testing | operator both-fail: %', inst_id;
+    -- Wait for a final status (the 30 is presumably a timeout in seconds; confirm)
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- A NULL status would slip through NOT IN (IF treats NULL as false), so reject it explicitly
+    IF status IS NULL OR lower(status) NOT IN ('failed', 'completed') THEN
+        RAISE EXCEPTION 'TEST FAILED [B8b]: expected Failed for | both-fail, got %', status;
+    END IF;
+
+    RAISE NOTICE 'B8b: | both-fail status = %', status;
+    RAISE NOTICE 'PASSED [B8b]: | operator with both branches failing is handled gracefully';
+END $$;
+
+DROP TABLE _b8b_state;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/49_join_one_fails.sql b/tests/e2e/sql/49_join_one_fails.sql
new file mode 100644
index 00000000..2ff83001
--- /dev/null
+++ b/tests/e2e/sql/49_join_one_fails.sql
@@ -0,0 +1,76 @@
+-- Test: JOIN where one branch fails (B9)
+-- Demonstrates: execute_join_node behavior when one branch errors
+-- Expected: Instance transitions to Failed (not stuck). The successful branch's
+--           side effects (if any committed DML) are already persisted.
+
+DROP TABLE IF EXISTS test_join_fail_log;
+CREATE TABLE test_join_fail_log (id SERIAL, branch TEXT, ts TIMESTAMP DEFAULT now());
+
+-- ============================================================================
+-- B9a: df.join() — left succeeds, right fails
+-- ============================================================================
+CREATE TEMP TABLE _b9a_state AS
+SELECT df.start(
+    df.join(
+        'INSERT INTO test_join_fail_log (branch) VALUES (''left'') RETURNING ''ok''',
+        'SELECT 1/0'   -- fails
+    ),
+    'test-join-one-fail-func'
+) AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    left_ran INT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _b9a_state;
+    RAISE NOTICE 'B9a: Testing join(left-ok, right-fail): %', inst_id;
+    -- Wait for a final status (the 30 is presumably a timeout in seconds; confirm)
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- A NULL status would slip through NOT IN (IF treats NULL as false), so reject it explicitly
+    IF status IS NULL OR lower(status) NOT IN ('failed', 'completed') THEN
+        RAISE EXCEPTION 'TEST FAILED [B9a]: expected Failed for join(one-fail), got %', status;
+    END IF;
+
+    -- Check whether the left branch's DML committed (it may or may not depending on tx boundaries)
+    SELECT COUNT(*) INTO left_ran FROM test_join_fail_log WHERE branch = 'left';
+    RAISE NOTICE 'B9a: join(one-fail) status=%, left branch ran=% time(s)', status, left_ran;
+    RAISE NOTICE 'PASSED [B9a]: join with one failing branch handled gracefully';
+END $$;
+
+DROP TABLE _b9a_state;
+
+-- ============================================================================
+-- B9b: & operator — left fails, right succeeds
+-- ============================================================================
+CREATE TEMP TABLE _b9b_state AS
+SELECT df.start(
+    'SELECT 1/0'  -- fails
+    & 'INSERT INTO test_join_fail_log (branch) VALUES (''right'') RETURNING ''ok''',
+    'test-join-one-fail-op'
+) AS instance_id;
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+    right_ran INT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _b9b_state;
+    RAISE NOTICE 'B9b: Testing & operator (left-fail, right-ok): %', inst_id;
+    -- Wait for a final status (the 30 is presumably a timeout in seconds; confirm)
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- A NULL status would slip through NOT IN (IF treats NULL as false), so reject it explicitly
+    IF status IS NULL OR lower(status) NOT IN ('failed', 'completed') THEN
+        RAISE EXCEPTION 'TEST FAILED [B9b]: expected Failed for & (one-fail), got %', status;
+    END IF;
+
+    SELECT COUNT(*) INTO right_ran FROM test_join_fail_log WHERE branch = 'right';
+    RAISE NOTICE 'B9b: & (one-fail) status=%, right branch ran=% time(s)', status, right_ran;
+    RAISE NOTICE 'PASSED [B9b]: & operator with one failing branch handled gracefully';
+END $$;
+
+DROP TABLE _b9b_state;
+DROP TABLE test_join_fail_log;
+SELECT 'TEST PASSED' AS result;
diff --git a/tests/e2e/sql/50_signal_edge_cases.sql b/tests/e2e/sql/50_signal_edge_cases.sql
new file mode 100644
index 00000000..9ecd5e5b
--- /dev/null
+++ b/tests/e2e/sql/50_signal_edge_cases.sql
@@ -0,0 +1,93 @@
+-- Test: Signal edge cases (B12 / B13)
+-- B12: Signal to non-existent or already-completed instance
+-- B13: Multiple signals with the same name sent to one instance
+
+-- ============================================================================
+-- B12a: Signal to a garbage/non-existent instance ID
+-- ============================================================================
+-- Both outcomes are accepted: the call may return quietly or it may raise.
+-- This block only reports which one happened; it never fails the script.
+DO $$
+BEGIN
+    -- 'nonexistentid' is a made-up id that should match no instance
+    PERFORM df.signal('nonexistentid', 'approve', '{}');
+    RAISE NOTICE 'B12a: df.signal to non-existent ID did not raise an error';
+EXCEPTION WHEN OTHERS THEN
+    -- Surface the error text as a notice rather than aborting
+    RAISE NOTICE 'B12a: df.signal to non-existent ID raised: %', SQLERRM;
+END $$;
+
+-- ============================================================================
+-- B12b: Signal to an already-completed instance
+-- ============================================================================
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    -- Start and complete a trivial instance
+    inst_id := df.start('SELECT 1', 'test-signal-after-complete');
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- Compare case-insensitively like the other status checks; IS DISTINCT FROM also catches NULL
+    IF lower(status) IS DISTINCT FROM 'completed' THEN
+        RAISE EXCEPTION 'TEST SETUP FAILED [B12b]: instance did not complete, got %', status;
+    END IF;
+
+    -- Now try to signal the completed instance; either outcome is only reported, not asserted
+    BEGIN
+        PERFORM df.signal(inst_id, 'too-late', '{"note": "already done"}');
+        RAISE NOTICE 'B12b: df.signal to completed instance did not raise an error';
+    EXCEPTION WHEN OTHERS THEN
+        RAISE NOTICE 'B12b: df.signal to completed instance raised: %', SQLERRM;
+    END;
+END $$;
+
+-- ============================================================================
+-- B13: Multiple signals with the same name to the same waiting instance
+-- ============================================================================
+CREATE TEMP TABLE _b13_state AS
+SELECT df.start(
+    df.wait_for_signal('multi-signal') |=> 'sig'
+    ~> 'SELECT $sig',
+    'test-multi-signal'
+) AS instance_id;
+
+-- Fixed 2s pause to let the instance reach the waiting-for-signal state (a pause, not a guarantee)
+SELECT pg_sleep(2);
+
+DO $$
+DECLARE
+    inst_id TEXT;
+    status TEXT;
+BEGIN
+    SELECT instance_id INTO inst_id FROM _b13_state;
+    RAISE NOTICE 'B13: instance waiting for signal: %', inst_id;
+
+    -- Send the signal twice; an error from either send is reported, not fatal
+    BEGIN
+        PERFORM df.signal(inst_id, 'multi-signal', '{"delivery": 1}');
+        RAISE NOTICE 'B13: first signal sent';
+    EXCEPTION WHEN OTHERS THEN
+        RAISE NOTICE 'B13: first signal error: %', SQLERRM;
+    END;
+
+    BEGIN
+        PERFORM df.signal(inst_id, 'multi-signal', '{"delivery": 2}');
+        RAISE NOTICE 'B13: second signal sent';
+    EXCEPTION WHEN OTHERS THEN
+        RAISE NOTICE 'B13: second signal error: %', SQLERRM;
+    END;
+
+    -- Wait for instance to complete
+    SELECT df.wait_for_completion(inst_id, 30) INTO status;
+    -- A NULL status would slip through NOT IN (IF treats NULL as false), so reject it explicitly
+    IF status IS NULL OR lower(status) NOT IN ('completed', 'failed') THEN
+        RAISE EXCEPTION 'TEST FAILED [B13]: expected Completed or Failed, got %', status;
+    END IF;
+
+    RAISE NOTICE 'B13: multiple signals result status = %', status;
+    RAISE NOTICE 'PASSED [B13]: duplicate signal handled without crash';
+END $$;
+
+DROP TABLE _b13_state;
+SELECT 'TEST PASSED' AS result;

From bb6a52c289460f09c26ef9aa7102566982a91a9e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 13 Mar 2026 06:13:10 +0000
Subject: [PATCH 3/3] Fix timeout handling in truthiness test: increase to 10s,
 handle fallthrough case

Co-authored-by: pinodeca <32303022+pinodeca@users.noreply.github.com>
---
 tests/e2e/sql/39_truthiness_edge_cases.sql | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/e2e/sql/39_truthiness_edge_cases.sql b/tests/e2e/sql/39_truthiness_edge_cases.sql
index 64b79584..5c71d1f6 100644
--- a/tests/e2e/sql/39_truthiness_edge_cases.sql
+++ b/tests/e2e/sql/39_truthiness_edge_cases.sql
@@ -32,13 +32,13 @@ BEGIN
         format('truth-%s', p_variant)
     );
 
-    -- Wait up to 3s for the loop to either stop on its own or run 2 iterations
+    -- Wait up to 10s for the loop to either stop on its own or run 2 iterations
     LOOP
         SELECT s INTO status FROM df.status(inst_id) s;
         SELECT COUNT(*) INTO cnt FROM test_truth_log WHERE variant = p_variant;
         EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled', 'cancelled')
                OR cnt >= 2
-               OR attempts > 30;
+               OR attempts > 100;
         PERFORM pg_sleep(0.1);
         attempts := attempts + 1;
     END LOOP;
@@ -46,8 +46,8 @@ BEGIN
     IF lower(status) IN ('completed', 'failed', 'canceled', 'cancelled') THEN
         -- Loop stopped by itself → condition was falsy
         RETURN 'falsy';
-    ELSE
-        -- Loop kept running → condition is truthy; cancel it
+    ELSIF cnt >= 2 THEN
+        -- Loop kept running beyond 1 iteration → condition is truthy; cancel it
         PERFORM df.cancel(inst_id, 'truth-test-done');
         -- Wait for cancel to land
         attempts := 0;
@@ -59,6 +59,10 @@ BEGIN
             attempts := attempts + 1;
         END LOOP;
         RETURN 'truthy';
+    ELSE
+        -- Timeout: instance did not start within 10s (worker busy or dead)
+        PERFORM df.cancel(inst_id, 'truth-test-timeout');
+        RAISE EXCEPTION 'Timeout waiting for truth test [%] (status=%, cnt=%)', p_variant, status, cnt;
     END IF;
 END $$;