Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
716 changes: 716 additions & 0 deletions docs/resilience-testing.md

Large diffs are not rendered by default.

37 changes: 36 additions & 1 deletion scripts/test-e2e-local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
# --clean Start with fresh database (wipes all data)
# --verbose Show all NOTICE messages and full error output
# -v Same as --verbose
# --keep-going Continue running tests after a failure (default: exit on first failure)
# -k Same as --keep-going
# --pg-version VER PostgreSQL major version to use (default: 17)
# --no-preload Start PostgreSQL WITHOUT shared_preload_libraries=pg_durable
# (runs only 00_requires_shared_preload test)
Expand All @@ -22,6 +24,7 @@
# ./scripts/test-e2e-local.sh -v 27_database_guc # Run test with verbose output
# ./scripts/test-e2e-local.sh --pg-version 18 # Run all tests against PG18
# ./scripts/test-e2e-local.sh --no-preload # Test shared_preload_libraries enforcement
# ./scripts/test-e2e-local.sh --keep-going # Run all tests, don't stop on failure

set -e

Expand All @@ -33,6 +36,7 @@ SQL_DIR="$PROJECT_DIR/tests/e2e/sql"
KEEP_RUNNING=false
CLEAN_START=false
VERBOSE=false
KEEP_GOING=false
NO_PRELOAD=false
TEST_FILTER=""
REPEAT_COUNT=1
Expand All @@ -53,6 +57,10 @@ while [[ $# -gt 0 ]]; do
VERBOSE=true
shift
;;
--keep-going|-k)
KEEP_GOING=true
shift
;;
--pg-version)
if ! [[ "$2" =~ ^[0-9]+$ ]]; then
echo "Error: --pg-version requires a numeric argument, got: $2"
Expand Down Expand Up @@ -125,6 +133,9 @@ fi
if [ "$VERBOSE" = true ]; then
echo -e "Mode: ${YELLOW}Verbose output (show NOTICE messages)${NC}"
fi
if [ "$KEEP_GOING" = true ]; then
echo -e "Mode: ${YELLOW}Keep going on failure${NC}"
fi
if [ "$NO_PRELOAD" = true ]; then
echo -e "Mode: ${YELLOW}No-preload (testing shared_preload_libraries enforcement)${NC}"
fi
Expand Down Expand Up @@ -269,6 +280,12 @@ fi
# Run tests
TOTAL_PASSED=0
TOTAL_FAILED=0
FAILED_TESTS=()

# When --keep-going is set, don't exit on test failure
if [ "$KEEP_GOING" = true ]; then
set +e
fi

for run in $(seq 1 $REPEAT_COUNT); do
if [ "$REPEAT_COUNT" -gt 1 ]; then
Expand Down Expand Up @@ -316,6 +333,10 @@ for run in $(seq 1 $REPEAT_COUNT); do
# 35 reads df._worker_epoch (internal table)
# 37 tests RLS policies, including for superuser, changes users
# 38 tests per-user vars RLS isolation, changes users
# 58 kills background worker (requires pg_terminate_backend + _worker_epoch)
# 60 deletes instance rows directly (bypasses RLS, superuser only)
# 62 uses dblink with postgres credentials for concurrent sessions
# 63 uses dblink with postgres credentials for variable race test
PSQL_USER="$E2E_USER"
if [[ "$test_name" == "00_requires_shared_preload" \
|| "$test_name" == "22_cross_connection" \
Expand All @@ -328,7 +349,11 @@ for run in $(seq 1 $REPEAT_COUNT); do
|| "$test_name" == "34_multi_database" \
|| "$test_name" == "35_heartbeat_liveness" \
|| "$test_name" == "37_rls" \
|| "$test_name" == "38_rls_vars" ]]; then
|| "$test_name" == "38_rls_vars" \
|| "$test_name" == "58_kill_worker_mid_execution" \
|| "$test_name" == "60_orphaned_nodes" \
|| "$test_name" == "62_concurrent_sessions" \
|| "$test_name" == "63_shared_variable_race" ]]; then
PSQL_USER="$PG_USER"
fi

Expand All @@ -344,6 +369,7 @@ for run in $(seq 1 $REPEAT_COUNT); do
else
echo -e " ${RED}FAIL${NC}"
FAILED=$((FAILED + 1))
FAILED_TESTS+=("$test_name")
fi
else
# Non-verbose mode: capture output and show summary
Expand All @@ -358,6 +384,7 @@ for run in $(seq 1 $REPEAT_COUNT); do
echo -e "${RED}FAIL${NC}"
echo "$output" | grep -E "(NOTICE|ERROR|TEST FAILED)" | tail -15
FAILED=$((FAILED + 1))
FAILED_TESTS+=("$test_name")
else
echo -e "${GREEN}PASS${NC}"
PASSED=$((PASSED + 1))
Expand All @@ -366,6 +393,7 @@ for run in $(seq 1 $REPEAT_COUNT); do
echo -e "${RED}FAIL${NC}"
echo "$output" | grep -E "(NOTICE|ERROR)" | tail -15
FAILED=$((FAILED + 1))
FAILED_TESTS+=("$test_name")
fi
fi
done
Expand All @@ -385,6 +413,13 @@ if [ "$REPEAT_COUNT" -gt 1 ]; then
echo "Total Results ($REPEAT_COUNT runs):"
fi
echo -e "Results: ${GREEN}$TOTAL_PASSED passed${NC}, ${RED}$TOTAL_FAILED failed${NC}"
if [ ${#FAILED_TESTS[@]} -gt 0 ]; then
echo ""
echo -e "${RED}Failed tests:${NC}"
for t in "${FAILED_TESTS[@]}"; do
echo -e " ${RED}- $t${NC}"
done
fi
echo "================================================"

[ $TOTAL_FAILED -eq 0 ]
Expand Down
79 changes: 79 additions & 0 deletions tests/e2e/sql/38_infinite_loop.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
-- Test: Infinite loop cancellation (B1 / B2)
-- Demonstrates: df.loop() with always-true condition and unconditional loop
-- Expected:
-- - Loops run indefinitely; df.cancel() successfully stops them
-- - Instance ends in canceled/failed state, not stuck in running

-- Iteration log: the loop bodies started below insert one row per execution,
-- tagged with the variant they belong to.
DROP TABLE IF EXISTS test_infinite_log;
CREATE TABLE test_infinite_log (
    id      SERIAL,
    variant TEXT,
    ts      TIMESTAMP DEFAULT now()
);

-- Session-local bookkeeping: maps each started instance to its variant name.
CREATE TEMP TABLE _inf_state (
    instance_id TEXT,
    variant     TEXT
);

-- B1: loop whose condition query always yields true.
INSERT INTO _inf_state (instance_id, variant)
VALUES (
    df.start(
        df.loop(
            'INSERT INTO test_infinite_log (variant) VALUES (''while_true'')',
            'SELECT true'  -- this condition can never turn false
        ),
        'test-infinite-while-true'
    ),
    'while_true'
);

-- B2: loop started without any condition argument.
INSERT INTO _inf_state (instance_id, variant)
VALUES (
    df.start(
        df.loop(
            'INSERT INTO test_infinite_log (variant) VALUES (''unconditional'')'
        ),
        'test-infinite-unconditional'
    ),
    'unconditional'
);

-- Verify that every started loop is really iterating, then cancel it and
-- confirm it reaches a terminal state.
--
-- For each row of _inf_state:
--   1. poll test_infinite_log until the variant has >= 2 rows
--      (gives up after just over 200 sleeps of 0.1s),
--   2. call df.cancel() on the instance,
--   3. poll df.status() until it reports canceled/cancelled/failed
--      (gives up after just over 100 sleeps of 0.2s).
-- Raises an exception whose message starts with 'TEST FAILED' when step 1 or
-- step 3 does not reach its goal.
DO $$
DECLARE
    rec      RECORD;
    cnt      INT;
    status   TEXT;
    attempts INT;
BEGIN
    FOR rec IN SELECT instance_id, variant FROM _inf_state LOOP
        RAISE NOTICE 'Testing infinite loop [%]: %', rec.variant, rec.instance_id;

        -- Wait for at least 2 iterations to prove the loop is actually running
        attempts := 0;
        LOOP
            SELECT COUNT(*) INTO cnt FROM test_infinite_log WHERE variant = rec.variant;
            EXIT WHEN cnt >= 2 OR attempts > 200;
            PERFORM pg_sleep(0.1);
            attempts := attempts + 1;
        END LOOP;

        -- NOTE(review): raising here leaves the started loops uncancelled;
        -- consider whether the harness cleans them up.
        IF cnt < 2 THEN
            RAISE EXCEPTION 'TEST FAILED [%]: expected >= 2 iterations before cancel, got %',
                rec.variant, cnt;
        END IF;

        -- Cancel the running loop
        PERFORM df.cancel(rec.instance_id, 'test-cancel');

        -- Wait for cancellation to take effect. A NULL status makes the IN
        -- test NULL, so the loop then ends only via the attempts limit.
        attempts := 0;
        LOOP
            SELECT s INTO status FROM df.status(rec.instance_id) s;
            EXIT WHEN lower(status) IN ('canceled', 'cancelled', 'failed') OR attempts > 100;
            PERFORM pg_sleep(0.2);
            attempts := attempts + 1;
        END LOOP;

        -- NULL must be treated as "not canceled": `NULL NOT IN (...)` is NULL,
        -- which an IF treats as false and would let the test pass silently.
        IF status IS NULL
           OR lower(status) NOT IN ('canceled', 'cancelled', 'failed') THEN
            RAISE EXCEPTION 'TEST FAILED [%]: expected canceled/failed after cancel, got %',
                rec.variant, status;
        END IF;

        -- cnt is the iteration count observed before the cancel was issued.
        RAISE NOTICE 'PASSED [%]: ran % iterations, then canceled (status=%)',
            rec.variant, cnt, status;
    END LOOP;
END $$;

-- Cleanup: drop the session bookkeeping table and the iteration log.
DROP TABLE _inf_state;
DROP TABLE test_infinite_log;
-- Emit the success marker as the script's final result row.
SELECT 'TEST PASSED' AS result;
114 changes: 114 additions & 0 deletions tests/e2e/sql/39_truthiness_edge_cases.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
-- Test: Loop condition truthiness edge cases (B3)
-- Demonstrates: evaluate_condition / is_truthy behavior for ambiguous values
-- Expected: Documents and verifies the actual truthiness semantics for:
-- NULL, integer 0, float 0.0, empty string, string "false", string "no",
-- empty JSON array, empty JSON object

-- Each sub-test starts a df.loop(body, condition) and checks whether the loop
-- stops (condition is falsy) or runs at least 2 iterations before cancel
-- (condition is truthy).

-- Iteration log: every loop body started below inserts one row per execution,
-- tagged with its variant.
DROP TABLE IF EXISTS test_truth_log;
CREATE TABLE test_truth_log (
    id      SERIAL,
    variant TEXT,
    ts      TIMESTAMP DEFAULT now()
);

-- One row per truthiness case; instance_id is filled in once the loop is started.
CREATE TEMP TABLE _truth_cases (
    variant       TEXT,
    condition_sql TEXT,
    expected      TEXT,
    instance_id   TEXT
);

INSERT INTO _truth_cases (variant, condition_sql, expected)
VALUES
    ('null_val',    'SELECT NULL',         'falsy'),
    ('int_zero',    'SELECT 0',            'falsy'),
    ('int_one',     'SELECT 1',            'truthy'),
    ('bool_false',  'SELECT false',        'falsy'),
    ('bool_true',   'SELECT true',         'truthy'),
    -- [KNOWN QUIRK] Non-empty strings other than "true"/"t"/"yes"/"1" that do
    -- not parse as a non-zero integer: the observed behavior is falsy.
    ('str_false',   'SELECT ''false''',    'falsy'),
    ('str_no',      'SELECT ''no''',       'falsy'),
    ('empty_str',   'SELECT ''''',         'falsy'),
    ('float_zero',  'SELECT 0.0',          'falsy'),
    ('empty_array', 'SELECT ''[]''::jsonb', 'falsy'),
    ('empty_obj',   'SELECT ''{}''::jsonb', 'falsy');

-- Start one loop instance per case. Deliberately no WHERE clause: every row
-- gets an instance. Runs as a top-level statement so it auto-commits and the
-- background worker can see the new instances.
UPDATE _truth_cases
SET instance_id = df.start(
    df.loop(
        format('INSERT INTO test_truth_log (variant) VALUES (%L)', variant),
        condition_sql
    ),
    format('truth-%s', variant)
);

-- NOTE on known behavior quirks:
-- String "false" and "no" are treated as FALSY by is_truthy() — they are
-- recognized as falsy string values. The correct way to return a falsy condition
-- is `SELECT false` (boolean), `SELECT 0`, or string "false"/"no".

-- Classify every started loop as truthy or falsy and compare with the
-- expectation stored in _truth_cases.
--
-- Per case: poll df.status() and the row count in test_truth_log until the
-- instance reports a terminal status, has logged >= 2 iterations, or the poll
-- budget (just over 100 sleeps of 0.1s) is exhausted.
--   * terminal status (completed/failed/canceled/cancelled) => 'falsy'
--   * >= 2 logged iterations => 'truthy'; the still-running loop is canceled
--   * neither => the test aborts with a 'TEST FAILED' timeout exception
-- A mismatch is reported as a WARNING and counted; any mismatch fails the test
-- once all cases have been examined.
DO $$
DECLARE
    rec      RECORD;
    status   TEXT;
    cnt      INT;
    got      TEXT;
    attempts INT;
    failures INT := 0;
BEGIN
    FOR rec IN
        SELECT variant, condition_sql, expected, instance_id
        FROM _truth_cases
    LOOP
        attempts := 0;

        -- Wait up to ~10s for the loop to either stop on its own or run 2 iterations
        LOOP
            SELECT s INTO status FROM df.status(rec.instance_id) s;
            SELECT COUNT(*) INTO cnt FROM test_truth_log WHERE variant = rec.variant;
            EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled', 'cancelled')
                OR cnt >= 2
                OR attempts > 100;
            PERFORM pg_sleep(0.1);
            attempts := attempts + 1;
        END LOOP;

        IF lower(status) IN ('completed', 'failed', 'canceled', 'cancelled') THEN
            -- Loop stopped by itself → classified as falsy. Note that a
            -- 'failed' instance is also counted as falsy here.
            got := 'falsy';
        ELSIF cnt >= 2 THEN
            -- Loop kept running beyond 1 iteration → condition is truthy; cancel it
            PERFORM df.cancel(rec.instance_id, 'truth-test-done');
            -- Wait for cancel to land
            attempts := 0;
            LOOP
                SELECT s INTO status FROM df.status(rec.instance_id) s;
                EXIT WHEN lower(status) IN ('completed', 'failed', 'canceled', 'cancelled')
                    OR attempts > 50;
                PERFORM pg_sleep(0.1);
                attempts := attempts + 1;
            END LOOP;
            -- Surface a cancel that never landed: the loop would otherwise keep
            -- running unnoticed. NULL-safe because `NULL NOT IN (...)` is NULL.
            IF status IS NULL
               OR lower(status) NOT IN ('completed', 'failed', 'canceled', 'cancelled') THEN
                RAISE WARNING 'Cancel not confirmed for [%] (status=%); instance may still be running',
                    rec.variant, status;
            END IF;
            got := 'truthy';
        ELSE
            -- Timeout: neither a terminal status nor 2 iterations were observed
            -- within the poll budget (worker busy or dead).
            -- NOTE(review): if df.cancel() is transactional, the exception
            -- below rolls this cancel back — confirm.
            PERFORM df.cancel(rec.instance_id, 'truth-test-timeout');
            -- The 'TEST FAILED' prefix matches the marker used by the other
            -- failure messages of this test suite.
            RAISE EXCEPTION 'TEST FAILED: timeout waiting for truth test [%] (status=%, cnt=%)',
                rec.variant, status, cnt;
        END IF;

        RAISE NOTICE 'Truthiness [%]: condition=% → %', rec.variant, rec.condition_sql, got;
        IF got != rec.expected THEN
            RAISE WARNING 'REGRESSION [%]: got % expected %', rec.variant, got, rec.expected;
            failures := failures + 1;
        END IF;
    END LOOP;

    -- Emit a clear notice about the known quirks so they are visible in test output
    RAISE NOTICE 'NOTE: SELECT ''false'' and SELECT ''no'' are falsy in loop conditions.';

    IF failures > 0 THEN
        RAISE EXCEPTION 'TEST FAILED: % truthiness regression(s) — see WARNINGs above', failures;
    END IF;
END $$;

-- Cleanup: drop the case table and the iteration log.
DROP TABLE _truth_cases;
DROP TABLE test_truth_log;
-- Emit the success marker as the script's final result row.
SELECT 'TEST PASSED' AS result;
Loading
Loading