From 5e22fb2ed1658eb63fa1393b693109d0bdae81b3 Mon Sep 17 00:00:00 2001
From: Neil Deshpande <ndeshpan@microsoft.com>
Date: Wed, 1 Apr 2026 21:47:37 +0000
Subject: [PATCH 1/7] Add a new GUC and the SortedMergeAdapter

---
 .../distributed/executor/multi_executor.c     |   3 +
 .../distributed/executor/sorted_merge.c       | 204 ++++++++++++++++++
 src/backend/distributed/shared_library_init.c |  15 ++
 src/include/distributed/citus_custom_scan.h   |   3 +
 src/include/distributed/multi_executor.h      |   1 +
 src/include/distributed/sorted_merge.h        |  14 ++
 6 files changed, 240 insertions(+)

diff --git a/src/backend/distributed/executor/multi_executor.c b/src/backend/distributed/executor/multi_executor.c
index 8661d367345..91479019df1 100644
--- a/src/backend/distributed/executor/multi_executor.c
+++ b/src/backend/distributed/executor/multi_executor.c
@@ -88,6 +88,9 @@ bool SortReturning = false;
 /* when true at planning time, enables coordinator sorted merge for ORDER BY */
 bool EnableSortedMerge = false;
 
+/* when true, uses streaming adapter instead of eager merge for sorted merge */
+bool EnableStreamingSortedMerge = false;
+
 /*
  * How many nested executors have we started? This can happen for SQL
  * UDF calls. The outer query starts an executor, then postgres opens
diff --git a/src/backend/distributed/executor/sorted_merge.c b/src/backend/distributed/executor/sorted_merge.c
index f3514dfdb38..76b9989b504 100644
--- a/src/backend/distributed/executor/sorted_merge.c
+++ b/src/backend/distributed/executor/sorted_merge.c
@@ -331,3 +331,207 @@ MergeHeapComparator(Datum a, Datum b, void *arg)
 
 	return 0;
 }
+
+
+/*
+ * SortedMergeAdapter streams tuples from K pre-sorted per-task stores
+ * via a binary heap, returning one globally-sorted tuple per call.
+ *
+ * This is the streaming replacement for MergePerTaskStoresIntoFinalStore().
+ * Instead of copying all tuples into a final tuplestore, the adapter holds
+ * the per-task stores and heap alive, producing tuples on demand.
+ *
+ * Modeled after PostgreSQL's MergeAppend (nodeMergeAppend.c), which uses
+ * the same binary-heap-over-sorted-inputs pattern.
+ */
+struct SortedMergeAdapter
+{
+	Tuplestorestate **perTaskStores;    /* K per-task stores (owned) */
+	int nstores;
+
+	binaryheap *heap;
+
+	MergeContext mergeCtx;              /* embedded — passed to heap as bh_arg */
+
+	TupleDesc tupleDesc;
+	bool exhausted;
+	bool initialized;
+};
+
+
+/*
+ * CreateSortedMergeAdapter builds a streaming merge adapter over K per-task
+ * stores. The adapter takes ownership of perTaskStores — the caller must
+ * not free them; FreeSortedMergeAdapter() handles cleanup.
+ *
+ * All memory is allocated in CurrentMemoryContext. The caller must ensure
+ * this context outlives the adapter (the AdaptiveExecutor local context
+ * already satisfies this — see adaptive_executor.c).
+ */
+SortedMergeAdapter *
+CreateSortedMergeAdapter(Tuplestorestate **perTaskStores,
+						 int nstores,
+						 SortedMergeKey *mergeKeys,
+						 int nkeys,
+						 TupleDesc tupleDesc)
+{
+	SortedMergeAdapter *adapter = palloc0(sizeof(SortedMergeAdapter));
+	adapter->perTaskStores = perTaskStores;
+	adapter->nstores = nstores;
+	adapter->tupleDesc = tupleDesc;
+
+	/* one comparison slot per store — owned via mergeCtx.slots */
+	TupleTableSlot **slots = palloc(nstores * sizeof(TupleTableSlot *));
+	for (int i = 0; i < nstores; i++)
+	{
+		slots[i] = MakeSingleTupleTableSlot(tupleDesc, &TTSOpsMinimalTuple);
+	}
+
+	/* build SortSupport (same logic as MergePerTaskStoresIntoFinalStore) */
+	SortSupportData *sortKeys = palloc0(nkeys * sizeof(SortSupportData));
+	for (int i = 0; i < nkeys; i++)
+	{
+		SortSupport sk = &sortKeys[i];
+		sk->ssup_cxt = CurrentMemoryContext;
+		sk->ssup_collation = mergeKeys[i].collation;
+		sk->ssup_nulls_first = mergeKeys[i].nullsFirst;
+		sk->ssup_attno = mergeKeys[i].attno;
+		PrepareSortSupportFromOrderingOp(mergeKeys[i].sortop, sk);
+	}
+
+	/* set up embedded merge context for heap comparisons */
+	adapter->mergeCtx.slots = slots;
+	adapter->mergeCtx.sortKeys = sortKeys;
+	adapter->mergeCtx.nkeys = nkeys;
+
+	/* allocate heap with embedded context as comparator arg */
+	adapter->heap = binaryheap_allocate(nstores, MergeHeapComparator,
+										&adapter->mergeCtx);
+
+	return adapter;
+}
+
+
+/*
+ * SortedMergeAdapterNext returns the next globally-sorted tuple from the
+ * adapter by copying it into the provided scanSlot. Returns true if a tuple
+ * was returned, false if all stores are exhausted.
+ *
+ * The heap uses per-store comparison slots (mergeCtx.slots). After
+ * identifying the winner, we ExecCopySlot from the winner's comparison
+ * slot into the scan slot. This is a MinimalTuple copy, comparable in
+ * cost to the tuplestore_puttupleslot write in the eager merge path.
+ *
+ * On each call after the first, we advance the previous winner's store
+ * and update the heap before selecting the new winner. This matches the
+ * MergeAppend pattern in nodeMergeAppend.c.
+ */
+bool
+SortedMergeAdapterNext(SortedMergeAdapter *adapter, TupleTableSlot *scanSlot)
+{
+	if (adapter->exhausted)
+	{
+		ExecClearTuple(scanSlot);
+		return false;
+	}
+
+	if (!adapter->initialized)
+	{
+		/* first call: seed the heap with the first tuple from each store */
+		for (int i = 0; i < adapter->nstores; i++)
+		{
+			tuplestore_rescan(adapter->perTaskStores[i]);
+			if (tuplestore_gettupleslot(adapter->perTaskStores[i], true, false,
+										adapter->mergeCtx.slots[i]))
+			{
+				binaryheap_add_unordered(adapter->heap, Int32GetDatum(i));
+			}
+		}
+		binaryheap_build(adapter->heap);
+		adapter->initialized = true;
+	}
+	else
+	{
+		/* advance the previous winner and update the heap */
+		int prevWinner = DatumGetInt32(binaryheap_first(adapter->heap));
+		if (tuplestore_gettupleslot(adapter->perTaskStores[prevWinner], true,
+									false, adapter->mergeCtx.slots[prevWinner]))
+		{
+			binaryheap_replace_first(adapter->heap, Int32GetDatum(prevWinner));
+		}
+		else
+		{
+			(void) binaryheap_remove_first(adapter->heap);
+		}
+	}
+
+	if (binaryheap_empty(adapter->heap))
+	{
+		adapter->exhausted = true;
+		ExecClearTuple(scanSlot);
+		return false;
+	}
+
+	int winner = DatumGetInt32(binaryheap_first(adapter->heap));
+	ExecCopySlot(scanSlot, adapter->mergeCtx.slots[winner]);
+
+	return true;
+}
+
+
+/*
+ * SortedMergeAdapterRescan resets the adapter to re-read from the beginning.
+ * Called from CitusReScan() for cursor WITH HOLD patterns.
+ *
+ * Cost is O(K log K) to rebuild the heap, which is negligible for typical
+ * shard counts (4-64). Both binaryheap_reset() and tuplestore_rescan()
+ * are proven APIs used by PostgreSQL's ExecReScanMergeAppend.
+ */
+void
+SortedMergeAdapterRescan(SortedMergeAdapter *adapter)
+{
+	binaryheap_reset(adapter->heap);
+
+	for (int i = 0; i < adapter->nstores; i++)
+	{
+		tuplestore_rescan(adapter->perTaskStores[i]);
+		if (tuplestore_gettupleslot(adapter->perTaskStores[i], true, false,
+									adapter->mergeCtx.slots[i]))
+		{
+			binaryheap_add_unordered(adapter->heap, Int32GetDatum(i));
+		}
+	}
+	binaryheap_build(adapter->heap);
+
+	adapter->exhausted = false;
+	adapter->initialized = true;
+}
+
+
+/*
+ * FreeSortedMergeAdapter releases all adapter resources including
+ * per-task stores, comparison slots, sort keys, and the heap.
+ * Called from CitusEndScan() for deterministic cleanup.
+ */
+void
+FreeSortedMergeAdapter(SortedMergeAdapter *adapter)
+{
+	if (adapter == NULL)
+	{
+		return;
+	}
+
+	for (int i = 0; i < adapter->nstores; i++)
+	{
+		tuplestore_end(adapter->perTaskStores[i]);
+		ExecDropSingleTupleTableSlot(adapter->mergeCtx.slots[i]);
+	}
+
+	binaryheap_free(adapter->heap);
+	pfree(adapter->mergeCtx.slots);
+	pfree(adapter->mergeCtx.sortKeys);
+	pfree(adapter->perTaskStores);
+
+	/* mergeCtx is embedded in adapter, freed with the adapter itself */
+	pfree(adapter);
+}
diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c
index 48842050c3b..8b4b366ace3 100644
--- a/src/backend/distributed/shared_library_init.c
+++ b/src/backend/distributed/shared_library_init.c
@@ -1617,6 +1617,21 @@ RegisterCitusConfigVariables(void)
 		GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
 		NULL, NULL, NULL);
 
+	DefineCustomBoolVariable(
+		"citus.enable_streaming_sorted_merge",
+		gettext_noop("Use streaming adapter instead of eager merge for sorted merge."),
+		gettext_noop("When enabled alongside citus.enable_sorted_merge, the coordinator "
+					 "streams merged tuples directly from per-task stores via a binary "
+					 "heap instead of eagerly copying all tuples into a final tuplestore. "
+					 "This reduces memory usage and improves time-to-first-tuple, "
+					 "especially for LIMIT queries. Requires citus.enable_sorted_merge "
+					 "to also be enabled. This is an experimental feature."),
+		&EnableStreamingSortedMerge,
+		false,
+		PGC_USERSET,
+		GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
+		NULL, NULL, NULL);
+
 	DefineCustomBoolVariable(
 		"citus.enable_stat_counters",
 		gettext_noop("Enables the collection of statistic counters for Citus."),
diff --git a/src/include/distributed/citus_custom_scan.h b/src/include/distributed/citus_custom_scan.h
index db1f0ce1f2a..dbe71df0856 100644
--- a/src/include/distributed/citus_custom_scan.h
+++ b/src/include/distributed/citus_custom_scan.h
@@ -28,6 +28,9 @@ typedef struct CitusScanState
 	MultiExecutorType executorType;   /* distributed executor type */
 	bool finishedRemoteScan;          /* flag to check if remote scan is finished */
 	Tuplestorestate *tuplestorestate; /* tuple store to store distributed results */
+
+	/* streaming sorted merge adapter (NULL when not using sorted merge) */
+	struct SortedMergeAdapter *mergeAdapter;
 } CitusScanState;
 
 
diff --git a/src/include/distributed/multi_executor.h b/src/include/distributed/multi_executor.h
index c18067b5499..da30bd0c838 100644
--- a/src/include/distributed/multi_executor.h
+++ b/src/include/distributed/multi_executor.h
@@ -71,6 +71,7 @@ extern int MaxAdaptiveExecutorPoolSize;
 extern int ExecutorSlowStartInterval;
 extern bool SortReturning;
 extern bool EnableSortedMerge;
+extern bool EnableStreamingSortedMerge;
 extern int ExecutorLevel;
 
 
diff --git a/src/include/distributed/sorted_merge.h b/src/include/distributed/sorted_merge.h
index eeb3e690d35..d39f03ad777 100644
--- a/src/include/distributed/sorted_merge.h
+++ b/src/include/distributed/sorted_merge.h
@@ -18,6 +18,10 @@
 #include "distributed/tuple_destination.h"
 
 
+/* opaque streaming merge adapter — full definition in sorted_merge.c */
+typedef struct SortedMergeAdapter SortedMergeAdapter;
+
+
 extern TupleDestination * CreatePerTaskDispatchDest(List *taskList,
 													TupleDesc tupleDesc,
 													TupleDestinationStats *sharedStats,
@@ -31,4 +35,14 @@ extern void MergePerTaskStoresIntoFinalStore(Tuplestorestate *finalStore,
 											 int nkeys,
 											 TupleDesc tupleDesc);
 
+extern SortedMergeAdapter * CreateSortedMergeAdapter(Tuplestorestate **perTaskStores,
+													 int nstores,
+													 SortedMergeKey *mergeKeys,
+													 int nkeys,
+													 TupleDesc tupleDesc);
+extern bool SortedMergeAdapterNext(SortedMergeAdapter *adapter,
+								   TupleTableSlot *scanSlot);
+extern void SortedMergeAdapterRescan(SortedMergeAdapter *adapter);
+extern void FreeSortedMergeAdapter(SortedMergeAdapter *adapter);
+
 #endif /* SORTED_MERGE_H */

From 71a653304dc675d975338d49f82115c4377b3b45 Mon Sep 17 00:00:00 2001
From: Neil Deshpande <ndeshpan@microsoft.com>
Date: Fri, 3 Apr 2026 20:17:55 +0000
Subject: [PATCH 2/7] Use the SortedMergeAdapter in the existing functions
 instead of duplicating code

---
 .../distributed/executor/sorted_merge.c       | 151 +++++++-----------
 src/include/distributed/sorted_merge.h        |   3 +-
 2 files changed, 62 insertions(+), 92 deletions(-)

diff --git a/src/backend/distributed/executor/sorted_merge.c b/src/backend/distributed/executor/sorted_merge.c
index 76b9989b504..75e09f92baa 100644
--- a/src/backend/distributed/executor/sorted_merge.c
+++ b/src/backend/distributed/executor/sorted_merge.c
@@ -66,6 +66,32 @@ typedef struct MergeContext
 } MergeContext;
 
 
+/*
+ * SortedMergeAdapter streams tuples from K pre-sorted per-task stores
+ * via a binary heap, returning one globally-sorted tuple per call.
+ *
+ * Used both as the streaming replacement for MergePerTaskStoresIntoFinalStore()
+ * and internally by that function itself (to avoid duplicating the merge logic).
+ *
+ * Modeled after PostgreSQL's MergeAppend (nodeMergeAppend.c), which uses
+ * the same binary-heap-over-sorted-inputs pattern.
+ */
+struct SortedMergeAdapter
+{
+	Tuplestorestate **perTaskStores;    /* K per-task stores (not owned in eager mode) */
+	int nstores;
+	bool ownsStores;                    /* if true, FreeSortedMergeAdapter frees stores */
+
+	binaryheap *heap;
+
+	MergeContext mergeCtx;              /* embedded — passed to heap as bh_arg */
+
+	TupleDesc tupleDesc;
+	bool exhausted;
+	bool initialized;
+};
+
+
 /* forward declarations */
 static void PerTaskDispatchPutTuple(TupleDestination *self, Task *task,
 									int placementIndex, int queryNumber,
@@ -213,7 +239,9 @@ PerTaskDispatchTupleDescForQuery(TupleDestination *self, int queryNumber)
  * Each per-task store must contain tuples sorted by the given merge keys.
  * The output tuplestore will contain all tuples in globally sorted order.
  *
- * Uses PostgreSQL's public binaryheap and SortSupport APIs.
+ * Implemented by creating a temporary SortedMergeAdapter, draining it into
+ * the final store, and freeing the adapter. The per-task stores are NOT
+ * freed by this function — the caller is responsible for that.
  */
 void
 MergePerTaskStoresIntoFinalStore(Tuplestorestate *finalStore,
@@ -228,69 +256,21 @@ MergePerTaskStoresIntoFinalStore(Tuplestorestate *finalStore,
 		return;
 	}
 
-	/* allocate one reusable slot per task store */
-	TupleTableSlot **slots = palloc(nstores * sizeof(TupleTableSlot *));
-	for (int i = 0; i < nstores; i++)
-	{
-		slots[i] = MakeSingleTupleTableSlot(tupleDesc, &TTSOpsMinimalTuple);
-	}
+	SortedMergeAdapter *adapter = CreateSortedMergeAdapter(perTaskStores,
+														   nstores, mergeKeys,
+														   nkeys, tupleDesc,
+														   false);
 
-	/* build SortSupport from serialized merge keys */
-	SortSupportData *sortKeys = palloc0(nkeys * sizeof(SortSupportData));
-	for (int i = 0; i < nkeys; i++)
-	{
-		SortSupport sk = &sortKeys[i];
-		sk->ssup_cxt = CurrentMemoryContext;
-		sk->ssup_collation = mergeKeys[i].collation;
-		sk->ssup_nulls_first = mergeKeys[i].nullsFirst;
-		sk->ssup_attno = mergeKeys[i].attno;
-		PrepareSortSupportFromOrderingOp(mergeKeys[i].sortop, sk);
-	}
-
-	/* set up merge context for heap comparisons */
-	MergeContext ctx;
-	ctx.slots = slots;
-	ctx.sortKeys = sortKeys;
-	ctx.nkeys = nkeys;
-
-	binaryheap *heap = binaryheap_allocate(nstores, MergeHeapComparator, &ctx);
+	TupleTableSlot *slot = MakeSingleTupleTableSlot(tupleDesc,
+													&TTSOpsMinimalTuple);
 
-	/* seed the heap with the first tuple from each non-empty store */
-	for (int i = 0; i < nstores; i++)
+	while (SortedMergeAdapterNext(adapter, slot))
 	{
-		tuplestore_rescan(perTaskStores[i]);
-		if (tuplestore_gettupleslot(perTaskStores[i], true, false, slots[i]))
-		{
-			binaryheap_add_unordered(heap, Int32GetDatum(i));
-		}
+		tuplestore_puttupleslot(finalStore, slot);
 	}
-	binaryheap_build(heap);
-
-	/* merge loop: extract min, write to final store, advance winner */
-	while (!binaryheap_empty(heap))
-	{
-		int winner = DatumGetInt32(binaryheap_first(heap));
-		tuplestore_puttupleslot(finalStore, slots[winner]);
 
-		if (tuplestore_gettupleslot(perTaskStores[winner], true, false,
-									slots[winner]))
-		{
-			binaryheap_replace_first(heap, Int32GetDatum(winner));
-		}
-		else
-		{
-			(void) binaryheap_remove_first(heap);
-		}
-	}
-
-	/* free merge-local resources */
-	binaryheap_free(heap);
-	for (int i = 0; i < nstores; i++)
-	{
-		ExecDropSingleTupleTableSlot(slots[i]);
-	}
-	pfree(slots);
-	pfree(sortKeys);
+	ExecDropSingleTupleTableSlot(slot);
+	FreeSortedMergeAdapter(adapter);
 }
 
 
@@ -333,36 +313,11 @@ MergeHeapComparator(Datum a, Datum b, void *arg)
 }
 
 
-/*
- * SortedMergeAdapter streams tuples from K pre-sorted per-task stores
- * via a binary heap, returning one globally-sorted tuple per call.
- *
- * This is the streaming replacement for MergePerTaskStoresIntoFinalStore().
- * Instead of copying all tuples into a final tuplestore, the adapter holds
- * the per-task stores and heap alive, producing tuples on demand.
- *
- * Modeled after PostgreSQL's MergeAppend (nodeMergeAppend.c), which uses
- * the same binary-heap-over-sorted-inputs pattern.
- */
-struct SortedMergeAdapter
-{
-	Tuplestorestate **perTaskStores;    /* K per-task stores (owned) */
-	int nstores;
-
-	binaryheap *heap;
-
-	MergeContext mergeCtx;              /* embedded — passed to heap as bh_arg */
-
-	TupleDesc tupleDesc;
-	bool exhausted;
-	bool initialized;
-};
-
-
 /*
  * CreateSortedMergeAdapter builds a streaming merge adapter over K per-task
- * stores. The adapter takes ownership of perTaskStores — the caller must
- * not free them; FreeSortedMergeAdapter() handles cleanup.
+ * stores. When ownsStores is true, FreeSortedMergeAdapter() will call
+ * tuplestore_end() on each per-task store; when false, the caller retains
+ * ownership and must free them separately.
  *
  * All memory is allocated in CurrentMemoryContext. The caller must ensure
  * this context outlives the adapter (the AdaptiveExecutor local context
@@ -373,11 +328,13 @@ CreateSortedMergeAdapter(Tuplestorestate **perTaskStores,
 						 int nstores,
 						 SortedMergeKey *mergeKeys,
 						 int nkeys,
-						 TupleDesc tupleDesc)
+						 TupleDesc tupleDesc,
+						 bool ownsStores)
 {
 	SortedMergeAdapter *adapter = palloc0(sizeof(SortedMergeAdapter));
 	adapter->perTaskStores = perTaskStores;
 	adapter->nstores = nstores;
+	adapter->ownsStores = ownsStores;
 	adapter->tupleDesc = tupleDesc;
 
 	/* one comparison slot per store — owned via mergeCtx.slots */
@@ -387,7 +344,7 @@ CreateSortedMergeAdapter(Tuplestorestate **perTaskStores,
 		slots[i] = MakeSingleTupleTableSlot(tupleDesc, &TTSOpsMinimalTuple);
 	}
 
-	/* build SortSupport (same logic as MergePerTaskStoresIntoFinalStore) */
+	/* build SortSupport from serialized merge keys */
 	SortSupportData *sortKeys = palloc0(nkeys * sizeof(SortSupportData));
 	for (int i = 0; i < nkeys; i++)
 	{
@@ -425,6 +382,11 @@ CreateSortedMergeAdapter(Tuplestorestate **perTaskStores,
  * On each call after the first, we advance the previous winner's store
  * and update the heap before selecting the new winner. This matches the
  * MergeAppend pattern in nodeMergeAppend.c.
+ * 
+ * Possible perf optimizations to explore in the future:
+ * Avoid copying the winning tuple into the scan slot by returning a pointer to the winner's slot instead.
+ * This would require changes to the caller to not modify the returned slot and to understand that it's owned by the adapter until the next call.
+ * It would save a copy per tuple at the cost of a more complex API and potential lifetime management issues.
  */
 bool
 SortedMergeAdapterNext(SortedMergeAdapter *adapter, TupleTableSlot *scanSlot)
@@ -523,14 +485,21 @@ FreeSortedMergeAdapter(SortedMergeAdapter *adapter)
 
 	for (int i = 0; i < adapter->nstores; i++)
 	{
-		tuplestore_end(adapter->perTaskStores[i]);
+		if (adapter->ownsStores)
+		{
+			tuplestore_end(adapter->perTaskStores[i]);
+		}
 		ExecDropSingleTupleTableSlot(adapter->mergeCtx.slots[i]);
 	}
 
 	binaryheap_free(adapter->heap);
 	pfree(adapter->mergeCtx.slots);
 	pfree(adapter->mergeCtx.sortKeys);
-	pfree(adapter->perTaskStores);
+
+	if (adapter->ownsStores)
+	{
+		pfree(adapter->perTaskStores);
+	}
 
 	/* mergeCtx is embedded in adapter, freed with the adapter itself */
 	pfree(adapter);
diff --git a/src/include/distributed/sorted_merge.h b/src/include/distributed/sorted_merge.h
index d39f03ad777..d82fd626030 100644
--- a/src/include/distributed/sorted_merge.h
+++ b/src/include/distributed/sorted_merge.h
@@ -39,7 +39,8 @@ extern SortedMergeAdapter * CreateSortedMergeAdapter(Tuplestorestate **perTaskSt
 													 int nstores,
 													 SortedMergeKey *mergeKeys,
 													 int nkeys,
-													 TupleDesc tupleDesc);
+													 TupleDesc tupleDesc,
+													 bool ownsStores);
 extern bool SortedMergeAdapterNext(SortedMergeAdapter *adapter,
 								   TupleTableSlot *scanSlot);
 extern void SortedMergeAdapterRescan(SortedMergeAdapter *adapter);

From 10ac20a030fe5d0153ff7e6351d464ff86701bce Mon Sep 17 00:00:00 2001
From: Neil Deshpande <ndeshpan@microsoft.com>
Date: Fri, 3 Apr 2026 23:16:02 +0000
Subject: [PATCH 3/7] Wire up streaming k way merge and add integration test

---
 .../distributed/executor/adaptive_executor.c  |   48 +-
 .../distributed/executor/citus_custom_scan.c  |   14 +-
 .../distributed/executor/multi_executor.c     |   54 +-
 .../distributed/planner/distributed_planner.c |   13 +-
 .../multi_orderby_pushdown_streaming.out      | 1888 +++++++++++++++++
 .../sql/multi_orderby_pushdown_streaming.sql  |   14 +
 6 files changed, 2002 insertions(+), 29 deletions(-)
 create mode 100644 src/test/regress/expected/multi_orderby_pushdown_streaming.out
 create mode 100644 src/test/regress/sql/multi_orderby_pushdown_streaming.sql

diff --git a/src/backend/distributed/executor/adaptive_executor.c b/src/backend/distributed/executor/adaptive_executor.c
index a4e5461e51e..53a6c4c7b46 100644
--- a/src/backend/distributed/executor/adaptive_executor.c
+++ b/src/backend/distributed/executor/adaptive_executor.c
@@ -947,23 +947,45 @@ AdaptiveExecutor(CitusScanState *scanState)
 	 * When sorted merge is active, k-way merge the per-task stores into
 	 * the final tuplestore. This produces globally sorted output that the
 	 * existing ReturnTupleFromTuplestore() path can read unchanged.
+	 *
+	 * When streaming sorted merge is enabled, create an adapter instead
+	 * that delivers tuples one at a time without a final tuplestore.
 	 */
 	if (execution->useSortedMerge && execution->perTaskStoreCount > 0)
 	{
-		scanState->tuplestorestate =
-			tuplestore_begin_heap(randomAccess, interTransactions, work_mem);
-
-		MergePerTaskStoresIntoFinalStore(scanState->tuplestorestate,
-										 execution->perTaskStores,
-										 execution->perTaskStoreCount,
-										 distributedPlan->sortedMergeKeys,
-										 distributedPlan->sortedMergeKeyCount,
-										 tupleDescriptor);
-
-		/* free per-task stores — they are no longer needed */
-		for (int i = 0; i < execution->perTaskStoreCount; i++)
+		if (EnableStreamingSortedMerge)
+		{
+			/*
+			 * Streaming mode: create an adapter that delivers tuples one
+			 * at a time from the per-task stores via a binary heap. The
+			 * adapter takes ownership of the per-task stores.
+			 */
+			scanState->mergeAdapter = CreateSortedMergeAdapter(
+				execution->perTaskStores,
+				execution->perTaskStoreCount,
+				distributedPlan->sortedMergeKeys,
+				distributedPlan->sortedMergeKeyCount,
+				tupleDescriptor,
+				true);
+		}
+		else
 		{
-			tuplestore_end(execution->perTaskStores[i]);
+			/* Eager mode (default): merge all tuples into a final tuplestore */
+			scanState->tuplestorestate =
+				tuplestore_begin_heap(randomAccess, interTransactions, work_mem);
+
+			MergePerTaskStoresIntoFinalStore(scanState->tuplestorestate,
+											 execution->perTaskStores,
+											 execution->perTaskStoreCount,
+											 distributedPlan->sortedMergeKeys,
+											 distributedPlan->sortedMergeKeyCount,
+											 tupleDescriptor);
+
+			/* free per-task stores — they are no longer needed */
+			for (int i = 0; i < execution->perTaskStoreCount; i++)
+			{
+				tuplestore_end(execution->perTaskStores[i]);
+			}
 		}
 	}
 
diff --git a/src/backend/distributed/executor/citus_custom_scan.c b/src/backend/distributed/executor/citus_custom_scan.c
index db7e4f725ff..4b1c4701d08 100644
--- a/src/backend/distributed/executor/citus_custom_scan.c
+++ b/src/backend/distributed/executor/citus_custom_scan.c
@@ -46,6 +46,7 @@
 #include "distributed/multi_router_planner.h"
 #include "distributed/multi_server_executor.h"
 #include "distributed/shard_utils.h"
+#include "distributed/sorted_merge.h"
 #include "distributed/stats/query_stats.h"
 #include "distributed/stats/stat_counters.h"
 #include "distributed/subplan_execution.h"
@@ -835,6 +836,12 @@ CitusEndScan(CustomScanState *node)
 		CitusQueryStatsExecutorsEntry(queryId, executorType, partitionKeyString);
 	}
 
+	if (scanState->mergeAdapter)
+	{
+		FreeSortedMergeAdapter(scanState->mergeAdapter);
+		scanState->mergeAdapter = NULL;
+	}
+
 	if (scanState->tuplestorestate)
 	{
 		tuplestore_end(scanState->tuplestorestate);
@@ -857,7 +864,12 @@ CitusReScan(CustomScanState *node)
 	ExecScanReScan(&node->ss);
 
 	CitusScanState *scanState = (CitusScanState *) node;
-	if (scanState->tuplestorestate)
+
+	if (scanState->mergeAdapter)
+	{
+		SortedMergeAdapterRescan(scanState->mergeAdapter);
+	}
+	else if (scanState->tuplestorestate)
 	{
 		tuplestore_rescan(scanState->tuplestorestate);
 	}
diff --git a/src/backend/distributed/executor/multi_executor.c b/src/backend/distributed/executor/multi_executor.c
index 91479019df1..8f2774af057 100644
--- a/src/backend/distributed/executor/multi_executor.c
+++ b/src/backend/distributed/executor/multi_executor.c
@@ -50,6 +50,7 @@
 #include "distributed/multi_server_executor.h"
 #include "distributed/relation_access_tracking.h"
 #include "distributed/resource_lock.h"
+#include "distributed/sorted_merge.h"
 #include "distributed/transaction_management.h"
 #include "distributed/version_compat.h"
 #include "distributed/worker_protocol.h"
@@ -346,21 +347,48 @@ CitusCustomScanStateWalker(PlanState *planState, List **citusCustomScanStates)
 
 
 /*
- * ReturnTupleFromTuplestore reads the next tuple from the tuple store of the
- * given Citus scan node and returns it. It returns null if all tuples are read
- * from the tuple store.
+ * FetchNextScanTuple loads the next tuple into the scan slot.
+ * Returns true if a tuple was loaded, false if exhausted.
+ *
+ * When a merge adapter is active, it streams from the adapter.
+ * Otherwise, it reads from the tuplestore in the given direction.
  */
-TupleTableSlot *
-ReturnTupleFromTuplestore(CitusScanState *scanState)
+static inline bool
+FetchNextScanTuple(CitusScanState *scanState, bool forward, TupleTableSlot *slot)
 {
-	Tuplestorestate *tupleStore = scanState->tuplestorestate;
-	bool forwardScanDirection = true;
+	if (scanState->mergeAdapter != NULL)
+	{
+		/*
+		 * Adapter is forward-only. Backward scan should never reach here
+		 * because the planner removes CUSTOMPATH_SUPPORT_BACKWARD_SCAN
+		 * when sorted merge is active, causing PostgreSQL to insert a
+		 * Material node above us for scrollable cursors.
+		 */
+		Assert(forward);
+		return SortedMergeAdapterNext(scanState->mergeAdapter, slot);
+	}
 
+	Tuplestorestate *tupleStore = scanState->tuplestorestate;
 	if (tupleStore == NULL)
 	{
-		return NULL;
+		ExecClearTuple(slot);
+		return false;
 	}
 
+	return tuplestore_gettupleslot(tupleStore, forward, false, slot);
+}
+
+
+/*
+ * ReturnTupleFromTuplestore reads the next tuple from the tuple store (or
+ * streaming merge adapter) of the given Citus scan node and returns it.
+ * It returns null if all tuples are read.
+ */
+TupleTableSlot *
+ReturnTupleFromTuplestore(CitusScanState *scanState)
+{
+	bool forwardScanDirection = true;
+
 	EState *executorState = ScanStateGetExecutorState(scanState);
 	ScanDirection scanDirection = executorState->es_direction;
 	Assert(ScanDirectionIsValid(scanDirection));
@@ -376,9 +404,9 @@ ReturnTupleFromTuplestore(CitusScanState *scanState)
 
 	if (!qual && !projInfo)
 	{
-		/* no quals, nor projections return directly from the tuple store. */
+		/* no quals, nor projections return directly from the tuple source. */
 		TupleTableSlot *slot = scanState->customScanState.ss.ss_ScanTupleSlot;
-		tuplestore_gettupleslot(tupleStore, forwardScanDirection, false, slot);
+		FetchNextScanTuple(scanState, forwardScanDirection, slot);
 		return slot;
 	}
 
@@ -397,12 +425,10 @@ ReturnTupleFromTuplestore(CitusScanState *scanState)
 		ResetExprContext(econtext);
 
 		TupleTableSlot *slot = scanState->customScanState.ss.ss_ScanTupleSlot;
-		tuplestore_gettupleslot(tupleStore, forwardScanDirection, false, slot);
-
-		if (TupIsNull(slot))
+		if (!FetchNextScanTuple(scanState, forwardScanDirection, slot))
 		{
 			/*
-			 * When the tuple is null we have reached the end of the tuplestore. We will
+			 * When the tuple is null we have reached the end of the source. We will
 			 * return a null tuple, however, depending on the existence of a projection we
 			 * need to either return the scan tuple or the projected tuple.
 			 */
diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c
index d80216b3682..5961cca91a5 100644
--- a/src/backend/distributed/planner/distributed_planner.c
+++ b/src/backend/distributed/planner/distributed_planner.c
@@ -1499,7 +1499,18 @@ FinalizePlan(PlannedStmt *localPlan, DistributedPlan *distributedPlan)
 	customScan->custom_private = list_make1(distributedPlanData);
 
 	/* necessary to avoid extra Result node in PG15 */
-	customScan->flags = CUSTOMPATH_SUPPORT_BACKWARD_SCAN | CUSTOMPATH_SUPPORT_PROJECTION;
+	int customFlags = CUSTOMPATH_SUPPORT_PROJECTION;
+	if (!(distributedPlan->useSortedMerge && EnableStreamingSortedMerge))
+	{
+		/*
+		 * Advertise backward-scan support unless both sorted merge and
+		 * the streaming adapter are active. When streaming, the adapter
+		 * is forward-only; PostgreSQL's planner will insert a Material
+		 * node above us for scrollable cursors.
+		 */
+		customFlags |= CUSTOMPATH_SUPPORT_BACKWARD_SCAN;
+	}
+	customScan->flags = customFlags;
 
 	/*
 	 * Fast path queries cannot have any subplans by definition, so skip
diff --git a/src/test/regress/expected/multi_orderby_pushdown_streaming.out b/src/test/regress/expected/multi_orderby_pushdown_streaming.out
new file mode 100644
index 00000000000..45d3522aed0
--- /dev/null
+++ b/src/test/regress/expected/multi_orderby_pushdown_streaming.out
@@ -0,0 +1,1888 @@
+--
+-- MULTI_SORTED_MERGE_STREAMING
+--
+-- Runs the same test cases as multi_orderby_pushdown.sql but with the
+-- streaming sorted merge adapter enabled via the GUC. This validates
+-- that the streaming code path produces identical results to the eager
+-- merge path.
+--
+SET citus.enable_streaming_sorted_merge TO on;
+\i sql/multi_orderby_pushdown.sql
+--
+-- MULTI_SORTED_MERGE
+--
+-- Tests for the citus.enable_sorted_merge GUC and the sorted merge
+-- planner eligibility logic. Verifies that enabling the GUC does not
+-- introduce regressions for any query pattern.
+--
+SET citus.next_shard_id TO 960000;
+-- =================================================================
+-- Setup: create test tables
+-- =================================================================
+CREATE TABLE sorted_merge_test (
+    id int,
+    val text,
+    num numeric,
+    ts timestamptz DEFAULT now()
+);
+SELECT create_distributed_table('sorted_merge_test', 'id');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+-- Insert 100 rows + NULLs + duplicates
+INSERT INTO sorted_merge_test (id, val, num)
+SELECT i, 'val_' || i, (i * 1.5)::numeric
+FROM generate_series(1, 100) i;
+INSERT INTO sorted_merge_test (id, val, num) VALUES (101, NULL, NULL);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (102, NULL, NULL);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (200, 'dup_a', 10.5);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (201, 'dup_b', 10.5);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (202, 'dup_c', 10.5);
+-- Second table for join tests
+CREATE TABLE sorted_merge_events (
+    id int,
+    event_type text,
+    event_val int
+);
+SELECT create_distributed_table('sorted_merge_events', 'id');
+ create_distributed_table
+---------------------------------------------------------------------
+
+(1 row)
+
+INSERT INTO sorted_merge_events
+SELECT i % 50 + 1, CASE WHEN i % 3 = 0 THEN 'click' WHEN i % 3 = 1 THEN 'view' ELSE 'buy' END, i
+FROM generate_series(1, 200) i;
+-- =================================================================
+-- 1. GUC basics
+-- =================================================================
+SHOW citus.enable_sorted_merge;
+ citus.enable_sorted_merge
+---------------------------------------------------------------------
+ off
+(1 row)
+
+SET citus.enable_sorted_merge TO on;
+SHOW citus.enable_sorted_merge;
+ citus.enable_sorted_merge
+---------------------------------------------------------------------
+ on
+(1 row)
+
+SET citus.enable_sorted_merge TO off;
+-- =================================================================
+-- Category A: Eligibility — sort IS pushed to workers
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- A1: ORDER BY distribution column
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY id;
+                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 1027 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
+         Tuple data received from node: 255 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+-- A2: ORDER BY DESC
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id DESC;
+                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id
+   Task Count: 4
+   Tuple data received from nodes: 420 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id DESC
+         Tuple data received from node: 104 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id
+               Sort Key: sorted_merge_test.id DESC
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id
+(15 rows)
+
+-- A3: ORDER BY DESC NULLS LAST
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST;
+                                                          QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.num
+   Task Count: 4
+   Tuple data received from nodes: 1556 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY num DESC NULLS LAST
+         Tuple data received from node: 392 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, num
+               Sort Key: sorted_merge_test.num DESC NULLS LAST
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, num
+(15 rows)
+
+-- A4: ORDER BY non-distribution column
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY val;
+                                                  QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 1027 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY val
+         Tuple data received from node: 255 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.val
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+-- A5: Multi-column ORDER BY
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY id, val;
+                                                    QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 1027 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, val
+         Tuple data received from node: 255 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.id, sorted_merge_test.val
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+-- A6: Mixed directions
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val, num FROM sorted_merge_test ORDER BY id ASC, num DESC;
+                                                         QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   Task Count: 4
+   Tuple data received from nodes: 2163 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, num DESC
+         Tuple data received from node: 543 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val, num
+               Sort Key: sorted_merge_test.id, sorted_merge_test.num DESC
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val, num
+(15 rows)
+
+-- A7: GROUP BY dist_col ORDER BY dist_col
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id;
+                                                              QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.count
+   Task Count: 4
+   Tuple data received from nodes: 1260 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id
+         Tuple data received from node: 312 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, (count(*))
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: 25kB
+               ->  HashAggregate (actual rows=26 loops=1)
+                     Output: id, count(*)
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Output: id, val, num, ts
+(19 rows)
+
+-- A8: WHERE clause + ORDER BY
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test WHERE num > 50 ORDER BY id;
+                                                                    QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=67 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 671 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (num OPERATOR(pg_catalog.>) '50'::numeric) ORDER BY id
+         Tuple data received from node: 130 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=13 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=13 loops=1)
+                     Output: id, val
+                     Filter: (sorted_merge_test.num > '50'::numeric)
+                     Rows Removed by Filter: 13
+(17 rows)
+
+-- A9: Expression in ORDER BY (non-aggregate)
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, num FROM sorted_merge_test ORDER BY id + 1;
+                                                                                        QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.num, remote_scan.worker_column_3
+   Task Count: 4
+   Tuple data received from nodes: 1976 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, num, (id OPERATOR(pg_catalog.+) 1) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (id OPERATOR(pg_catalog.+) 1)
+         Tuple data received from node: 496 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, num, ((id + 1))
+               Sort Key: ((sorted_merge_test.id + 1))
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, num, (id + 1)
+(15 rows)
+
+-- A10: ORDER BY with LIMIT (existing pushdown, verify no regression)
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
+                                                           QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id
+         Task Count: 4
+         Tuple data received from nodes: 80 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
+               Tuple data received from node: 20 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: id
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id
+(19 rows)
+
+-- =================================================================
+-- Category B: Ineligibility — sort NOT pushed for merge
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- B1: ORDER BY count(*)
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*);
+                                                           QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.count
+   Sort Key: remote_scan.count
+   Sort Method: quicksort  Memory: 28kB
+   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+         Output: remote_scan.id, remote_scan.count
+         Task Count: 4
+         Tuple data received from nodes: 1260 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
+               Tuple data received from node: 312 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  HashAggregate (actual rows=26 loops=1)
+                     Output: id, count(*)
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Output: id, val, num, ts
+(19 rows)
+
+-- B2: ORDER BY avg(col)
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, avg(num) FROM sorted_merge_test GROUP BY id ORDER BY avg(num);
+                                                          QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.avg
+   Sort Key: remote_scan.avg
+   Sort Method: quicksort  Memory: 28kB
+   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+         Output: remote_scan.id, remote_scan.avg
+         Task Count: 4
+         Tuple data received from nodes: 1556 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id, avg(num) AS avg FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
+               Tuple data received from node: 392 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  HashAggregate (actual rows=26 loops=1)
+                     Output: id, avg(num)
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Output: id, val, num, ts
+(19 rows)
+
+-- B3: GROUP BY non-dist col, ORDER BY non-dist col
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY val;
+                                                               QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=104 loops=1)
+   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+   Sort Key: remote_scan.val
+   Sort Method: quicksort  Memory: 28kB
+   ->  HashAggregate (actual rows=104 loops=1)
+         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+         Group Key: remote_scan.val
+         ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+               Output: remote_scan.val, remote_scan.count
+               Task Count: 4
+               Tuple data received from nodes: 1447 bytes
+               Tasks Shown: One of 4
+               ->  Task
+                     Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
+                     Tuple data received from node: 359 bytes
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  HashAggregate (actual rows=26 loops=1)
+                           Output: val, count(*)
+                           Group Key: sorted_merge_test.val
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id, val, num, ts
+(23 rows)
+
+-- B4: GROUP BY non-dist col, ORDER BY aggregate
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY count(*);
+                                                               QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=104 loops=1)
+   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+   Sort Method: quicksort  Memory: 28kB
+   ->  HashAggregate (actual rows=104 loops=1)
+         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+         Group Key: remote_scan.val
+         ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+               Output: remote_scan.val, remote_scan.count
+               Task Count: 4
+               Tuple data received from nodes: 1447 bytes
+               Tasks Shown: One of 4
+               ->  Task
+                     Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
+                     Tuple data received from node: 359 bytes
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  HashAggregate (actual rows=26 loops=1)
+                           Output: val, count(*)
+                           Group Key: sorted_merge_test.val
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id, val, num, ts
+(23 rows)
+
+-- =================================================================
+-- Category C: Correctness — results match GUC off vs on
+-- =================================================================
+-- C1: Simple ORDER BY
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- C2: ORDER BY DESC
+SET citus.enable_sorted_merge TO off;
+SELECT id FROM sorted_merge_test ORDER BY id DESC LIMIT 5;
+ id
+---------------------------------------------------------------------
+ 202
+ 201
+ 200
+ 102
+ 101
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id FROM sorted_merge_test ORDER BY id DESC LIMIT 5;
+ id
+---------------------------------------------------------------------
+ 202
+ 201
+ 200
+ 102
+ 101
+(5 rows)
+
+-- C3: Multi-column ORDER BY
+SET citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+-- C4: ORDER BY non-distribution column
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+-- C5: GROUP BY dist_col ORDER BY dist_col
+SET citus.enable_sorted_merge TO off;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+-- C6: Mixed directions
+SET citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test WHERE num IS NOT NULL ORDER BY id ASC, num DESC LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, num FROM sorted_merge_test WHERE num IS NOT NULL ORDER BY id ASC, num DESC LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+-- C7: WHERE + ORDER BY
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test WHERE num > 100 ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+ 67 | val_67
+ 68 | val_68
+ 69 | val_69
+ 70 | val_70
+ 71 | val_71
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test WHERE num > 100 ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+ 67 | val_67
+ 68 | val_68
+ 69 | val_69
+ 70 | val_70
+ 71 | val_71
+(5 rows)
+
+-- C8: Aggregates in SELECT, ORDER BY on dist_col (GROUP BY dist_col)
+SET citus.enable_sorted_merge TO off;
+SELECT id, count(*), sum(num), avg(num) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count | sum |          avg
+---------------------------------------------------------------------
+  1 |     1 | 1.5 | 1.50000000000000000000
+  2 |     1 | 3.0 |     3.0000000000000000
+  3 |     1 | 4.5 |     4.5000000000000000
+  4 |     1 | 6.0 |     6.0000000000000000
+  5 |     1 | 7.5 |     7.5000000000000000
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*), sum(num), avg(num) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count | sum |          avg
+---------------------------------------------------------------------
+  1 |     1 | 1.5 | 1.50000000000000000000
+  2 |     1 | 3.0 |     3.0000000000000000
+  3 |     1 | 4.5 |     4.5000000000000000
+  4 |     1 | 6.0 |     6.0000000000000000
+  5 |     1 | 7.5 |     7.5000000000000000
+(5 rows)
+
+-- =================================================================
+-- Category D: Complex queries — regression guards
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- D1: Subquery in FROM with ORDER BY
+SELECT * FROM (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5
+) sub ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D2: CTE with ORDER BY
+WITH top5 AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5
+)
+SELECT * FROM top5 ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D3: Co-located JOIN + ORDER BY
+SELECT t.id, t.val, e.event_type
+FROM sorted_merge_test t
+JOIN sorted_merge_events e ON t.id = e.id
+WHERE t.id <= 5
+ORDER BY t.id, e.event_type
+LIMIT 10;
+ id |  val  | event_type
+---------------------------------------------------------------------
+  1 | val_1 | buy
+  1 | val_1 | buy
+  1 | val_1 | click
+  1 | val_1 | view
+  2 | val_2 | buy
+  2 | val_2 | click
+  2 | val_2 | view
+  2 | val_2 | view
+  3 | val_3 | buy
+  3 | val_3 | buy
+(10 rows)
+
+-- D4: UNION ALL + ORDER BY
+SELECT id, val FROM sorted_merge_test WHERE id <= 3
+UNION ALL
+SELECT id, val FROM sorted_merge_test WHERE id BETWEEN 98 AND 100
+ORDER BY id;
+ id  |   val
+---------------------------------------------------------------------
+   1 | val_1
+   2 | val_2
+   3 | val_3
+  98 | val_98
+  99 | val_99
+ 100 | val_100
+(6 rows)
+
+-- D5: DISTINCT + ORDER BY
+SELECT DISTINCT id FROM sorted_merge_test WHERE id <= 10 ORDER BY id;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+(10 rows)
+
+-- D6: DISTINCT ON + ORDER BY
+SELECT DISTINCT ON (id) id, val, num
+FROM sorted_merge_test
+WHERE id <= 5
+ORDER BY id, num DESC;
+ id |  val  | num
+---------------------------------------------------------------------
+  1 | val_1 | 1.5
+  2 | val_2 | 3.0
+  3 | val_3 | 4.5
+  4 | val_4 | 6.0
+  5 | val_5 | 7.5
+(5 rows)
+
+-- D7: EXISTS subquery + ORDER BY
+SELECT id, val FROM sorted_merge_test t
+WHERE EXISTS (SELECT 1 FROM sorted_merge_events e WHERE e.id = t.id)
+ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D8: IN subquery + ORDER BY
+SELECT id, val FROM sorted_merge_test
+WHERE id IN (SELECT id FROM sorted_merge_events WHERE event_type = 'click')
+ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D9: Multiple aggregates, GROUP BY dist_col, ORDER BY dist_col
+SELECT id, count(*), sum(num), avg(num), min(val), max(val)
+FROM sorted_merge_test
+GROUP BY id
+ORDER BY id
+LIMIT 5;
+ id | count | sum |          avg           |  min  |  max
+---------------------------------------------------------------------
+  1 |     1 | 1.5 | 1.50000000000000000000 | val_1 | val_1
+  2 |     1 | 3.0 |     3.0000000000000000 | val_2 | val_2
+  3 |     1 | 4.5 |     4.5000000000000000 | val_3 | val_3
+  4 |     1 | 6.0 |     6.0000000000000000 | val_4 | val_4
+  5 |     1 | 7.5 |     7.5000000000000000 | val_5 | val_5
+(5 rows)
+
+-- D10: CASE expression in SELECT + ORDER BY
+SELECT id,
+       CASE WHEN num > 75 THEN 'high' WHEN num > 25 THEN 'mid' ELSE 'low' END as bucket
+FROM sorted_merge_test
+WHERE num IS NOT NULL
+ORDER BY id
+LIMIT 10;
+ id | bucket
+---------------------------------------------------------------------
+  1 | low
+  2 | low
+  3 | low
+  4 | low
+  5 | low
+  6 | low
+  7 | low
+  8 | low
+  9 | low
+ 10 | low
+(10 rows)
+
+-- D11: NULL values ordering
+SELECT id, num FROM sorted_merge_test ORDER BY num NULLS FIRST, id LIMIT 5;
+ id  | num
+---------------------------------------------------------------------
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
+(5 rows)
+
+SELECT id, num FROM sorted_merge_test ORDER BY num NULLS LAST, id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS FIRST, id LIMIT 5;
+ id  |  num
+---------------------------------------------------------------------
+ 101 |
+ 102 |
+ 100 | 150.0
+  99 | 148.5
+  98 | 147.0
+(5 rows)
+
+SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST, id DESC LIMIT 5;
+ id  |  num
+---------------------------------------------------------------------
+ 100 | 150.0
+  99 | 148.5
+  98 | 147.0
+  97 | 145.5
+  96 | 144.0
+(5 rows)
+
+-- D12: Large OFFSET
+SELECT id FROM sorted_merge_test ORDER BY id OFFSET 100 LIMIT 5;
+ id
+---------------------------------------------------------------------
+ 101
+ 102
+ 200
+ 201
+ 202
+(5 rows)
+
+-- D13: ORDER BY ordinal position
+SELECT id, val FROM sorted_merge_test ORDER BY 2, 1 LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+-- =================================================================
+-- Category E: Edge cases
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- E1: Empty result set
+SELECT id FROM sorted_merge_test WHERE id < 0 ORDER BY id;
+ id
+---------------------------------------------------------------------
+(0 rows)
+
+-- E2: Single row (may go through router planner)
+SELECT id, val FROM sorted_merge_test WHERE id = 42 ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+ 42 | val_42
+(1 row)
+
+-- E3: All rows with same sort value
+SELECT id, num FROM sorted_merge_test WHERE num = 10.5 ORDER BY num, id;
+ id  | num
+---------------------------------------------------------------------
+   7 | 10.5
+ 200 | 10.5
+ 201 | 10.5
+ 202 | 10.5
+(4 rows)
+
+-- E4: Wide sort key (4 columns)
+SELECT id, val, num FROM sorted_merge_test
+WHERE id <= 5
+ORDER BY num, val, id
+LIMIT 5;
+ id |  val  | num
+---------------------------------------------------------------------
+  1 | val_1 | 1.5
+  2 | val_2 | 3.0
+  3 | val_3 | 4.5
+  4 | val_4 | 6.0
+  5 | val_5 | 7.5
+(5 rows)
+
+-- E5: Zero-task defensive path
+-- CreatePerTaskDispatchDest handles taskCount=0 gracefully (returns a no-op
+-- destination). This cannot be triggered via normal SQL because distributed
+-- tables always have at least one shard. The closest we can test is an
+-- empty-result query through the sorted merge path to verify no crash.
+SELECT id FROM sorted_merge_test WHERE false ORDER BY id;
+ id
+---------------------------------------------------------------------
+(0 rows)
+
+-- =================================================================
+-- Category F: Existing LIMIT pushdown stability
+-- =================================================================
+-- F1: Simple LIMIT + ORDER BY: plan unchanged between GUC off and on
+SET citus.enable_sorted_merge TO off;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
+                                                              QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id
+   ->  Sort (actual rows=5 loops=1)
+         Output: remote_scan.id
+         Sort Key: remote_scan.id
+         Sort Method: top-N heapsort  Memory: 25kB
+         ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
+               Output: remote_scan.id
+               Task Count: 4
+               Tuple data received from nodes: 80 bytes
+               Tasks Shown: One of 4
+               ->  Task
+                     Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
+                     Tuple data received from node: 20 bytes
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit (actual rows=5 loops=1)
+                           Output: id
+                           ->  Sort (actual rows=5 loops=1)
+                                 Output: id
+                                 Sort Key: sorted_merge_test.id
+                                 Sort Method: top-N heapsort  Memory: 25kB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Output: id
+(23 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
+                                                           QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id
+         Task Count: 4
+         Tuple data received from nodes: 80 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
+               Tuple data received from node: 20 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: id
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id
+(19 rows)
+
+-- F2: GROUP BY dist_col + ORDER BY + LIMIT
+SET citus.enable_sorted_merge TO off;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+                                                                             QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.count
+   ->  Sort (actual rows=5 loops=1)
+         Output: remote_scan.id, remote_scan.count
+         Sort Key: remote_scan.id
+         Sort Method: top-N heapsort  Memory: 25kB
+         ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.count
+               Task Count: 4
+               Tuple data received from nodes: 240 bytes
+               Tasks Shown: One of 4
+               ->  Task
+                     Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT '5'::bigint
+                     Tuple data received from node: 60 bytes
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit (actual rows=5 loops=1)
+                           Output: id, (count(*))
+                           ->  Sort (actual rows=5 loops=1)
+                                 Output: id, (count(*))
+                                 Sort Key: sorted_merge_test.id
+                                 Sort Method: top-N heapsort  Memory: 25kB
+                                 ->  HashAggregate (actual rows=26 loops=1)
+                                       Output: id, count(*)
+                                       Group Key: sorted_merge_test.id
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id, val, num, ts
+(27 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+                                                                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.count
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id, remote_scan.count
+         Task Count: 4
+         Tuple data received from nodes: 240 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT '5'::bigint
+               Tuple data received from node: 60 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: id, (count(*))
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: id, (count(*))
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  HashAggregate (actual rows=26 loops=1)
+                                 Output: id, count(*)
+                                 Group Key: sorted_merge_test.id
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Output: id, val, num, ts
+(23 rows)
+
+-- F3: ORDER BY aggregate + LIMIT (not eligible for merge)
+SET citus.enable_sorted_merge TO off;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+-- =================================================================
+-- Category G: Phase 4 — Sort elision and advanced scenarios
+-- =================================================================
+-- G1: Sort elision verification — coordinator Sort node absent
+SET citus.enable_sorted_merge TO off;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY id;
+                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Sort Key: remote_scan.id
+   Sort Method: quicksort  Memory: 28kB
+   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+         Output: remote_scan.id, remote_scan.val
+         Task Count: 4
+         Tuple data received from nodes: 1027 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true
+               Tuple data received from node: 255 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY id;
+                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 1027 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
+         Tuple data received from node: 255 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+-- G2a: PREPARE with merge ON, EXECUTE after turning OFF
+-- Plan-time decision is baked in — cached plan must still merge correctly
+SET citus.enable_sorted_merge TO on;
+PREPARE merge_on_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+SET citus.enable_sorted_merge TO off;
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+DEALLOCATE merge_on_stmt;
+-- G2b: PREPARE with merge OFF, EXECUTE after turning ON
+-- Cached plan has Sort node — must still return sorted results
+SET citus.enable_sorted_merge TO off;
+PREPARE merge_off_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+DEALLOCATE merge_off_stmt;
+-- G3: Cursor with backward scan
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+DECLARE sorted_cursor CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
+FETCH 3 FROM sorted_cursor;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+(3 rows)
+
+FETCH BACKWARD 1 FROM sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:355: ERROR:  cursor can only scan forward
+HINT:  Declare it with SCROLL option to enable backward scan.
+FETCH 2 FROM sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:356: ERROR:  current transaction is aborted, commands ignored until end of transaction block
+CLOSE sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:357: ERROR:  current transaction is aborted, commands ignored until end of transaction block
+COMMIT;
+-- G4: EXPLAIN ANALYZE (sorted merge skipped for EXPLAIN ANALYZE)
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
+                                                           QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id
+         Task Count: 4
+         Tuple data received from nodes: 80 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
+               Tuple data received from node: 20 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: id
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id
+(19 rows)
+
+-- G5: ORDER BY aggregate + LIMIT — crash regression test
+-- Previously caused SIGSEGV when sorted merge was enabled because
+-- aggregate ORDER BY was erroneously tagged as merge-eligible.
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 3;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+(3 rows)
+
+-- G6: Small work_mem with many tasks (32 shards)
+SET citus.enable_sorted_merge TO on;
+SET work_mem TO '64kB';
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 10;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+(10 rows)
+
+RESET work_mem;
+-- G7: max_intermediate_result_size with CTE subplan
+SET citus.enable_sorted_merge TO on;
+SET citus.max_intermediate_result_size TO '4kB';
+WITH cte AS (SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 50)
+SELECT * FROM cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+RESET citus.max_intermediate_result_size;
+-- =================================================================
+-- Category H: Subplan + Sorted Merge interactions
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- H1: CTE subplan with simple ORDER BY — eligible for sorted merge
+-- The CTE becomes a subplan; its DistributedPlan may have useSortedMerge=true
+WITH ordered_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id
+)
+SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- H2: Multiple CTEs — one eligible (ORDER BY col), one ineligible (ORDER BY agg)
+WITH eligible_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+),
+ineligible_cte AS (
+    SELECT id, count(*) as cnt FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 15
+)
+SELECT e.id, e.val, i.cnt
+FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
+ORDER BY e.id;
+ id |  val   | cnt
+---------------------------------------------------------------------
+  1 | val_1  |   1
+  2 | val_2  |   1
+  3 | val_3  |   1
+  4 | val_4  |   1
+  5 | val_5  |   1
+  6 | val_6  |   1
+  7 | val_7  |   1
+  8 | val_8  |   1
+  9 | val_9  |   1
+ 10 | val_10 |   1
+ 11 | val_11 |   1
+ 12 | val_12 |   1
+ 13 | val_13 |   1
+ 14 | val_14 |   1
+ 15 | val_15 |   1
+(15 rows)
+
+-- H3: CTE subplan feeding outer ORDER BY — both levels may merge independently
+WITH top_ids AS (
+    SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT t.id, t.val
+FROM sorted_merge_test t
+JOIN top_ids ON t.id = top_ids.id
+ORDER BY t.id
+LIMIT 10;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- H4: Subquery in WHERE with ORDER BY + LIMIT — becomes subplan with merge
+SELECT id, val FROM sorted_merge_test
+WHERE id IN (
+    SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
+)
+ORDER BY id
+LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+(3 rows)
+
+-- H5: CTE subplan with max_intermediate_result_size enforcement
+-- Tests that EnsureIntermediateSizeLimitNotExceeded works through per-task dispatch
+SET citus.max_intermediate_result_size TO '4kB';
+WITH small_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM small_cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+RESET citus.max_intermediate_result_size;
+-- H6: Cross-join subplan with non-aggregate ORDER BY (crash regression variant)
+-- Similar pattern to subquery_complex_target_list but without aggregate ORDER BY
+SELECT foo.id, bar.id as bar_id
+FROM
+    (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
+    (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
+ORDER BY foo.id, bar.id
+LIMIT 5;
+ id | bar_id
+---------------------------------------------------------------------
+  1 |      1
+  1 |      1
+  1 |      1
+  2 |      1
+  2 |      1
+(5 rows)
+
+-- H7: CTE correctness comparison — GUC off vs on must produce identical results
+SET citus.enable_sorted_merge TO off;
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+ id |  val   | num
+---------------------------------------------------------------------
+  7 | val_7  | 10.5
+  8 | val_8  | 12.0
+  9 | val_9  | 13.5
+ 10 | val_10 | 15.0
+ 11 | val_11 | 16.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+ id |  val   | num
+---------------------------------------------------------------------
+  7 | val_7  | 10.5
+  8 | val_8  | 12.0
+  9 | val_9  | 13.5
+ 10 | val_10 | 15.0
+ 11 | val_11 | 16.5
+(5 rows)
+
+-- =================================================================
+-- Category H EXPLAIN: Query plans for subplan + sorted merge
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- H1 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH ordered_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id
+)
+SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
+                                                                                                                                                                                     QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id, remote_scan.val
+         Task Count: 4
+         Tuple data received from nodes: 191 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT ordered_cte.id AS worker_column_1, ordered_cte.val AS worker_column_2 FROM (SELECT sorted_merge_test.id, sorted_merge_test.val FROM public.sorted_merge_test_960000 sorted_merge_test ORDER BY sorted_merge_test.id) ordered_cte) worker_subquery ORDER BY worker_column_1 LIMIT '5'::bigint
+               Tuple data received from node: 47 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: sorted_merge_test.id, sorted_merge_test.val
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: sorted_merge_test.id, sorted_merge_test.val
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: sorted_merge_test.id, sorted_merge_test.val
+(19 rows)
+
+-- H2 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH eligible_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+),
+ineligible_cte AS (
+    SELECT id, count(*) as cnt FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 15
+)
+SELECT e.id, e.val, i.cnt
+FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
+ORDER BY e.id;
+                                                                                                                                                                                                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val, remote_scan.cnt
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 397 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.val
+               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                     Output: remote_scan.id, remote_scan.val
+                     Task Count: 4
+                     Tuple data received from nodes: 791 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                           Tuple data received from node: 197 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=20 loops=1)
+                                 Output: id, val
+                                 ->  Sort (actual rows=20 loops=1)
+                                       Output: id, val
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: 26kB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id, val
+   ->  Distributed Subplan XXX_2
+         Intermediate Data Size: 330 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=15 loops=1)
+               Output: remote_scan.id, remote_scan.cnt
+               ->  Sort (actual rows=15 loops=1)
+                     Output: remote_scan.id, remote_scan.cnt
+                     Sort Key: remote_scan.cnt DESC, remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (never executed)
+                           Output: remote_scan.id, remote_scan.cnt
+                           Task Count: 4
+                           Tuple data received from nodes: 720 bytes
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Query: SELECT id, count(*) AS cnt FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (count(*)) DESC, id LIMIT '15'::bigint
+                                 Tuple data received from node: 180 bytes
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  Limit (actual rows=15 loops=1)
+                                       Output: id, (count(*))
+                                       ->  Sort (actual rows=15 loops=1)
+                                             Output: id, (count(*))
+                                             Sort Key: (count(*)) DESC, sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: 26kB
+                                             ->  HashAggregate (actual rows=26 loops=1)
+                                                   Output: id, count(*)
+                                                   Group Key: sorted_merge_test.id
+                                                   ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                                         Output: id, val, num, ts
+   Task Count: 1
+   Tuple data received from nodes: 87 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT e.id, e.val, i.cnt FROM ((SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) e JOIN (SELECT intermediate_result.id, intermediate_result.cnt FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer, cnt bigint)) i ON ((e.id OPERATOR(pg_catalog.=) i.id))) ORDER BY e.id
+         Tuple data received from node: 87 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Merge Join (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result_1.cnt
+               Merge Cond: (intermediate_result.id = intermediate_result_1.id)
+               ->  Sort (actual rows=6 loops=1)
+                     Output: intermediate_result.id, intermediate_result.val
+                     Sort Key: intermediate_result.id
+                     Sort Method: quicksort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                           Output: intermediate_result.id, intermediate_result.val
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+               ->  Sort (actual rows=15 loops=1)
+                     Output: intermediate_result_1.cnt, intermediate_result_1.id
+                     Sort Key: intermediate_result_1.id
+                     Sort Method: quicksort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=15 loops=1)
+                           Output: intermediate_result_1.cnt, intermediate_result_1.id
+                           Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
+(77 rows)
+
+-- H3 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH top_ids AS (
+    SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT t.id, t.val
+FROM sorted_merge_test t
+JOIN top_ids ON t.id = top_ids.id
+ORDER BY t.id
+LIMIT 10;
+                                                                                                                                                                                                                     QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=10 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
+         Output: remote_scan.id, remote_scan.val
+         ->  Distributed Subplan XXX_1
+               Intermediate Data Size: 200 bytes
+               Result destination: Send to 2 nodes
+               ->  Limit (actual rows=20 loops=1)
+                     Output: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                           Output: remote_scan.id
+                           Task Count: 4
+                           Tuple data received from nodes: 320 bytes
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                                 Tuple data received from node: 80 bytes
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  Limit (actual rows=20 loops=1)
+                                       Output: id
+                                       ->  Sort (actual rows=20 loops=1)
+                                             Output: id
+                                             Sort Key: sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: 25kB
+                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                                   Output: id
+         Task Count: 4
+         Tuple data received from nodes: 97 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT t.id AS worker_column_1, t.val AS worker_column_2 FROM (public.sorted_merge_test_960000 t JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) top_ids ON ((t.id OPERATOR(pg_catalog.=) top_ids.id)))) worker_subquery ORDER BY worker_column_1 LIMIT '10'::bigint
+               Tuple data received from node: 97 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=10 loops=1)
+                     Output: t.id, t.val
+                     ->  Merge Join (actual rows=10 loops=1)
+                           Output: t.id, t.val
+                           Merge Cond: (intermediate_result.id = t.id)
+                           ->  Sort (actual rows=10 loops=1)
+                                 Output: intermediate_result.id
+                                 Sort Key: intermediate_result.id
+                                 Sort Method: quicksort  Memory: 25kB
+                                 ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                                       Output: intermediate_result.id
+                                       Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           ->  Sort (actual rows=10 loops=1)
+                                 Output: t.id, t.val
+                                 Sort Key: t.id
+                                 Sort Method: quicksort  Memory: 25kB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 t (actual rows=26 loops=1)
+                                       Output: t.id, t.val
+(51 rows)
+
+-- H4 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test
+WHERE id IN (
+    SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
+)
+ORDER BY id
+LIMIT 5;
+                                                                                                                                                                                                                                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=3 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=3 loops=1)
+         Output: remote_scan.id, remote_scan.val
+         ->  Distributed Subplan XXX_1
+               Intermediate Data Size: 100 bytes
+               Result destination: Send to 2 nodes
+               ->  Limit (actual rows=10 loops=1)
+                     Output: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (actual rows=40 loops=1)
+                           Output: remote_scan.id
+                           Task Count: 4
+                           Tuple data received from nodes: 160 bytes
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT '10'::bigint
+                                 Tuple data received from node: 40 bytes
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  Limit (actual rows=10 loops=1)
+                                       Output: id
+                                       ->  Sort (actual rows=10 loops=1)
+                                             Output: id
+                                             Sort Key: sorted_merge_events.id
+                                             Sort Method: top-N heapsort  Memory: 25kB
+                                             ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=56 loops=1)
+                                                   Output: id
+         Task Count: 4
+         Tuple data received from nodes: 27 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT sorted_merge_test.id AS worker_column_1, sorted_merge_test.val AS worker_column_2 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (sorted_merge_test.id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))) worker_subquery ORDER BY worker_column_1 LIMIT '5'::bigint
+               Tuple data received from node: 27 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=3 loops=1)
+                     Output: sorted_merge_test.id, sorted_merge_test.val
+                     ->  Sort (actual rows=3 loops=1)
+                           Output: sorted_merge_test.id, sorted_merge_test.val
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: quicksort  Memory: 25kB
+                           ->  Hash Semi Join (actual rows=3 loops=1)
+                                 Output: sorted_merge_test.id, sorted_merge_test.val
+                                 Hash Cond: (sorted_merge_test.id = intermediate_result.id)
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Output: sorted_merge_test.id, sorted_merge_test.val, sorted_merge_test.num, sorted_merge_test.ts
+                                 ->  Hash (actual rows=10 loops=1)
+                                       Output: intermediate_result.id
+                                       ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=10 loops=1)
+                                             Output: intermediate_result.id
+                                             Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+(50 rows)
+
+-- H5 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH small_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM small_cte ORDER BY id LIMIT 5;
+                                                                                                                  QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 397 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.val
+               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                     Output: remote_scan.id, remote_scan.val
+                     Task Count: 4
+                     Tuple data received from nodes: 791 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                           Tuple data received from node: 197 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=20 loops=1)
+                                 Output: id, val
+                                 ->  Sort (actual rows=20 loops=1)
+                                       Output: id, val
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: 26kB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id, val
+   Task Count: 1
+   Tuple data received from nodes: 47 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val FROM (SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) small_cte ORDER BY id LIMIT 5
+         Tuple data received from node: 47 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result.val
+               ->  Sort (actual rows=5 loops=1)
+                     Output: intermediate_result.id, intermediate_result.val
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                           Output: intermediate_result.id, intermediate_result.val
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+(40 rows)
+
+-- H6 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT foo.id, bar.id as bar_id
+FROM
+    (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
+    (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
+ORDER BY foo.id, bar.id
+LIMIT 5;
+                                                                                                                                                                                    QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.bar_id
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 30 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=3 loops=1)
+               Output: remote_scan.id
+               ->  Custom Scan (Citus Adaptive) (actual rows=12 loops=1)
+                     Output: remote_scan.id
+                     Task Count: 4
+                     Tuple data received from nodes: 48 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '3'::bigint
+                           Tuple data received from node: 12 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=3 loops=1)
+                                 Output: id
+                                 ->  Sort (actual rows=3 loops=1)
+                                       Output: id
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: top-N heapsort  Memory: 25kB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id
+   ->  Distributed Subplan XXX_2
+         Intermediate Data Size: 30 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=3 loops=1)
+               Output: remote_scan.id
+               ->  Custom Scan (Citus Adaptive) (actual rows=12 loops=1)
+                     Output: remote_scan.id
+                     Task Count: 4
+                     Tuple data received from nodes: 48 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT '3'::bigint
+                           Tuple data received from node: 12 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=3 loops=1)
+                                 Output: id
+                                 ->  Sort (actual rows=3 loops=1)
+                                       Output: id
+                                       Sort Key: sorted_merge_events.id
+                                       Sort Method: top-N heapsort  Memory: 25kB
+                                       ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=56 loops=1)
+                                             Output: id
+   Task Count: 1
+   Tuple data received from nodes: 40 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT foo.id, bar.id AS bar_id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) foo, (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) bar ORDER BY foo.id, bar.id LIMIT 5
+         Tuple data received from node: 40 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result_1.id
+               ->  Sort (actual rows=5 loops=1)
+                     Output: intermediate_result.id, intermediate_result_1.id
+                     Sort Key: intermediate_result.id, intermediate_result_1.id
+                     Sort Method: quicksort  Memory: 25kB
+                     ->  Nested Loop (actual rows=9 loops=1)
+                           Output: intermediate_result.id, intermediate_result_1.id
+                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=3 loops=1)
+                                 Output: intermediate_result.id
+                                 Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=3 loops=3)
+                                 Output: intermediate_result_1.id
+                                 Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
+(67 rows)
+
+-- H7 EXPLAIN — GUC off vs on
+SET citus.enable_sorted_merge TO off;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+                                                                                                                                                             QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 691 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.val, remote_scan.num
+               ->  Sort (actual rows=20 loops=1)
+                     Output: remote_scan.id, remote_scan.val, remote_scan.num
+                     Sort Key: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (never executed)
+                           Output: remote_scan.id, remote_scan.val, remote_scan.num
+                           Task Count: 4
+                           Tuple data received from nodes: 1673 bytes
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                                 Tuple data received from node: 419 bytes
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  Limit (actual rows=20 loops=1)
+                                       Output: id, val, num
+                                       ->  Sort (actual rows=20 loops=1)
+                                             Output: id, val, num
+                                             Sort Key: sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: 26kB
+                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                                   Output: id, val, num
+   Task Count: 1
+   Tuple data received from nodes: 103 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (10)::numeric) ORDER BY id LIMIT 5
+         Tuple data received from node: 103 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+               ->  Sort (actual rows=5 loops=1)
+                     Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=14 loops=1)
+                           Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           Filter: (intermediate_result.num > '10'::numeric)
+                           Rows Removed by Filter: 6
+(45 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+                                                                                                                                                             QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 699 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.val, remote_scan.num
+               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                     Output: remote_scan.id, remote_scan.val, remote_scan.num
+                     Task Count: 4
+                     Tuple data received from nodes: 1673 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                           Tuple data received from node: 419 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=20 loops=1)
+                                 Output: id, val, num
+                                 ->  Sort (actual rows=20 loops=1)
+                                       Output: id, val, num
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: 26kB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id, val, num
+   Task Count: 1
+   Tuple data received from nodes: 101 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (10)::numeric) ORDER BY id LIMIT 5
+         Tuple data received from node: 101 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+               ->  Sort (actual rows=5 loops=1)
+                     Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=18 loops=1)
+                           Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           Filter: (intermediate_result.num > '10'::numeric)
+                           Rows Removed by Filter: 2
+(42 rows)
+
+-- =================================================================
+-- Cleanup
+-- =================================================================
+SET citus.enable_sorted_merge TO off;
+DROP TABLE sorted_merge_test;
+DROP TABLE sorted_merge_events;
+RESET citus.enable_streaming_sorted_merge;
diff --git a/src/test/regress/sql/multi_orderby_pushdown_streaming.sql b/src/test/regress/sql/multi_orderby_pushdown_streaming.sql
new file mode 100644
index 00000000000..e7faed04373
--- /dev/null
+++ b/src/test/regress/sql/multi_orderby_pushdown_streaming.sql
@@ -0,0 +1,14 @@
+--
+-- MULTI_SORTED_MERGE_STREAMING
+--
+-- Runs the same test cases as multi_orderby_pushdown.sql but with the
+-- streaming sorted merge adapter enabled via the GUC. This validates
+-- that the streaming code path produces identical results to the eager
+-- merge path.
+--
+
+SET citus.enable_streaming_sorted_merge TO on;
+
+\i sql/multi_orderby_pushdown.sql
+
+RESET citus.enable_streaming_sorted_merge;

From f1f7f3599b7905d85476f73e1ef926869f8ab98e Mon Sep 17 00:00:00 2001
From: Neil Deshpande <ndeshpan@microsoft.com>
Date: Mon, 13 Apr 2026 17:48:32 +0000
Subject: [PATCH 4/7] Refactor regress test for order by pushdown and handle
 scroll cursors

---
 .../distributed/executor/multi_executor.c     |   22 +-
 .../distributed/planner/distributed_planner.c |   16 +
 .../multi_orderby_pushdown_streaming.out      | 1947 ++++++++++++++++-
 .../regress/sql/multi_orderby_pushdown.sql    |   48 +-
 .../sql/multi_orderby_pushdown_streaming.sql  |   22 +-
 .../sql/setup_multi_orderby_pushdown.sql      |   45 +
 6 files changed, 2033 insertions(+), 67 deletions(-)
 create mode 100644 src/test/regress/sql/setup_multi_orderby_pushdown.sql

diff --git a/src/backend/distributed/executor/multi_executor.c b/src/backend/distributed/executor/multi_executor.c
index 8f2774af057..e9857fda136 100644
--- a/src/backend/distributed/executor/multi_executor.c
+++ b/src/backend/distributed/executor/multi_executor.c
@@ -359,12 +359,24 @@ FetchNextScanTuple(CitusScanState *scanState, bool forward, TupleTableSlot *slot
 	if (scanState->mergeAdapter != NULL)
 	{
 		/*
-		 * Adapter is forward-only. Backward scan should never reach here
-		 * because the planner removes CUSTOMPATH_SUPPORT_BACKWARD_SCAN
-		 * when sorted merge is active, causing PostgreSQL to insert a
-		 * Material node above us for scrollable cursors.
+		 * The streaming merge adapter is forward-only.
+		 *
+		 * Citus replaces the entire plan tree after standard_planner()
+		 * returns, so PostgreSQL's cursor-time materialize_finished_plan()
+		 * check does not see the Citus CustomScan. That means SCROLL
+		 * cursors can reach here with a backward scan request even though
+		 * the adapter cannot satisfy it. Report a user-facing error
+		 * rather than crashing.
 		 */
-		Assert(forward);
+		if (!forward)
+		{
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("streaming sorted merge does not support "
+							"backward scan"),
+					 errhint("Use SET citus.enable_streaming_sorted_merge "
+							 "TO off to allow backward scan.")));
+		}
 		return SortedMergeAdapterNext(scanState->mergeAdapter, slot);
 	}
 
diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c
index 5961cca91a5..0db8cf338cf 100644
--- a/src/backend/distributed/planner/distributed_planner.c
+++ b/src/backend/distributed/planner/distributed_planner.c
@@ -855,6 +855,22 @@ CreateDistributedPlannedStmt(DistributedPlanningContext *planContext)
 	/* create final plan by combining local plan with distributed plan */
 	resultPlan = FinalizePlan(planContext->plan, distributedPlan);
 
+	/*
+	 * When the streaming sorted merge adapter is active, the CustomScan
+	 * does not support backward scan. If the query is a SCROLL cursor,
+	 * insert a Material node above the plan tree so backward fetches work.
+	 *
+	 * Normally standard_planner() handles this (planner.c:447-451), but
+	 * Citus replaces the plan tree after standard_planner returns via
+	 * FinalizePlan(), losing any Material node it inserted.
+	 */
+	if ((planContext->cursorOptions & CURSOR_OPT_SCROLL) &&
+		distributedPlan->useSortedMerge && EnableStreamingSortedMerge &&
+		!ExecSupportsBackwardScan(resultPlan->planTree))
+	{
+		resultPlan->planTree = materialize_finished_plan(resultPlan->planTree);
+	}
+
 	/*
 	 * As explained above, force planning costs to be unrealistically high if
 	 * query planning failed (possibly) due to prepared statement parameters or
diff --git a/src/test/regress/expected/multi_orderby_pushdown_streaming.out b/src/test/regress/expected/multi_orderby_pushdown_streaming.out
index 45d3522aed0..b000e7a0c5f 100644
--- a/src/test/regress/expected/multi_orderby_pushdown_streaming.out
+++ b/src/test/regress/expected/multi_orderby_pushdown_streaming.out
@@ -1,19 +1,21 @@
 --
--- MULTI_SORTED_MERGE_STREAMING
+-- MULTI_ORDERBY_PUSHDOWN_STREAMING
 --
--- Runs the same test cases as multi_orderby_pushdown.sql but with the
--- streaming sorted merge adapter enabled via the GUC. This validates
--- that the streaming code path produces identical results to the eager
--- merge path.
+-- Runs the sorted merge test suite (multi_orderby_pushdown.sql) twice:
+-- first with the default eager-merge path, then with the streaming
+-- adapter enabled via citus.enable_streaming_sorted_merge. Both runs
+-- share the same setup tables and must produce identical results
+-- (except for the G3 backward-scan test, where the streaming adapter's
+-- forward-only cursor correctly errors on FETCH BACKWARD).
 --
-SET citus.enable_streaming_sorted_merge TO on;
-\i sql/multi_orderby_pushdown.sql
+\i sql/setup_multi_orderby_pushdown.sql
 --
--- MULTI_SORTED_MERGE
+-- SETUP_MULTI_ORDERBY_PUSHDOWN
 --
--- Tests for the citus.enable_sorted_merge GUC and the sorted merge
--- planner eligibility logic. Verifies that enabling the GUC does not
--- introduce regressions for any query pattern.
+-- Creates the test tables and data used by multi_orderby_pushdown.sql
+-- and its variants (e.g., multi_orderby_pushdown_streaming.sql).
+-- This file is meant to be included via \i from test files that need
+-- these tables.
 --
 SET citus.next_shard_id TO 960000;
 -- =================================================================
@@ -55,6 +57,15 @@ SELECT create_distributed_table('sorted_merge_events', 'id');
 INSERT INTO sorted_merge_events
 SELECT i % 50 + 1, CASE WHEN i % 3 = 0 THEN 'click' WHEN i % 3 = 1 THEN 'view' ELSE 'buy' END, i
 FROM generate_series(1, 200) i;
+-- Run 1: eager merge (default)
+\i sql/multi_orderby_pushdown.sql
+--
+-- MULTI_SORTED_MERGE
+--
+-- Tests for the citus.enable_sorted_merge GUC and the sorted merge
+-- planner eligibility logic. Verifies that enabling the GUC does not
+-- introduce regressions for any query pattern.
+--
 -- =================================================================
 -- 1. GUC basics
 -- =================================================================
@@ -1164,12 +1175,46 @@ FETCH 3 FROM sorted_cursor;
 (3 rows)
 
 FETCH BACKWARD 1 FROM sorted_cursor;
-psql:sql/multi_orderby_pushdown.sql:355: ERROR:  cursor can only scan forward
-HINT:  Declare it with SCROLL option to enable backward scan.
+ id
+---------------------------------------------------------------------
+  2
+(1 row)
+
 FETCH 2 FROM sorted_cursor;
-psql:sql/multi_orderby_pushdown.sql:356: ERROR:  current transaction is aborted, commands ignored until end of transaction block
+ id
+---------------------------------------------------------------------
+  3
+  4
+(2 rows)
+
 CLOSE sorted_cursor;
-psql:sql/multi_orderby_pushdown.sql:357: ERROR:  current transaction is aborted, commands ignored until end of transaction block
+COMMIT;
+-- G3b: SCROLL cursor with backward scan
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+DECLARE sorted_scroll_cursor SCROLL CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
+FETCH 3 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+(3 rows)
+
+FETCH BACKWARD 1 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  2
+(1 row)
+
+FETCH 2 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  3
+  4
+(2 rows)
+
+CLOSE sorted_scroll_cursor;
 COMMIT;
 -- G4: EXPLAIN ANALYZE (sorted merge skipped for EXPLAIN ANALYZE)
 SET citus.enable_sorted_merge TO on;
@@ -1883,6 +1928,1872 @@ SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
 -- Cleanup
 -- =================================================================
 SET citus.enable_sorted_merge TO off;
-DROP TABLE sorted_merge_test;
-DROP TABLE sorted_merge_events;
-RESET citus.enable_streaming_sorted_merge;
+-- Run 2: streaming adapter
+SET citus.enable_streaming_sorted_merge TO on;
+\i sql/multi_orderby_pushdown.sql
+--
+-- MULTI_SORTED_MERGE
+--
+-- Tests for the citus.enable_sorted_merge GUC and the sorted merge
+-- planner eligibility logic. Verifies that enabling the GUC does not
+-- introduce regressions for any query pattern.
+--
+-- =================================================================
+-- 1. GUC basics
+-- =================================================================
+SHOW citus.enable_sorted_merge;
+ citus.enable_sorted_merge
+---------------------------------------------------------------------
+ off
+(1 row)
+
+SET citus.enable_sorted_merge TO on;
+SHOW citus.enable_sorted_merge;
+ citus.enable_sorted_merge
+---------------------------------------------------------------------
+ on
+(1 row)
+
+SET citus.enable_sorted_merge TO off;
+-- =================================================================
+-- Category A: Eligibility — sort IS pushed to workers
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- A1: ORDER BY distribution column
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY id;
+                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 1027 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
+         Tuple data received from node: 255 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+-- A2: ORDER BY DESC
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id DESC;
+                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id
+   Task Count: 4
+   Tuple data received from nodes: 420 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id DESC
+         Tuple data received from node: 104 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id
+               Sort Key: sorted_merge_test.id DESC
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id
+(15 rows)
+
+-- A3: ORDER BY DESC NULLS LAST
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST;
+                                                          QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.num
+   Task Count: 4
+   Tuple data received from nodes: 1556 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY num DESC NULLS LAST
+         Tuple data received from node: 392 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, num
+               Sort Key: sorted_merge_test.num DESC NULLS LAST
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, num
+(15 rows)
+
+-- A4: ORDER BY non-distribution column
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY val;
+                                                  QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 1027 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY val
+         Tuple data received from node: 255 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.val
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+-- A5: Multi-column ORDER BY
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY id, val;
+                                                    QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 1027 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, val
+         Tuple data received from node: 255 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.id, sorted_merge_test.val
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+-- A6: Mixed directions
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val, num FROM sorted_merge_test ORDER BY id ASC, num DESC;
+                                                         QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   Task Count: 4
+   Tuple data received from nodes: 2163 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, num DESC
+         Tuple data received from node: 543 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val, num
+               Sort Key: sorted_merge_test.id, sorted_merge_test.num DESC
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val, num
+(15 rows)
+
+-- A7: GROUP BY dist_col ORDER BY dist_col
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id;
+                                                              QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.count
+   Task Count: 4
+   Tuple data received from nodes: 1260 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id
+         Tuple data received from node: 312 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, (count(*))
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: 25kB
+               ->  HashAggregate (actual rows=26 loops=1)
+                     Output: id, count(*)
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Output: id, val, num, ts
+(19 rows)
+
+-- A8: WHERE clause + ORDER BY
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test WHERE num > 50 ORDER BY id;
+                                                                    QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=67 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 671 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (num OPERATOR(pg_catalog.>) '50'::numeric) ORDER BY id
+         Tuple data received from node: 130 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=13 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=13 loops=1)
+                     Output: id, val
+                     Filter: (sorted_merge_test.num > '50'::numeric)
+                     Rows Removed by Filter: 13
+(17 rows)
+
+-- A9: Expression in ORDER BY (non-aggregate)
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, num FROM sorted_merge_test ORDER BY id + 1;
+                                                                                        QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.num, remote_scan.worker_column_3
+   Task Count: 4
+   Tuple data received from nodes: 1976 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, num, (id OPERATOR(pg_catalog.+) 1) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (id OPERATOR(pg_catalog.+) 1)
+         Tuple data received from node: 496 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, num, ((id + 1))
+               Sort Key: ((sorted_merge_test.id + 1))
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, num, (id + 1)
+(15 rows)
+
+-- A10: ORDER BY with LIMIT (existing pushdown, verify no regression)
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
+                                                           QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id
+         Task Count: 4
+         Tuple data received from nodes: 80 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
+               Tuple data received from node: 20 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: id
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id
+(19 rows)
+
+-- =================================================================
+-- Category B: Ineligibility — sort NOT pushed for merge
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- B1: ORDER BY count(*)
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*);
+                                                           QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.count
+   Sort Key: remote_scan.count
+   Sort Method: quicksort  Memory: 28kB
+   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+         Output: remote_scan.id, remote_scan.count
+         Task Count: 4
+         Tuple data received from nodes: 1260 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
+               Tuple data received from node: 312 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  HashAggregate (actual rows=26 loops=1)
+                     Output: id, count(*)
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Output: id, val, num, ts
+(19 rows)
+
+-- B2: ORDER BY avg(col)
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, avg(num) FROM sorted_merge_test GROUP BY id ORDER BY avg(num);
+                                                          QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.avg
+   Sort Key: remote_scan.avg
+   Sort Method: quicksort  Memory: 28kB
+   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+         Output: remote_scan.id, remote_scan.avg
+         Task Count: 4
+         Tuple data received from nodes: 1556 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id, avg(num) AS avg FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
+               Tuple data received from node: 392 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  HashAggregate (actual rows=26 loops=1)
+                     Output: id, avg(num)
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Output: id, val, num, ts
+(19 rows)
+
+-- B3: GROUP BY non-dist col, ORDER BY non-dist col
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY val;
+                                                               QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=104 loops=1)
+   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+   Sort Key: remote_scan.val
+   Sort Method: quicksort  Memory: 28kB
+   ->  HashAggregate (actual rows=104 loops=1)
+         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+         Group Key: remote_scan.val
+         ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+               Output: remote_scan.val, remote_scan.count
+               Task Count: 4
+               Tuple data received from nodes: 1447 bytes
+               Tasks Shown: One of 4
+               ->  Task
+                     Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
+                     Tuple data received from node: 359 bytes
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  HashAggregate (actual rows=26 loops=1)
+                           Output: val, count(*)
+                           Group Key: sorted_merge_test.val
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id, val, num, ts
+(23 rows)
+
+-- B4: GROUP BY non-dist col, ORDER BY aggregate
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY count(*);
+                                                               QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=104 loops=1)
+   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+   Sort Method: quicksort  Memory: 28kB
+   ->  HashAggregate (actual rows=104 loops=1)
+         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+         Group Key: remote_scan.val
+         ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+               Output: remote_scan.val, remote_scan.count
+               Task Count: 4
+               Tuple data received from nodes: 1447 bytes
+               Tasks Shown: One of 4
+               ->  Task
+                     Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
+                     Tuple data received from node: 359 bytes
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  HashAggregate (actual rows=26 loops=1)
+                           Output: val, count(*)
+                           Group Key: sorted_merge_test.val
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id, val, num, ts
+(23 rows)
+
+-- =================================================================
+-- Category C: Correctness — results match GUC off vs on
+-- =================================================================
+-- C1: Simple ORDER BY
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- C2: ORDER BY DESC
+SET citus.enable_sorted_merge TO off;
+SELECT id FROM sorted_merge_test ORDER BY id DESC LIMIT 5;
+ id
+---------------------------------------------------------------------
+ 202
+ 201
+ 200
+ 102
+ 101
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id FROM sorted_merge_test ORDER BY id DESC LIMIT 5;
+ id
+---------------------------------------------------------------------
+ 202
+ 201
+ 200
+ 102
+ 101
+(5 rows)
+
+-- C3: Multi-column ORDER BY
+SET citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+-- C4: ORDER BY non-distribution column
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+-- C5: GROUP BY dist_col ORDER BY dist_col
+SET citus.enable_sorted_merge TO off;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+-- C6: Mixed directions
+SET citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test WHERE num IS NOT NULL ORDER BY id ASC, num DESC LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, num FROM sorted_merge_test WHERE num IS NOT NULL ORDER BY id ASC, num DESC LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+-- C7: WHERE + ORDER BY
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test WHERE num > 100 ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+ 67 | val_67
+ 68 | val_68
+ 69 | val_69
+ 70 | val_70
+ 71 | val_71
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test WHERE num > 100 ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+ 67 | val_67
+ 68 | val_68
+ 69 | val_69
+ 70 | val_70
+ 71 | val_71
+(5 rows)
+
+-- C8: Aggregates in SELECT, ORDER BY on dist_col (GROUP BY dist_col)
+SET citus.enable_sorted_merge TO off;
+SELECT id, count(*), sum(num), avg(num) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count | sum |          avg
+---------------------------------------------------------------------
+  1 |     1 | 1.5 | 1.50000000000000000000
+  2 |     1 | 3.0 |     3.0000000000000000
+  3 |     1 | 4.5 |     4.5000000000000000
+  4 |     1 | 6.0 |     6.0000000000000000
+  5 |     1 | 7.5 |     7.5000000000000000
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*), sum(num), avg(num) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count | sum |          avg
+---------------------------------------------------------------------
+  1 |     1 | 1.5 | 1.50000000000000000000
+  2 |     1 | 3.0 |     3.0000000000000000
+  3 |     1 | 4.5 |     4.5000000000000000
+  4 |     1 | 6.0 |     6.0000000000000000
+  5 |     1 | 7.5 |     7.5000000000000000
+(5 rows)
+
+-- =================================================================
+-- Category D: Complex queries — regression guards
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- D1: Subquery in FROM with ORDER BY
+SELECT * FROM (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5
+) sub ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D2: CTE with ORDER BY
+WITH top5 AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5
+)
+SELECT * FROM top5 ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D3: Co-located JOIN + ORDER BY
+SELECT t.id, t.val, e.event_type
+FROM sorted_merge_test t
+JOIN sorted_merge_events e ON t.id = e.id
+WHERE t.id <= 5
+ORDER BY t.id, e.event_type
+LIMIT 10;
+ id |  val  | event_type
+---------------------------------------------------------------------
+  1 | val_1 | buy
+  1 | val_1 | buy
+  1 | val_1 | click
+  1 | val_1 | view
+  2 | val_2 | buy
+  2 | val_2 | click
+  2 | val_2 | view
+  2 | val_2 | view
+  3 | val_3 | buy
+  3 | val_3 | buy
+(10 rows)
+
+-- D4: UNION ALL + ORDER BY
+SELECT id, val FROM sorted_merge_test WHERE id <= 3
+UNION ALL
+SELECT id, val FROM sorted_merge_test WHERE id BETWEEN 98 AND 100
+ORDER BY id;
+ id  |   val
+---------------------------------------------------------------------
+   1 | val_1
+   2 | val_2
+   3 | val_3
+  98 | val_98
+  99 | val_99
+ 100 | val_100
+(6 rows)
+
+-- D5: DISTINCT + ORDER BY
+SELECT DISTINCT id FROM sorted_merge_test WHERE id <= 10 ORDER BY id;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+(10 rows)
+
+-- D6: DISTINCT ON + ORDER BY
+SELECT DISTINCT ON (id) id, val, num
+FROM sorted_merge_test
+WHERE id <= 5
+ORDER BY id, num DESC;
+ id |  val  | num
+---------------------------------------------------------------------
+  1 | val_1 | 1.5
+  2 | val_2 | 3.0
+  3 | val_3 | 4.5
+  4 | val_4 | 6.0
+  5 | val_5 | 7.5
+(5 rows)
+
+-- D7: EXISTS subquery + ORDER BY
+SELECT id, val FROM sorted_merge_test t
+WHERE EXISTS (SELECT 1 FROM sorted_merge_events e WHERE e.id = t.id)
+ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D8: IN subquery + ORDER BY
+SELECT id, val FROM sorted_merge_test
+WHERE id IN (SELECT id FROM sorted_merge_events WHERE event_type = 'click')
+ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D9: Multiple aggregates, GROUP BY dist_col, ORDER BY dist_col
+SELECT id, count(*), sum(num), avg(num), min(val), max(val)
+FROM sorted_merge_test
+GROUP BY id
+ORDER BY id
+LIMIT 5;
+ id | count | sum |          avg           |  min  |  max
+---------------------------------------------------------------------
+  1 |     1 | 1.5 | 1.50000000000000000000 | val_1 | val_1
+  2 |     1 | 3.0 |     3.0000000000000000 | val_2 | val_2
+  3 |     1 | 4.5 |     4.5000000000000000 | val_3 | val_3
+  4 |     1 | 6.0 |     6.0000000000000000 | val_4 | val_4
+  5 |     1 | 7.5 |     7.5000000000000000 | val_5 | val_5
+(5 rows)
+
+-- D10: CASE expression in SELECT + ORDER BY
+SELECT id,
+       CASE WHEN num > 75 THEN 'high' WHEN num > 25 THEN 'mid' ELSE 'low' END as bucket
+FROM sorted_merge_test
+WHERE num IS NOT NULL
+ORDER BY id
+LIMIT 10;
+ id | bucket
+---------------------------------------------------------------------
+  1 | low
+  2 | low
+  3 | low
+  4 | low
+  5 | low
+  6 | low
+  7 | low
+  8 | low
+  9 | low
+ 10 | low
+(10 rows)
+
+-- D11: NULL values ordering
+SELECT id, num FROM sorted_merge_test ORDER BY num NULLS FIRST, id LIMIT 5;
+ id  | num
+---------------------------------------------------------------------
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
+(5 rows)
+
+SELECT id, num FROM sorted_merge_test ORDER BY num NULLS LAST, id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS FIRST, id LIMIT 5;
+ id  |  num
+---------------------------------------------------------------------
+ 101 |
+ 102 |
+ 100 | 150.0
+  99 | 148.5
+  98 | 147.0
+(5 rows)
+
+SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST, id DESC LIMIT 5;
+ id  |  num
+---------------------------------------------------------------------
+ 100 | 150.0
+  99 | 148.5
+  98 | 147.0
+  97 | 145.5
+  96 | 144.0
+(5 rows)
+
+-- D12: Large OFFSET
+SELECT id FROM sorted_merge_test ORDER BY id OFFSET 100 LIMIT 5;
+ id
+---------------------------------------------------------------------
+ 101
+ 102
+ 200
+ 201
+ 202
+(5 rows)
+
+-- D13: ORDER BY ordinal position
+SELECT id, val FROM sorted_merge_test ORDER BY 2, 1 LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+-- =================================================================
+-- Category E: Edge cases
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- E1: Empty result set
+SELECT id FROM sorted_merge_test WHERE id < 0 ORDER BY id;
+ id
+---------------------------------------------------------------------
+(0 rows)
+
+-- E2: Single row (may go through router planner)
+SELECT id, val FROM sorted_merge_test WHERE id = 42 ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+ 42 | val_42
+(1 row)
+
+-- E3: All rows with same sort value
+SELECT id, num FROM sorted_merge_test WHERE num = 10.5 ORDER BY num, id;
+ id  | num
+---------------------------------------------------------------------
+   7 | 10.5
+ 200 | 10.5
+ 201 | 10.5
+ 202 | 10.5
+(4 rows)
+
+-- E4: Wide sort key (4 columns)
+SELECT id, val, num FROM sorted_merge_test
+WHERE id <= 5
+ORDER BY num, val, id
+LIMIT 5;
+ id |  val  | num
+---------------------------------------------------------------------
+  1 | val_1 | 1.5
+  2 | val_2 | 3.0
+  3 | val_3 | 4.5
+  4 | val_4 | 6.0
+  5 | val_5 | 7.5
+(5 rows)
+
+-- E5: Zero-task defensive path
+-- CreatePerTaskDispatchDest handles taskCount=0 gracefully (returns a no-op
+-- destination). This cannot be triggered via normal SQL because distributed
+-- tables always have at least one shard. The closest we can test is an
+-- empty-result query through the sorted merge path to verify no crash.
+SELECT id FROM sorted_merge_test WHERE false ORDER BY id;
+ id
+---------------------------------------------------------------------
+(0 rows)
+
+-- =================================================================
+-- Category F: Existing LIMIT pushdown stability
+-- =================================================================
+-- F1: Simple LIMIT + ORDER BY: plan unchanged between GUC off and on
+SET citus.enable_sorted_merge TO off;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
+                                                              QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id
+   ->  Sort (actual rows=5 loops=1)
+         Output: remote_scan.id
+         Sort Key: remote_scan.id
+         Sort Method: top-N heapsort  Memory: 25kB
+         ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
+               Output: remote_scan.id
+               Task Count: 4
+               Tuple data received from nodes: 80 bytes
+               Tasks Shown: One of 4
+               ->  Task
+                     Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
+                     Tuple data received from node: 20 bytes
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit (actual rows=5 loops=1)
+                           Output: id
+                           ->  Sort (actual rows=5 loops=1)
+                                 Output: id
+                                 Sort Key: sorted_merge_test.id
+                                 Sort Method: top-N heapsort  Memory: 25kB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Output: id
+(23 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
+                                                           QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id
+         Task Count: 4
+         Tuple data received from nodes: 80 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
+               Tuple data received from node: 20 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: id
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id
+(19 rows)
+
+-- F2: GROUP BY dist_col + ORDER BY + LIMIT
+SET citus.enable_sorted_merge TO off;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+                                                                             QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.count
+   ->  Sort (actual rows=5 loops=1)
+         Output: remote_scan.id, remote_scan.count
+         Sort Key: remote_scan.id
+         Sort Method: top-N heapsort  Memory: 25kB
+         ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.count
+               Task Count: 4
+               Tuple data received from nodes: 240 bytes
+               Tasks Shown: One of 4
+               ->  Task
+                     Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT '5'::bigint
+                     Tuple data received from node: 60 bytes
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit (actual rows=5 loops=1)
+                           Output: id, (count(*))
+                           ->  Sort (actual rows=5 loops=1)
+                                 Output: id, (count(*))
+                                 Sort Key: sorted_merge_test.id
+                                 Sort Method: top-N heapsort  Memory: 25kB
+                                 ->  HashAggregate (actual rows=26 loops=1)
+                                       Output: id, count(*)
+                                       Group Key: sorted_merge_test.id
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id, val, num, ts
+(27 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+                                                                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.count
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id, remote_scan.count
+         Task Count: 4
+         Tuple data received from nodes: 240 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT '5'::bigint
+               Tuple data received from node: 60 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: id, (count(*))
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: id, (count(*))
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  HashAggregate (actual rows=26 loops=1)
+                                 Output: id, count(*)
+                                 Group Key: sorted_merge_test.id
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Output: id, val, num, ts
+(23 rows)
+
+-- F3: ORDER BY aggregate + LIMIT (not eligible for merge)
+SET citus.enable_sorted_merge TO off;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+-- =================================================================
+-- Category G: Phase 4 — Sort elision and advanced scenarios
+-- =================================================================
+-- G1: Sort elision verification — coordinator Sort node absent
+SET citus.enable_sorted_merge TO off;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY id;
+                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Sort (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Sort Key: remote_scan.id
+   Sort Method: quicksort  Memory: 28kB
+   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+         Output: remote_scan.id, remote_scan.val
+         Task Count: 4
+         Tuple data received from nodes: 1027 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true
+               Tuple data received from node: 255 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test ORDER BY id;
+                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: 4
+   Tuple data received from nodes: 1027 bytes
+   Tasks Shown: One of 4
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
+         Tuple data received from node: 255 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Sort (actual rows=26 loops=1)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: 25kB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     Output: id, val
+(15 rows)
+
+-- G2a: PREPARE with merge ON, EXECUTE after turning OFF
+-- Plan-time decision is baked in — cached plan must still merge correctly
+SET citus.enable_sorted_merge TO on;
+PREPARE merge_on_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+SET citus.enable_sorted_merge TO off;
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+DEALLOCATE merge_on_stmt;
+-- G2b: PREPARE with merge OFF, EXECUTE after turning ON
+-- Cached plan has Sort node — must still return sorted results
+SET citus.enable_sorted_merge TO off;
+PREPARE merge_off_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+DEALLOCATE merge_off_stmt;
+-- G3: Cursor with backward scan
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+DECLARE sorted_cursor CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
+FETCH 3 FROM sorted_cursor;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+(3 rows)
+
+FETCH BACKWARD 1 FROM sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:319: ERROR:  cursor can only scan forward
+HINT:  Declare it with SCROLL option to enable backward scan.
+FETCH 2 FROM sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:320: ERROR:  current transaction is aborted, commands ignored until end of transaction block
+CLOSE sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:321: ERROR:  current transaction is aborted, commands ignored until end of transaction block
+COMMIT;
+-- G3b: SCROLL cursor with backward scan
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+DECLARE sorted_scroll_cursor SCROLL CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
+FETCH 3 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+(3 rows)
+
+FETCH BACKWARD 1 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  2
+(1 row)
+
+FETCH 2 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  3
+  4
+(2 rows)
+
+CLOSE sorted_scroll_cursor;
+COMMIT;
+-- G4: EXPLAIN ANALYZE (sorted merge skipped for EXPLAIN ANALYZE)
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
+                                                           QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id
+         Task Count: 4
+         Tuple data received from nodes: 80 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
+               Tuple data received from node: 20 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: id
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: id
+(19 rows)
+
+-- G5: ORDER BY aggregate + LIMIT — crash regression test
+-- Previously caused SIGSEGV when sorted merge was enabled because
+-- aggregate ORDER BY was erroneously tagged as merge-eligible.
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 3;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+(3 rows)
+
+-- G6: Small work_mem with many tasks (32 shards)
+SET citus.enable_sorted_merge TO on;
+SET work_mem TO '64kB';
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 10;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+(10 rows)
+
+RESET work_mem;
+-- G7: max_intermediate_result_size with CTE subplan
+SET citus.enable_sorted_merge TO on;
+SET citus.max_intermediate_result_size TO '4kB';
+WITH cte AS (SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 50)
+SELECT * FROM cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+RESET citus.max_intermediate_result_size;
+-- =================================================================
+-- Category H: Subplan + Sorted Merge interactions
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- H1: CTE subplan with simple ORDER BY — eligible for sorted merge
+-- The CTE becomes a subplan; its DistributedPlan may have useSortedMerge=true
+WITH ordered_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id
+)
+SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- H2: Multiple CTEs — one eligible (ORDER BY col), one ineligible (ORDER BY agg)
+WITH eligible_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+),
+ineligible_cte AS (
+    SELECT id, count(*) as cnt FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 15
+)
+SELECT e.id, e.val, i.cnt
+FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
+ORDER BY e.id;
+ id |  val   | cnt
+---------------------------------------------------------------------
+  1 | val_1  |   1
+  2 | val_2  |   1
+  3 | val_3  |   1
+  4 | val_4  |   1
+  5 | val_5  |   1
+  6 | val_6  |   1
+  7 | val_7  |   1
+  8 | val_8  |   1
+  9 | val_9  |   1
+ 10 | val_10 |   1
+ 11 | val_11 |   1
+ 12 | val_12 |   1
+ 13 | val_13 |   1
+ 14 | val_14 |   1
+ 15 | val_15 |   1
+(15 rows)
+
+-- H3: CTE subplan feeding outer ORDER BY — both levels may merge independently
+WITH top_ids AS (
+    SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT t.id, t.val
+FROM sorted_merge_test t
+JOIN top_ids ON t.id = top_ids.id
+ORDER BY t.id
+LIMIT 10;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- H4: Subquery in WHERE with ORDER BY + LIMIT — becomes subplan with merge
+SELECT id, val FROM sorted_merge_test
+WHERE id IN (
+    SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
+)
+ORDER BY id
+LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+(3 rows)
+
+-- H5: CTE subplan with max_intermediate_result_size enforcement
+-- Tests that EnsureIntermediateSizeLimitNotExceeded works through per-task dispatch
+SET citus.max_intermediate_result_size TO '4kB';
+WITH small_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM small_cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+RESET citus.max_intermediate_result_size;
+-- H6: Cross-join subplan with non-aggregate ORDER BY (crash regression variant)
+-- Similar pattern to subquery_complex_target_list but without aggregate ORDER BY
+SELECT foo.id, bar.id as bar_id
+FROM
+    (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
+    (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
+ORDER BY foo.id, bar.id
+LIMIT 5;
+ id | bar_id
+---------------------------------------------------------------------
+  1 |      1
+  1 |      1
+  1 |      1
+  2 |      1
+  2 |      1
+(5 rows)
+
+-- H7: CTE correctness comparison — GUC off vs on must produce identical results
+SET citus.enable_sorted_merge TO off;
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+ id |  val   | num
+---------------------------------------------------------------------
+  7 | val_7  | 10.5
+  8 | val_8  | 12.0
+  9 | val_9  | 13.5
+ 10 | val_10 | 15.0
+ 11 | val_11 | 16.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+ id |  val   | num
+---------------------------------------------------------------------
+  7 | val_7  | 10.5
+  8 | val_8  | 12.0
+  9 | val_9  | 13.5
+ 10 | val_10 | 15.0
+ 11 | val_11 | 16.5
+(5 rows)
+
+-- =================================================================
+-- Category H EXPLAIN: Query plans for subplan + sorted merge
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- H1 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH ordered_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id
+)
+SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
+                                                                                                                                                                                     QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+         Output: remote_scan.id, remote_scan.val
+         Task Count: 4
+         Tuple data received from nodes: 191 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT ordered_cte.id AS worker_column_1, ordered_cte.val AS worker_column_2 FROM (SELECT sorted_merge_test.id, sorted_merge_test.val FROM public.sorted_merge_test_960000 sorted_merge_test ORDER BY sorted_merge_test.id) ordered_cte) worker_subquery ORDER BY worker_column_1 LIMIT '5'::bigint
+               Tuple data received from node: 47 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=5 loops=1)
+                     Output: sorted_merge_test.id, sorted_merge_test.val
+                     ->  Sort (actual rows=5 loops=1)
+                           Output: sorted_merge_test.id, sorted_merge_test.val
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: 25kB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Output: sorted_merge_test.id, sorted_merge_test.val
+(19 rows)
+
+-- H2 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH eligible_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+),
+ineligible_cte AS (
+    SELECT id, count(*) as cnt FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 15
+)
+SELECT e.id, e.val, i.cnt
+FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
+ORDER BY e.id;
+                                                                                                                                                                                                                                 QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val, remote_scan.cnt
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 397 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.val
+               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                     Output: remote_scan.id, remote_scan.val
+                     Task Count: 4
+                     Tuple data received from nodes: 791 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                           Tuple data received from node: 197 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=20 loops=1)
+                                 Output: id, val
+                                 ->  Sort (actual rows=20 loops=1)
+                                       Output: id, val
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: 26kB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id, val
+   ->  Distributed Subplan XXX_2
+         Intermediate Data Size: 330 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=15 loops=1)
+               Output: remote_scan.id, remote_scan.cnt
+               ->  Sort (actual rows=15 loops=1)
+                     Output: remote_scan.id, remote_scan.cnt
+                     Sort Key: remote_scan.cnt DESC, remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (never executed)
+                           Output: remote_scan.id, remote_scan.cnt
+                           Task Count: 4
+                           Tuple data received from nodes: 720 bytes
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Query: SELECT id, count(*) AS cnt FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (count(*)) DESC, id LIMIT '15'::bigint
+                                 Tuple data received from node: 180 bytes
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  Limit (actual rows=15 loops=1)
+                                       Output: id, (count(*))
+                                       ->  Sort (actual rows=15 loops=1)
+                                             Output: id, (count(*))
+                                             Sort Key: (count(*)) DESC, sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: 26kB
+                                             ->  HashAggregate (actual rows=26 loops=1)
+                                                   Output: id, count(*)
+                                                   Group Key: sorted_merge_test.id
+                                                   ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                                         Output: id, val, num, ts
+   Task Count: 1
+   Tuple data received from nodes: 87 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT e.id, e.val, i.cnt FROM ((SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) e JOIN (SELECT intermediate_result.id, intermediate_result.cnt FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer, cnt bigint)) i ON ((e.id OPERATOR(pg_catalog.=) i.id))) ORDER BY e.id
+         Tuple data received from node: 87 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Merge Join (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result_1.cnt
+               Merge Cond: (intermediate_result.id = intermediate_result_1.id)
+               ->  Sort (actual rows=6 loops=1)
+                     Output: intermediate_result.id, intermediate_result.val
+                     Sort Key: intermediate_result.id
+                     Sort Method: quicksort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                           Output: intermediate_result.id, intermediate_result.val
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+               ->  Sort (actual rows=15 loops=1)
+                     Output: intermediate_result_1.cnt, intermediate_result_1.id
+                     Sort Key: intermediate_result_1.id
+                     Sort Method: quicksort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=15 loops=1)
+                           Output: intermediate_result_1.cnt, intermediate_result_1.id
+                           Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
+(77 rows)
+
+-- H3 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH top_ids AS (
+    SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT t.id, t.val
+FROM sorted_merge_test t
+JOIN top_ids ON t.id = top_ids.id
+ORDER BY t.id
+LIMIT 10;
+                                                                                                                                                                                                                     QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=10 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
+         Output: remote_scan.id, remote_scan.val
+         ->  Distributed Subplan XXX_1
+               Intermediate Data Size: 200 bytes
+               Result destination: Send to 2 nodes
+               ->  Limit (actual rows=20 loops=1)
+                     Output: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                           Output: remote_scan.id
+                           Task Count: 4
+                           Tuple data received from nodes: 320 bytes
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                                 Tuple data received from node: 80 bytes
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  Limit (actual rows=20 loops=1)
+                                       Output: id
+                                       ->  Sort (actual rows=20 loops=1)
+                                             Output: id
+                                             Sort Key: sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: 25kB
+                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                                   Output: id
+         Task Count: 4
+         Tuple data received from nodes: 97 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT t.id AS worker_column_1, t.val AS worker_column_2 FROM (public.sorted_merge_test_960000 t JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) top_ids ON ((t.id OPERATOR(pg_catalog.=) top_ids.id)))) worker_subquery ORDER BY worker_column_1 LIMIT '10'::bigint
+               Tuple data received from node: 97 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=10 loops=1)
+                     Output: t.id, t.val
+                     ->  Merge Join (actual rows=10 loops=1)
+                           Output: t.id, t.val
+                           Merge Cond: (intermediate_result.id = t.id)
+                           ->  Sort (actual rows=10 loops=1)
+                                 Output: intermediate_result.id
+                                 Sort Key: intermediate_result.id
+                                 Sort Method: quicksort  Memory: 25kB
+                                 ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                                       Output: intermediate_result.id
+                                       Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           ->  Sort (actual rows=10 loops=1)
+                                 Output: t.id, t.val
+                                 Sort Key: t.id
+                                 Sort Method: quicksort  Memory: 25kB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 t (actual rows=26 loops=1)
+                                       Output: t.id, t.val
+(51 rows)
+
+-- H4 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT id, val FROM sorted_merge_test
+WHERE id IN (
+    SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
+)
+ORDER BY id
+LIMIT 5;
+                                                                                                                                                                                                                                          QUERY PLAN
+---------------------------------------------------------------------
+ Limit (actual rows=3 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=3 loops=1)
+         Output: remote_scan.id, remote_scan.val
+         ->  Distributed Subplan XXX_1
+               Intermediate Data Size: 100 bytes
+               Result destination: Send to 2 nodes
+               ->  Limit (actual rows=10 loops=1)
+                     Output: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (actual rows=40 loops=1)
+                           Output: remote_scan.id
+                           Task Count: 4
+                           Tuple data received from nodes: 160 bytes
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT '10'::bigint
+                                 Tuple data received from node: 40 bytes
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  Limit (actual rows=10 loops=1)
+                                       Output: id
+                                       ->  Sort (actual rows=10 loops=1)
+                                             Output: id
+                                             Sort Key: sorted_merge_events.id
+                                             Sort Method: top-N heapsort  Memory: 25kB
+                                             ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=56 loops=1)
+                                                   Output: id
+         Task Count: 4
+         Tuple data received from nodes: 27 bytes
+         Tasks Shown: One of 4
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT sorted_merge_test.id AS worker_column_1, sorted_merge_test.val AS worker_column_2 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (sorted_merge_test.id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))) worker_subquery ORDER BY worker_column_1 LIMIT '5'::bigint
+               Tuple data received from node: 27 bytes
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit (actual rows=3 loops=1)
+                     Output: sorted_merge_test.id, sorted_merge_test.val
+                     ->  Sort (actual rows=3 loops=1)
+                           Output: sorted_merge_test.id, sorted_merge_test.val
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: quicksort  Memory: 25kB
+                           ->  Hash Semi Join (actual rows=3 loops=1)
+                                 Output: sorted_merge_test.id, sorted_merge_test.val
+                                 Hash Cond: (sorted_merge_test.id = intermediate_result.id)
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Output: sorted_merge_test.id, sorted_merge_test.val, sorted_merge_test.num, sorted_merge_test.ts
+                                 ->  Hash (actual rows=10 loops=1)
+                                       Output: intermediate_result.id
+                                       ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=10 loops=1)
+                                             Output: intermediate_result.id
+                                             Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+(50 rows)
+
+-- H5 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH small_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM small_cte ORDER BY id LIMIT 5;
+                                                                                                                  QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 397 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.val
+               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                     Output: remote_scan.id, remote_scan.val
+                     Task Count: 4
+                     Tuple data received from nodes: 791 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                           Tuple data received from node: 197 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=20 loops=1)
+                                 Output: id, val
+                                 ->  Sort (actual rows=20 loops=1)
+                                       Output: id, val
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: 26kB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id, val
+   Task Count: 1
+   Tuple data received from nodes: 47 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val FROM (SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) small_cte ORDER BY id LIMIT 5
+         Tuple data received from node: 47 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result.val
+               ->  Sort (actual rows=5 loops=1)
+                     Output: intermediate_result.id, intermediate_result.val
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                           Output: intermediate_result.id, intermediate_result.val
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+(40 rows)
+
+-- H6 EXPLAIN
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+SELECT foo.id, bar.id as bar_id
+FROM
+    (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
+    (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
+ORDER BY foo.id, bar.id
+LIMIT 5;
+                                                                                                                                                                                    QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.bar_id
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 30 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=3 loops=1)
+               Output: remote_scan.id
+               ->  Custom Scan (Citus Adaptive) (actual rows=12 loops=1)
+                     Output: remote_scan.id
+                     Task Count: 4
+                     Tuple data received from nodes: 48 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '3'::bigint
+                           Tuple data received from node: 12 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=3 loops=1)
+                                 Output: id
+                                 ->  Sort (actual rows=3 loops=1)
+                                       Output: id
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: top-N heapsort  Memory: 25kB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id
+   ->  Distributed Subplan XXX_2
+         Intermediate Data Size: 30 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=3 loops=1)
+               Output: remote_scan.id
+               ->  Custom Scan (Citus Adaptive) (actual rows=12 loops=1)
+                     Output: remote_scan.id
+                     Task Count: 4
+                     Tuple data received from nodes: 48 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT '3'::bigint
+                           Tuple data received from node: 12 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=3 loops=1)
+                                 Output: id
+                                 ->  Sort (actual rows=3 loops=1)
+                                       Output: id
+                                       Sort Key: sorted_merge_events.id
+                                       Sort Method: top-N heapsort  Memory: 25kB
+                                       ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=56 loops=1)
+                                             Output: id
+   Task Count: 1
+   Tuple data received from nodes: 40 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT foo.id, bar.id AS bar_id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) foo, (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) bar ORDER BY foo.id, bar.id LIMIT 5
+         Tuple data received from node: 40 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result_1.id
+               ->  Sort (actual rows=5 loops=1)
+                     Output: intermediate_result.id, intermediate_result_1.id
+                     Sort Key: intermediate_result.id, intermediate_result_1.id
+                     Sort Method: quicksort  Memory: 25kB
+                     ->  Nested Loop (actual rows=9 loops=1)
+                           Output: intermediate_result.id, intermediate_result_1.id
+                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=3 loops=1)
+                                 Output: intermediate_result.id
+                                 Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=3 loops=3)
+                                 Output: intermediate_result_1.id
+                                 Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
+(67 rows)
+
+-- H7 EXPLAIN — GUC off vs on
+SET citus.enable_sorted_merge TO off;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+                                                                                                                                                             QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 691 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.val, remote_scan.num
+               ->  Sort (actual rows=20 loops=1)
+                     Output: remote_scan.id, remote_scan.val, remote_scan.num
+                     Sort Key: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (never executed)
+                           Output: remote_scan.id, remote_scan.val, remote_scan.num
+                           Task Count: 4
+                           Tuple data received from nodes: 1673 bytes
+                           Tasks Shown: One of 4
+                           ->  Task
+                                 Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                                 Tuple data received from node: 419 bytes
+                                 Node: host=localhost port=xxxxx dbname=regression
+                                 ->  Limit (actual rows=20 loops=1)
+                                       Output: id, val, num
+                                       ->  Sort (actual rows=20 loops=1)
+                                             Output: id, val, num
+                                             Sort Key: sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: 26kB
+                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                                   Output: id, val, num
+   Task Count: 1
+   Tuple data received from nodes: 103 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (10)::numeric) ORDER BY id LIMIT 5
+         Tuple data received from node: 103 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+               ->  Sort (actual rows=5 loops=1)
+                     Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=14 loops=1)
+                           Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           Filter: (intermediate_result.num > '10'::numeric)
+                           Rows Removed by Filter: 6
+(45 rows)
+
+SET citus.enable_sorted_merge TO on;
+EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+                                                                                                                                                             QUERY PLAN
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: 699 bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=20 loops=1)
+               Output: remote_scan.id, remote_scan.val, remote_scan.num
+               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                     Output: remote_scan.id, remote_scan.val, remote_scan.num
+                     Task Count: 4
+                     Tuple data received from nodes: 1673 bytes
+                     Tasks Shown: One of 4
+                     ->  Task
+                           Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
+                           Tuple data received from node: 419 bytes
+                           Node: host=localhost port=xxxxx dbname=regression
+                           ->  Limit (actual rows=20 loops=1)
+                                 Output: id, val, num
+                                 ->  Sort (actual rows=20 loops=1)
+                                       Output: id, val, num
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: 26kB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Output: id, val, num
+   Task Count: 1
+   Tuple data received from nodes: 101 bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (10)::numeric) ORDER BY id LIMIT 5
+         Tuple data received from node: 101 bytes
+         Node: host=localhost port=xxxxx dbname=regression
+         ->  Limit (actual rows=5 loops=1)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+               ->  Sort (actual rows=5 loops=1)
+                     Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: 25kB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=18 loops=1)
+                           Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           Filter: (intermediate_result.num > '10'::numeric)
+                           Rows Removed by Filter: 2
+(42 rows)
+
+-- =================================================================
+-- Cleanup
+-- =================================================================
+SET citus.enable_sorted_merge TO off;
+RESET citus.enable_streaming_sorted_merge;
+-- Cleanup
+DROP TABLE sorted_merge_test;
+DROP TABLE sorted_merge_events;
diff --git a/src/test/regress/sql/multi_orderby_pushdown.sql b/src/test/regress/sql/multi_orderby_pushdown.sql
index cc2bb87377f..4fb4f0cab32 100644
--- a/src/test/regress/sql/multi_orderby_pushdown.sql
+++ b/src/test/regress/sql/multi_orderby_pushdown.sql
@@ -10,42 +10,6 @@
 -- when any node in the cluster acts as coordinator.
 --
 
-SET citus.next_shard_id TO 960000;
-
--- =================================================================
--- Setup: create test tables
--- =================================================================
-
-CREATE TABLE sorted_merge_test (
-    id int,
-    val text,
-    num numeric,
-    ts timestamptz DEFAULT now()
-);
-SELECT create_distributed_table('sorted_merge_test', 'id');
-
--- Insert 100 rows + NULLs + duplicates
-INSERT INTO sorted_merge_test (id, val, num)
-SELECT i, 'val_' || i, (i * 1.5)::numeric
-FROM generate_series(1, 100) i;
-
-INSERT INTO sorted_merge_test (id, val, num) VALUES (101, NULL, NULL);
-INSERT INTO sorted_merge_test (id, val, num) VALUES (102, NULL, NULL);
-INSERT INTO sorted_merge_test (id, val, num) VALUES (200, 'dup_a', 10.5);
-INSERT INTO sorted_merge_test (id, val, num) VALUES (201, 'dup_b', 10.5);
-INSERT INTO sorted_merge_test (id, val, num) VALUES (202, 'dup_c', 10.5);
-
--- Second table for join tests
-CREATE TABLE sorted_merge_events (
-    id int,
-    event_type text,
-    event_val int
-);
-SELECT create_distributed_table('sorted_merge_events', 'id');
-
-INSERT INTO sorted_merge_events
-SELECT i % 50 + 1, CASE WHEN i % 3 = 0 THEN 'click' WHEN i % 3 = 1 THEN 'view' ELSE 'buy' END, i
-FROM generate_series(1, 200) i;
 
 -- =================================================================
 -- 1. GUC basics
@@ -361,6 +325,16 @@ FETCH 2 FROM sorted_cursor;
 CLOSE sorted_cursor;
 COMMIT;
 
+-- G3b: SCROLL cursor with backward scan
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+DECLARE sorted_scroll_cursor SCROLL CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
+FETCH 3 FROM sorted_scroll_cursor;
+FETCH BACKWARD 1 FROM sorted_scroll_cursor;
+FETCH 2 FROM sorted_scroll_cursor;
+CLOSE sorted_scroll_cursor;
+COMMIT;
+
 -- G4: EXPLAIN ANALYZE (sorted merge skipped for EXPLAIN ANALYZE)
 SET citus.enable_sorted_merge TO on;
 SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
@@ -821,5 +795,3 @@ SET citus.enable_sorted_merge TO off;
 -- =================================================================
 
 SET citus.enable_sorted_merge TO off;
-DROP TABLE sorted_merge_test;
-DROP TABLE sorted_merge_events;
diff --git a/src/test/regress/sql/multi_orderby_pushdown_streaming.sql b/src/test/regress/sql/multi_orderby_pushdown_streaming.sql
index e7faed04373..10c20e26c81 100644
--- a/src/test/regress/sql/multi_orderby_pushdown_streaming.sql
+++ b/src/test/regress/sql/multi_orderby_pushdown_streaming.sql
@@ -1,14 +1,24 @@
 --
--- MULTI_SORTED_MERGE_STREAMING
+-- MULTI_ORDERBY_PUSHDOWN_STREAMING
 --
--- Runs the same test cases as multi_orderby_pushdown.sql but with the
--- streaming sorted merge adapter enabled via the GUC. This validates
--- that the streaming code path produces identical results to the eager
--- merge path.
+-- Runs the sorted merge test suite (multi_orderby_pushdown.sql) twice:
+-- first with the default eager-merge path, then with the streaming
+-- adapter enabled via citus.enable_streaming_sorted_merge. Both runs
+-- share the same setup tables and must produce identical results
+-- (except for the G3 backward-scan test, where the streaming adapter's
+-- forward-only cursor correctly errors on FETCH BACKWARD).
 --
 
-SET citus.enable_streaming_sorted_merge TO on;
+\i sql/setup_multi_orderby_pushdown.sql
 
+-- Run 1: eager merge (default)
 \i sql/multi_orderby_pushdown.sql
 
+-- Run 2: streaming adapter
+SET citus.enable_streaming_sorted_merge TO on;
+\i sql/multi_orderby_pushdown.sql
 RESET citus.enable_streaming_sorted_merge;
+
+-- Cleanup
+DROP TABLE sorted_merge_test;
+DROP TABLE sorted_merge_events;
diff --git a/src/test/regress/sql/setup_multi_orderby_pushdown.sql b/src/test/regress/sql/setup_multi_orderby_pushdown.sql
new file mode 100644
index 00000000000..a1c6e6c5976
--- /dev/null
+++ b/src/test/regress/sql/setup_multi_orderby_pushdown.sql
@@ -0,0 +1,45 @@
+--
+-- SETUP_MULTI_ORDERBY_PUSHDOWN
+--
+-- Creates the test tables and data used by multi_orderby_pushdown.sql
+-- and its variants (e.g., multi_orderby_pushdown_streaming.sql).
+-- This file is meant to be included via \i from test files that need
+-- these tables.
+--
+
+SET citus.next_shard_id TO 960000;
+
+-- =================================================================
+-- Setup: create test tables
+-- =================================================================
+
+CREATE TABLE sorted_merge_test (
+    id int,
+    val text,
+    num numeric,
+    ts timestamptz DEFAULT now()
+);
+SELECT create_distributed_table('sorted_merge_test', 'id');
+
+-- Insert 100 rows + NULLs + duplicates
+INSERT INTO sorted_merge_test (id, val, num)
+SELECT i, 'val_' || i, (i * 1.5)::numeric
+FROM generate_series(1, 100) i;
+
+INSERT INTO sorted_merge_test (id, val, num) VALUES (101, NULL, NULL);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (102, NULL, NULL);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (200, 'dup_a', 10.5);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (201, 'dup_b', 10.5);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (202, 'dup_c', 10.5);
+
+-- Second table for join tests
+CREATE TABLE sorted_merge_events (
+    id int,
+    event_type text,
+    event_val int
+);
+SELECT create_distributed_table('sorted_merge_events', 'id');
+
+INSERT INTO sorted_merge_events
+SELECT i % 50 + 1, CASE WHEN i % 3 = 0 THEN 'click' WHEN i % 3 = 1 THEN 'view' ELSE 'buy' END, i
+FROM generate_series(1, 200) i;

From 1ac461709ec7cb3b2ffc52e1c823dbe91ef48f4f Mon Sep 17 00:00:00 2001
From: Neil Deshpande <ndeshpan@microsoft.com>
Date: Mon, 13 Apr 2026 17:53:04 +0000
Subject: [PATCH 5/7] Change schedule to use new test instead

---
 src/test/regress/multi_schedule | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule
index 06b482ff5c7..6d18db9094c 100644
--- a/src/test/regress/multi_schedule
+++ b/src/test/regress/multi_schedule
@@ -93,7 +93,7 @@ test: multi_reference_table multi_select_for_update relation_access_tracking pg1
 test: custom_aggregate_support aggregate_support tdigest_aggregate_support
 test: multi_average_expression multi_working_columns multi_having_pushdown having_subquery
 test: multi_array_agg multi_limit_clause multi_orderby_limit_pushdown
-test: multi_orderby_pushdown
+test: multi_orderby_pushdown_streaming
 test: multi_jsonb_agg multi_jsonb_object_agg multi_json_agg multi_json_object_agg bool_agg ch_bench_having chbenchmark_all_queries expression_reference_join anonymous_columns
 test: ch_bench_subquery_repartition
 test: subscripting_op

From a25898e61106c3319bc986ac419e4b202ca660a0 Mon Sep 17 00:00:00 2001
From: Neil Deshpande <ndeshpan@microsoft.com>
Date: Tue, 14 Apr 2026 21:20:14 +0000
Subject: [PATCH 6/7] Fix up test output due to rebase

---
 .../multi_orderby_pushdown_streaming.out      | 6408 +++++++++++------
 1 file changed, 4283 insertions(+), 2125 deletions(-)

diff --git a/src/test/regress/expected/multi_orderby_pushdown_streaming.out b/src/test/regress/expected/multi_orderby_pushdown_streaming.out
index b000e7a0c5f..263e93f08f5 100644
--- a/src/test/regress/expected/multi_orderby_pushdown_streaming.out
+++ b/src/test/regress/expected/multi_orderby_pushdown_streaming.out
@@ -66,6 +66,10 @@ FROM generate_series(1, 200) i;
 -- planner eligibility logic. Verifies that enabling the GUC does not
 -- introduce regressions for any query pattern.
 --
+-- MX verification: this test has been verified to pass with zero diffs
+-- under check-base-mx (MX mode), confirming sorted merge works correctly
+-- when any node in the cluster acts as coordinator.
+--
 -- =================================================================
 -- 1. GUC basics
 -- =================================================================
@@ -88,341 +92,337 @@ SET citus.enable_sorted_merge TO off;
 -- =================================================================
 SET citus.enable_sorted_merge TO on;
 -- A1: ORDER BY distribution column
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY id;
-                                                 QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id');
+                                               explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 1027 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
          Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
-         Tuple data received from node: 255 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id, val
                Sort Key: sorted_merge_test.id
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id, val
-(15 rows)
+(16 rows)
 
 -- A2: ORDER BY DESC
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id DESC;
-                                                 QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id DESC');
+                                               explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id
-   Task Count: 4
-   Tuple data received from nodes: 420 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
          Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id DESC
-         Tuple data received from node: 104 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id
                Sort Key: sorted_merge_test.id DESC
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id
-(15 rows)
+(16 rows)
 
 -- A3: ORDER BY DESC NULLS LAST
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST;
-                                                          QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST');
+                                                        explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.num
-   Task Count: 4
-   Tuple data received from nodes: 1556 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
          Query: SELECT id, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY num DESC NULLS LAST
-         Tuple data received from node: 392 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id, num
                Sort Key: sorted_merge_test.num DESC NULLS LAST
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id, num
-(15 rows)
+(16 rows)
 
 -- A4: ORDER BY non-distribution column
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY val;
-                                                  QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY val');
+                                                explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 1027 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
          Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY val
-         Tuple data received from node: 255 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id, val
                Sort Key: sorted_merge_test.val
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id, val
-(15 rows)
+(16 rows)
 
 -- A5: Multi-column ORDER BY
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY id, val;
-                                                    QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id, val');
+                                                  explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 1027 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
          Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, val
-         Tuple data received from node: 255 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id, val
                Sort Key: sorted_merge_test.id, sorted_merge_test.val
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id, val
-(15 rows)
+(16 rows)
 
 -- A6: Mixed directions
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val, num FROM sorted_merge_test ORDER BY id ASC, num DESC;
-                                                         QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val, num FROM sorted_merge_test ORDER BY id ASC, num DESC');
+                                                       explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val, remote_scan.num
-   Task Count: 4
-   Tuple data received from nodes: 2163 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
          Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, num DESC
-         Tuple data received from node: 543 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id, val, num
                Sort Key: sorted_merge_test.id, sorted_merge_test.num DESC
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id, val, num
-(15 rows)
+(16 rows)
 
 -- A7: GROUP BY dist_col ORDER BY dist_col
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id;
-                                                              QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id');
+                                                            explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.count
-   Task Count: 4
-   Tuple data received from nodes: 1260 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
          Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id
-         Tuple data received from node: 312 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id, (count(*))
                Sort Key: sorted_merge_test.id
-               Sort Method: quicksort  Memory: 25kB
-               ->  HashAggregate (actual rows=26 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  HashAggregate (actual rows=N loops=N)
                      Output: id, count(*)
                      Group Key: sorted_merge_test.id
-                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                            Output: id, val, num, ts
-(19 rows)
+(20 rows)
 
 -- A8: WHERE clause + ORDER BY
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test WHERE num > 50 ORDER BY id;
-                                                                    QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test WHERE num > 50 ORDER BY id');
+                                                                  explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=67 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 671 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
-         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (num OPERATOR(pg_catalog.>) '50'::numeric) ORDER BY id
-         Tuple data received from node: 130 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=13 loops=1)
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (num OPERATOR(pg_catalog.>) 'N'::numeric) ORDER BY id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id, val
                Sort Key: sorted_merge_test.id
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=13 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id, val
-                     Filter: (sorted_merge_test.num > '50'::numeric)
-                     Rows Removed by Filter: 13
-(17 rows)
+                     Filter: (sorted_merge_test.num > 'N'::numeric)
+                     Rows Removed by Filter: N
+(18 rows)
 
 -- A9: Expression in ORDER BY (non-aggregate)
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, num FROM sorted_merge_test ORDER BY id + 1;
-                                                                                        QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, num FROM sorted_merge_test ORDER BY id + 1');
+                                                                                      explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.num, remote_scan.worker_column_3
-   Task Count: 4
-   Tuple data received from nodes: 1976 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
-         Query: SELECT id, num, (id OPERATOR(pg_catalog.+) 1) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (id OPERATOR(pg_catalog.+) 1)
-         Tuple data received from node: 496 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, num, ((id + 1))
-               Sort Key: ((sorted_merge_test.id + 1))
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, num, (id + 1)
-(15 rows)
+         Query: SELECT id, num, (id OPERATOR(pg_catalog.+) N) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (id OPERATOR(pg_catalog.+) N)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, num, ((id + N))
+               Sort Key: ((sorted_merge_test.id + N))
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, num, (id + N)
+(16 rows)
 
 -- A10: ORDER BY with LIMIT (existing pushdown, verify no regression)
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
-                                                           QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
+                                                         explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id
-         Task Count: 4
-         Tuple data received from nodes: 80 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
-               Tuple data received from node: 20 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
                      Output: id
-                     ->  Sort (actual rows=5 loops=1)
+                     ->  Sort (actual rows=N loops=N)
                            Output: id
                            Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                  Output: id
-(19 rows)
+(20 rows)
 
 -- =================================================================
 -- Category B: Ineligibility — sort NOT pushed for merge
 -- =================================================================
 SET citus.enable_sorted_merge TO on;
 -- B1: ORDER BY count(*)
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*);
-                                                           QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*)');
+                                                         explain_filter
 ---------------------------------------------------------------------
- Sort (actual rows=105 loops=1)
+ Sort (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.count
    Sort Key: remote_scan.count
-   Sort Method: quicksort  Memory: 28kB
-   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Sort Method: quicksort  Memory: NkB
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.count
-         Task Count: 4
-         Tuple data received from nodes: 1260 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
                Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
-               Tuple data received from node: 312 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  HashAggregate (actual rows=26 loops=1)
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  HashAggregate (actual rows=N loops=N)
                      Output: id, count(*)
                      Group Key: sorted_merge_test.id
-                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                            Output: id, val, num, ts
 (19 rows)
 
 -- B2: ORDER BY avg(col)
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, avg(num) FROM sorted_merge_test GROUP BY id ORDER BY avg(num);
-                                                          QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, avg(num) FROM sorted_merge_test GROUP BY id ORDER BY avg(num)');
+                                                        explain_filter
 ---------------------------------------------------------------------
- Sort (actual rows=105 loops=1)
+ Sort (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.avg
    Sort Key: remote_scan.avg
-   Sort Method: quicksort  Memory: 28kB
-   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Sort Method: quicksort  Memory: NkB
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.avg
-         Task Count: 4
-         Tuple data received from nodes: 1556 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
                Query: SELECT id, avg(num) AS avg FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
-               Tuple data received from node: 392 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  HashAggregate (actual rows=26 loops=1)
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  HashAggregate (actual rows=N loops=N)
                      Output: id, avg(num)
                      Group Key: sorted_merge_test.id
-                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                            Output: id, val, num, ts
 (19 rows)
 
 -- B3: GROUP BY non-dist col, ORDER BY non-dist col
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY val;
-                                                               QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY val');
+                                                             explain_filter
 ---------------------------------------------------------------------
- Sort (actual rows=104 loops=1)
-   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
+ Sort (actual rows=N loops=N)
+   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint))
    Sort Key: remote_scan.val
-   Sort Method: quicksort  Memory: 28kB
-   ->  HashAggregate (actual rows=104 loops=1)
-         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+   Sort Method: quicksort  Memory: NkB
+   ->  HashAggregate (actual rows=N loops=N)
+         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint)
          Group Key: remote_scan.val
-         ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                Output: remote_scan.val, remote_scan.count
-               Task Count: 4
-               Tuple data received from nodes: 1447 bytes
-               Tasks Shown: One of 4
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
                ->  Task
                      Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
-                     Tuple data received from node: 359 bytes
-                     Node: host=localhost port=xxxxx dbname=regression
-                     ->  HashAggregate (actual rows=26 loops=1)
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  HashAggregate (actual rows=N loops=N)
                            Output: val, count(*)
                            Group Key: sorted_merge_test.val
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                  Output: id, val, num, ts
 (23 rows)
 
 -- B4: GROUP BY non-dist col, ORDER BY aggregate
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY count(*);
-                                                               QUERY PLAN
----------------------------------------------------------------------
- Sort (actual rows=104 loops=1)
-   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-   Sort Method: quicksort  Memory: 28kB
-   ->  HashAggregate (actual rows=104 loops=1)
-         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY count(*)');
+                                                             explain_filter
+---------------------------------------------------------------------
+ Sort (actual rows=N loops=N)
+   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint))
+   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint))
+   Sort Method: quicksort  Memory: NkB
+   ->  HashAggregate (actual rows=N loops=N)
+         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint)
          Group Key: remote_scan.val
-         ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                Output: remote_scan.val, remote_scan.count
-               Task Count: 4
-               Tuple data received from nodes: 1447 bytes
-               Tasks Shown: One of 4
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
                ->  Task
                      Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
-                     Tuple data received from node: 359 bytes
-                     Node: host=localhost port=xxxxx dbname=regression
-                     ->  HashAggregate (actual rows=26 loops=1)
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  HashAggregate (actual rows=N loops=N)
                            Output: val, count(*)
                            Group Key: sorted_merge_test.val
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                  Output: id, val, num, ts
 (23 rows)
 
@@ -901,123 +901,121 @@ SELECT id FROM sorted_merge_test WHERE false ORDER BY id;
 -- =================================================================
 -- F1: Simple LIMIT + ORDER BY: plan unchanged between GUC off and on
 SET citus.enable_sorted_merge TO off;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
-                                                              QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
+                                                            explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id
-   ->  Sort (actual rows=5 loops=1)
+   ->  Sort (actual rows=N loops=N)
          Output: remote_scan.id
          Sort Key: remote_scan.id
-         Sort Method: top-N heapsort  Memory: 25kB
-         ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                Output: remote_scan.id
-               Task Count: 4
-               Tuple data received from nodes: 80 bytes
-               Tasks Shown: One of 4
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
                ->  Task
-                     Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
-                     Tuple data received from node: 20 bytes
-                     Node: host=localhost port=xxxxx dbname=regression
-                     ->  Limit (actual rows=5 loops=1)
+                     Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
                            Output: id
-                           ->  Sort (actual rows=5 loops=1)
+                           ->  Sort (actual rows=N loops=N)
                                  Output: id
                                  Sort Key: sorted_merge_test.id
-                                 Sort Method: top-N heapsort  Memory: 25kB
-                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                        Output: id
 (23 rows)
 
 SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
-                                                           QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
+                                                         explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id
-         Task Count: 4
-         Tuple data received from nodes: 80 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
-               Tuple data received from node: 20 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
                      Output: id
-                     ->  Sort (actual rows=5 loops=1)
+                     ->  Sort (actual rows=N loops=N)
                            Output: id
                            Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                  Output: id
-(19 rows)
+(20 rows)
 
 -- F2: GROUP BY dist_col + ORDER BY + LIMIT
 SET citus.enable_sorted_merge TO off;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
-                                                                             QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5');
+                                                                           explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.count
-   ->  Sort (actual rows=5 loops=1)
+   ->  Sort (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.count
          Sort Key: remote_scan.id
-         Sort Method: top-N heapsort  Memory: 25kB
-         ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                Output: remote_scan.id, remote_scan.count
-               Task Count: 4
-               Tuple data received from nodes: 240 bytes
-               Tasks Shown: One of 4
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
                ->  Task
-                     Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT '5'::bigint
-                     Tuple data received from node: 60 bytes
-                     Node: host=localhost port=xxxxx dbname=regression
-                     ->  Limit (actual rows=5 loops=1)
+                     Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
                            Output: id, (count(*))
-                           ->  Sort (actual rows=5 loops=1)
+                           ->  Sort (actual rows=N loops=N)
                                  Output: id, (count(*))
                                  Sort Key: sorted_merge_test.id
-                                 Sort Method: top-N heapsort  Memory: 25kB
-                                 ->  HashAggregate (actual rows=26 loops=1)
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  HashAggregate (actual rows=N loops=N)
                                        Output: id, count(*)
                                        Group Key: sorted_merge_test.id
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                              Output: id, val, num, ts
 (27 rows)
 
 SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
-                                                                          QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5');
+                                                                        explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.count
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.count
-         Task Count: 4
-         Tuple data received from nodes: 240 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT '5'::bigint
-               Tuple data received from node: 60 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
+               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
                      Output: id, (count(*))
-                     ->  Sort (actual rows=5 loops=1)
+                     ->  Sort (actual rows=N loops=N)
                            Output: id, (count(*))
                            Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  HashAggregate (actual rows=26 loops=1)
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  HashAggregate (actual rows=N loops=N)
                                  Output: id, count(*)
                                  Group Key: sorted_merge_test.id
-                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                        Output: id, val, num, ts
-(23 rows)
+(24 rows)
 
 -- F3: ORDER BY aggregate + LIMIT (not eligible for merge)
 SET citus.enable_sorted_merge TO off;
@@ -1047,51 +1045,52 @@ SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, i
 -- =================================================================
 -- G1: Sort elision verification — coordinator Sort node absent
 SET citus.enable_sorted_merge TO off;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY id;
-                                                 QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id');
+                                              explain_filter
 ---------------------------------------------------------------------
- Sort (actual rows=105 loops=1)
+ Sort (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
    Sort Key: remote_scan.id
-   Sort Method: quicksort  Memory: 28kB
-   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+   Sort Method: quicksort  Memory: NkB
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.val
-         Task Count: 4
-         Tuple data received from nodes: 1027 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
                Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true
-               Tuple data received from node: 255 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id, val
 (15 rows)
 
 SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY id;
-                                                 QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id');
+                                               explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 1027 bytes
-   Tasks Shown: One of 4
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
          Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
-         Tuple data received from node: 255 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
                Output: id, val
                Sort Key: sorted_merge_test.id
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                      Output: id, val
-(15 rows)
+(16 rows)
 
 -- G2a: PREPARE with merge ON, EXECUTE after turning OFF
--- Plan-time decision is baked in — cached plan must still merge correctly
+-- Plan-time decision is baked in — cached plan must still merge correctly.
+-- Execute 6+ times to trigger PostgreSQL's generic plan caching, then
+-- verify the plan shape is preserved after toggling the GUC.
 SET citus.enable_sorted_merge TO on;
 PREPARE merge_on_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
 EXECUTE merge_on_stmt;
@@ -1109,7 +1108,100 @@ EXECUTE merge_on_stmt;
  10 | val_10
 (10 rows)
 
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- Verify plan shape after caching — no Sort above CustomScan
+EXPLAIN (COSTS OFF) EXECUTE merge_on_stmt;
+                                      QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Custom Scan (Citus Adaptive)
+         Task Count: 4
+         Merge Method: sorted merge
+         Tasks Shown: One of 4
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit
+                     ->  Sort
+                           Sort Key: id
+                           ->  Seq Scan on sorted_merge_test_960000 sorted_merge_test
+(11 rows)
+
 SET citus.enable_sorted_merge TO off;
+-- Cached plan retains the sorted merge decision from planning time
 EXECUTE merge_on_stmt;
  id |  val
 ---------------------------------------------------------------------
@@ -1125,9 +1217,25 @@ EXECUTE merge_on_stmt;
  10 | val_10
 (10 rows)
 
+EXPLAIN (COSTS OFF) EXECUTE merge_on_stmt;
+                                      QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Custom Scan (Citus Adaptive)
+         Task Count: 4
+         Merge Method: sorted merge
+         Tasks Shown: One of 4
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit
+                     ->  Sort
+                           Sort Key: id
+                           ->  Seq Scan on sorted_merge_test_960000 sorted_merge_test
+(11 rows)
+
 DEALLOCATE merge_on_stmt;
 -- G2b: PREPARE with merge OFF, EXECUTE after turning ON
--- Cached plan has Sort node — must still return sorted results
+-- Cached plan has Sort node — must still return sorted results.
 SET citus.enable_sorted_merge TO off;
 PREPARE merge_off_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
 EXECUTE merge_off_stmt;
@@ -1145,7 +1253,101 @@ EXECUTE merge_off_stmt;
  10 | val_10
 (10 rows)
 
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- Verify plan shape after caching — Sort above CustomScan
+EXPLAIN (COSTS OFF) EXECUTE merge_off_stmt;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.id
+         ->  Custom Scan (Citus Adaptive)
+               Task Count: 4
+               Tasks Shown: One of 4
+               ->  Task
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit
+                           ->  Sort
+                                 Sort Key: id
+                                 ->  Seq Scan on sorted_merge_test_960000 sorted_merge_test
+(12 rows)
+
 SET citus.enable_sorted_merge TO on;
+-- Cached plan retains the non-merge decision from planning time
 EXECUTE merge_off_stmt;
  id |  val
 ---------------------------------------------------------------------
@@ -1161,6 +1363,23 @@ EXECUTE merge_off_stmt;
  10 | val_10
 (10 rows)
 
+EXPLAIN (COSTS OFF) EXECUTE merge_off_stmt;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.id
+         ->  Custom Scan (Citus Adaptive)
+               Task Count: 4
+               Tasks Shown: One of 4
+               ->  Task
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit
+                           ->  Sort
+                                 Sort Key: id
+                                 ->  Seq Scan on sorted_merge_test_960000 sorted_merge_test
+(12 rows)
+
 DEALLOCATE merge_off_stmt;
 -- G3: Cursor with backward scan
 SET citus.enable_sorted_merge TO on;
@@ -1218,30 +1437,30 @@ CLOSE sorted_scroll_cursor;
 COMMIT;
 -- G4: EXPLAIN ANALYZE (sorted merge skipped for EXPLAIN ANALYZE)
 SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
-                                                           QUERY PLAN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
+                                                         explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id
-         Task Count: 4
-         Tuple data received from nodes: 80 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
-               Tuple data received from node: 20 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
                      Output: id
-                     ->  Sort (actual rows=5 loops=1)
+                     ->  Sort (actual rows=N loops=N)
                            Output: id
                            Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                  Output: id
-(19 rows)
+(20 rows)
 
 -- G5: ORDER BY aggregate + LIMIT — crash regression test
 -- Previously caused SIGSEGV when sorted merge was enabled because
@@ -1442,37 +1661,36 @@ SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
 -- =================================================================
 SET citus.enable_sorted_merge TO on;
 -- H1 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH ordered_cte AS (
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH ordered_cte AS (
     SELECT id, val FROM sorted_merge_test ORDER BY id
 )
-SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
-                                                                                                                                                                                     QUERY PLAN
+SELECT * FROM ordered_cte ORDER BY id LIMIT 5');
+                                                                                                                                                                                   explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.val
-         Task Count: 4
-         Tuple data received from nodes: 191 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT ordered_cte.id AS worker_column_1, ordered_cte.val AS worker_column_2 FROM (SELECT sorted_merge_test.id, sorted_merge_test.val FROM public.sorted_merge_test_960000 sorted_merge_test ORDER BY sorted_merge_test.id) ordered_cte) worker_subquery ORDER BY worker_column_1 LIMIT '5'::bigint
-               Tuple data received from node: 47 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT ordered_cte.id AS worker_column_1, ordered_cte.val AS worker_column_2 FROM (SELECT sorted_merge_test.id, sorted_merge_test.val FROM public.sorted_merge_test_960000 sorted_merge_test ORDER BY sorted_merge_test.id) ordered_cte) worker_subquery ORDER BY worker_column_1 LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
                      Output: sorted_merge_test.id, sorted_merge_test.val
-                     ->  Sort (actual rows=5 loops=1)
+                     ->  Sort (actual rows=N loops=N)
                            Output: sorted_merge_test.id, sorted_merge_test.val
                            Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                  Output: sorted_merge_test.id, sorted_merge_test.val
-(19 rows)
+(20 rows)
 
 -- H2 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH eligible_cte AS (
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH eligible_cte AS (
     SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
 ),
 ineligible_cte AS (
@@ -1480,830 +1698,694 @@ ineligible_cte AS (
 )
 SELECT e.id, e.val, i.cnt
 FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
-ORDER BY e.id;
-                                                                                                                                                                                                                                 QUERY PLAN
+ORDER BY e.id');
+                                                                                                                                                                                                                               explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val, remote_scan.cnt
    ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 397 bytes
+         Intermediate Data Size: N bytes
          Result destination: Write locally
-         ->  Limit (actual rows=20 loops=1)
+         ->  Limit (actual rows=N loops=N)
                Output: remote_scan.id, remote_scan.val
-               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                      Output: remote_scan.id, remote_scan.val
-                     Task Count: 4
-                     Tuple data received from nodes: 791 bytes
-                     Tasks Shown: One of 4
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
                      ->  Task
-                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                           Tuple data received from node: 197 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=20 loops=1)
+                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
                                  Output: id, val
-                                 ->  Sort (actual rows=20 loops=1)
+                                 ->  Sort (actual rows=N loops=N)
                                        Output: id, val
                                        Sort Key: sorted_merge_test.id
-                                       Sort Method: quicksort  Memory: 26kB
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Sort Method: quicksort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                              Output: id, val
    ->  Distributed Subplan XXX_2
-         Intermediate Data Size: 330 bytes
+         Intermediate Data Size: N bytes
          Result destination: Write locally
-         ->  Limit (actual rows=15 loops=1)
+         ->  Limit (actual rows=N loops=N)
                Output: remote_scan.id, remote_scan.cnt
-               ->  Sort (actual rows=15 loops=1)
+               ->  Sort (actual rows=N loops=N)
                      Output: remote_scan.id, remote_scan.cnt
                      Sort Key: remote_scan.cnt DESC, remote_scan.id
                      ->  Custom Scan (Citus Adaptive) (never executed)
                            Output: remote_scan.id, remote_scan.cnt
-                           Task Count: 4
-                           Tuple data received from nodes: 720 bytes
-                           Tasks Shown: One of 4
+                           Task Count: N
+                           Tuple data received from nodes: N bytes
+                           Tasks Shown: One of N
                            ->  Task
-                                 Query: SELECT id, count(*) AS cnt FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (count(*)) DESC, id LIMIT '15'::bigint
-                                 Tuple data received from node: 180 bytes
-                                 Node: host=localhost port=xxxxx dbname=regression
-                                 ->  Limit (actual rows=15 loops=1)
+                                 Query: SELECT id, count(*) AS cnt FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (count(*)) DESC, id LIMIT 'N'::bigint
+                                 Tuple data received from node: N bytes
+                                 Node: host=localhost port=N dbname=regression
+                                 ->  Limit (actual rows=N loops=N)
                                        Output: id, (count(*))
-                                       ->  Sort (actual rows=15 loops=1)
+                                       ->  Sort (actual rows=N loops=N)
                                              Output: id, (count(*))
                                              Sort Key: (count(*)) DESC, sorted_merge_test.id
-                                             Sort Method: quicksort  Memory: 26kB
-                                             ->  HashAggregate (actual rows=26 loops=1)
+                                             Sort Method: quicksort  Memory: NkB
+                                             ->  HashAggregate (actual rows=N loops=N)
                                                    Output: id, count(*)
                                                    Group Key: sorted_merge_test.id
-                                                   ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                                   ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                                          Output: id, val, num, ts
-   Task Count: 1
-   Tuple data received from nodes: 87 bytes
+   Task Count: N
+   Tuple data received from nodes: N bytes
    Tasks Shown: All
    ->  Task
          Query: SELECT e.id, e.val, i.cnt FROM ((SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) e JOIN (SELECT intermediate_result.id, intermediate_result.cnt FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer, cnt bigint)) i ON ((e.id OPERATOR(pg_catalog.=) i.id))) ORDER BY e.id
-         Tuple data received from node: 87 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Merge Join (actual rows=5 loops=1)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Merge Join (actual rows=N loops=N)
                Output: intermediate_result.id, intermediate_result.val, intermediate_result_1.cnt
                Merge Cond: (intermediate_result.id = intermediate_result_1.id)
-               ->  Sort (actual rows=6 loops=1)
+               ->  Sort (actual rows=N loops=N)
                      Output: intermediate_result.id, intermediate_result.val
                      Sort Key: intermediate_result.id
-                     Sort Method: quicksort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                     Sort Method: quicksort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
                            Output: intermediate_result.id, intermediate_result.val
                            Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-               ->  Sort (actual rows=15 loops=1)
+               ->  Sort (actual rows=N loops=N)
                      Output: intermediate_result_1.cnt, intermediate_result_1.id
                      Sort Key: intermediate_result_1.id
-                     Sort Method: quicksort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=15 loops=1)
+                     Sort Method: quicksort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=N loops=N)
                            Output: intermediate_result_1.cnt, intermediate_result_1.id
                            Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
-(77 rows)
+(78 rows)
 
 -- H3 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH top_ids AS (
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH top_ids AS (
     SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
 )
 SELECT t.id, t.val
 FROM sorted_merge_test t
 JOIN top_ids ON t.id = top_ids.id
 ORDER BY t.id
-LIMIT 10;
-                                                                                                                                                                                                                     QUERY PLAN
+LIMIT 10');
+                                                                                                                                                                                                                   explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=10 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
-   ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.val
          ->  Distributed Subplan XXX_1
-               Intermediate Data Size: 200 bytes
-               Result destination: Send to 2 nodes
-               ->  Limit (actual rows=20 loops=1)
+               Intermediate Data Size: N bytes
+               Result destination: Send to N nodes
+               ->  Limit (actual rows=N loops=N)
                      Output: remote_scan.id
-                     ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+                     ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                            Output: remote_scan.id
-                           Task Count: 4
-                           Tuple data received from nodes: 320 bytes
-                           Tasks Shown: One of 4
+                           Task Count: N
+                           Merge Method: sorted merge
+                           Tuple data received from nodes: N bytes
+                           Tasks Shown: One of N
                            ->  Task
-                                 Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                                 Tuple data received from node: 80 bytes
-                                 Node: host=localhost port=xxxxx dbname=regression
-                                 ->  Limit (actual rows=20 loops=1)
+                                 Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                                 Tuple data received from node: N bytes
+                                 Node: host=localhost port=N dbname=regression
+                                 ->  Limit (actual rows=N loops=N)
                                        Output: id
-                                       ->  Sort (actual rows=20 loops=1)
+                                       ->  Sort (actual rows=N loops=N)
                                              Output: id
                                              Sort Key: sorted_merge_test.id
-                                             Sort Method: quicksort  Memory: 25kB
-                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Sort Method: quicksort  Memory: NkB
+                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                                    Output: id
-         Task Count: 4
-         Tuple data received from nodes: 97 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT t.id AS worker_column_1, t.val AS worker_column_2 FROM (public.sorted_merge_test_960000 t JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) top_ids ON ((t.id OPERATOR(pg_catalog.=) top_ids.id)))) worker_subquery ORDER BY worker_column_1 LIMIT '10'::bigint
-               Tuple data received from node: 97 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=10 loops=1)
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT t.id AS worker_column_1, t.val AS worker_column_2 FROM (public.sorted_merge_test_960000 t JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) top_ids ON ((t.id OPERATOR(pg_catalog.=) top_ids.id)))) worker_subquery ORDER BY worker_column_1 LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
                      Output: t.id, t.val
-                     ->  Merge Join (actual rows=10 loops=1)
+                     ->  Merge Join (actual rows=N loops=N)
                            Output: t.id, t.val
                            Merge Cond: (intermediate_result.id = t.id)
-                           ->  Sort (actual rows=10 loops=1)
+                           ->  Sort (actual rows=N loops=N)
                                  Output: intermediate_result.id
                                  Sort Key: intermediate_result.id
-                                 Sort Method: quicksort  Memory: 25kB
-                                 ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                                 Sort Method: quicksort  Memory: NkB
+                                 ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
                                        Output: intermediate_result.id
                                        Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-                           ->  Sort (actual rows=10 loops=1)
+                           ->  Sort (actual rows=N loops=N)
                                  Output: t.id, t.val
                                  Sort Key: t.id
-                                 Sort Method: quicksort  Memory: 25kB
-                                 ->  Seq Scan on public.sorted_merge_test_960000 t (actual rows=26 loops=1)
+                                 Sort Method: quicksort  Memory: NkB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 t (actual rows=N loops=N)
                                        Output: t.id, t.val
-(51 rows)
+(53 rows)
 
 -- H4 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test
 WHERE id IN (
     SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
 )
 ORDER BY id
-LIMIT 5;
-                                                                                                                                                                                                                                          QUERY PLAN
+LIMIT 5');
+                                                                                                                                                                                                                                        explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=3 loops=1)
+ Limit (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
-   ->  Custom Scan (Citus Adaptive) (actual rows=3 loops=1)
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.val
          ->  Distributed Subplan XXX_1
-               Intermediate Data Size: 100 bytes
-               Result destination: Send to 2 nodes
-               ->  Limit (actual rows=10 loops=1)
+               Intermediate Data Size: N bytes
+               Result destination: Send to N nodes
+               ->  Limit (actual rows=N loops=N)
                      Output: remote_scan.id
-                     ->  Custom Scan (Citus Adaptive) (actual rows=40 loops=1)
+                     ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                            Output: remote_scan.id
-                           Task Count: 4
-                           Tuple data received from nodes: 160 bytes
-                           Tasks Shown: One of 4
+                           Task Count: N
+                           Merge Method: sorted merge
+                           Tuple data received from nodes: N bytes
+                           Tasks Shown: One of N
                            ->  Task
-                                 Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT '10'::bigint
-                                 Tuple data received from node: 40 bytes
-                                 Node: host=localhost port=xxxxx dbname=regression
-                                 ->  Limit (actual rows=10 loops=1)
+                                 Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT 'N'::bigint
+                                 Tuple data received from node: N bytes
+                                 Node: host=localhost port=N dbname=regression
+                                 ->  Limit (actual rows=N loops=N)
                                        Output: id
-                                       ->  Sort (actual rows=10 loops=1)
+                                       ->  Sort (actual rows=N loops=N)
                                              Output: id
                                              Sort Key: sorted_merge_events.id
-                                             Sort Method: top-N heapsort  Memory: 25kB
-                                             ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=56 loops=1)
+                                             Sort Method: top-N heapsort  Memory: NkB
+                                             ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=N loops=N)
                                                    Output: id
-         Task Count: 4
-         Tuple data received from nodes: 27 bytes
-         Tasks Shown: One of 4
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT sorted_merge_test.id AS worker_column_1, sorted_merge_test.val AS worker_column_2 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (sorted_merge_test.id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))) worker_subquery ORDER BY worker_column_1 LIMIT '5'::bigint
-               Tuple data received from node: 27 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=3 loops=1)
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT sorted_merge_test.id AS worker_column_1, sorted_merge_test.val AS worker_column_2 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (sorted_merge_test.id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))) worker_subquery ORDER BY worker_column_1 LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
                      Output: sorted_merge_test.id, sorted_merge_test.val
-                     ->  Sort (actual rows=3 loops=1)
+                     ->  Sort (actual rows=N loops=N)
                            Output: sorted_merge_test.id, sorted_merge_test.val
                            Sort Key: sorted_merge_test.id
-                           Sort Method: quicksort  Memory: 25kB
-                           ->  Hash Semi Join (actual rows=3 loops=1)
+                           Sort Method: quicksort  Memory: NkB
+                           ->  Hash Semi Join (actual rows=N loops=N)
                                  Output: sorted_merge_test.id, sorted_merge_test.val
                                  Hash Cond: (sorted_merge_test.id = intermediate_result.id)
-                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                        Output: sorted_merge_test.id, sorted_merge_test.val, sorted_merge_test.num, sorted_merge_test.ts
-                                 ->  Hash (actual rows=10 loops=1)
+                                 ->  Hash (actual rows=N loops=N)
                                        Output: intermediate_result.id
-                                       ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=10 loops=1)
+                                       ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
                                              Output: intermediate_result.id
                                              Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-(50 rows)
+(52 rows)
 
 -- H5 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH small_cte AS (
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH small_cte AS (
     SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
 )
-SELECT * FROM small_cte ORDER BY id LIMIT 5;
-                                                                                                                  QUERY PLAN
+SELECT * FROM small_cte ORDER BY id LIMIT 5');
+                                                                                                                explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val
    ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 397 bytes
+         Intermediate Data Size: N bytes
          Result destination: Write locally
-         ->  Limit (actual rows=20 loops=1)
+         ->  Limit (actual rows=N loops=N)
                Output: remote_scan.id, remote_scan.val
-               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                      Output: remote_scan.id, remote_scan.val
-                     Task Count: 4
-                     Tuple data received from nodes: 791 bytes
-                     Tasks Shown: One of 4
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
                      ->  Task
-                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                           Tuple data received from node: 197 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=20 loops=1)
+                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
                                  Output: id, val
-                                 ->  Sort (actual rows=20 loops=1)
+                                 ->  Sort (actual rows=N loops=N)
                                        Output: id, val
                                        Sort Key: sorted_merge_test.id
-                                       Sort Method: quicksort  Memory: 26kB
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Sort Method: quicksort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                              Output: id, val
-   Task Count: 1
-   Tuple data received from nodes: 47 bytes
+   Task Count: N
+   Tuple data received from nodes: N bytes
    Tasks Shown: All
    ->  Task
-         Query: SELECT id, val FROM (SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) small_cte ORDER BY id LIMIT 5
-         Tuple data received from node: 47 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Limit (actual rows=5 loops=1)
+         Query: SELECT id, val FROM (SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) small_cte ORDER BY id LIMIT N
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Limit (actual rows=N loops=N)
                Output: intermediate_result.id, intermediate_result.val
-               ->  Sort (actual rows=5 loops=1)
+               ->  Sort (actual rows=N loops=N)
                      Output: intermediate_result.id, intermediate_result.val
                      Sort Key: intermediate_result.id
-                     Sort Method: top-N heapsort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
+                     Sort Method: top-N heapsort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
                            Output: intermediate_result.id, intermediate_result.val
                            Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-(40 rows)
+(41 rows)
 
 -- H6 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT foo.id, bar.id as bar_id
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT foo.id, bar.id as bar_id
 FROM
     (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
     (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
 ORDER BY foo.id, bar.id
-LIMIT 5;
-                                                                                                                                                                                    QUERY PLAN
+LIMIT 5');
+                                                                                                                                                                                  explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.bar_id
    ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 30 bytes
+         Intermediate Data Size: N bytes
          Result destination: Write locally
-         ->  Limit (actual rows=3 loops=1)
+         ->  Limit (actual rows=N loops=N)
                Output: remote_scan.id
-               ->  Custom Scan (Citus Adaptive) (actual rows=12 loops=1)
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                      Output: remote_scan.id
-                     Task Count: 4
-                     Tuple data received from nodes: 48 bytes
-                     Tasks Shown: One of 4
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
                      ->  Task
-                           Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '3'::bigint
-                           Tuple data received from node: 12 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=3 loops=1)
+                           Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
                                  Output: id
-                                 ->  Sort (actual rows=3 loops=1)
+                                 ->  Sort (actual rows=N loops=N)
                                        Output: id
                                        Sort Key: sorted_merge_test.id
-                                       Sort Method: top-N heapsort  Memory: 25kB
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Sort Method: top-N heapsort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                              Output: id
    ->  Distributed Subplan XXX_2
-         Intermediate Data Size: 30 bytes
+         Intermediate Data Size: N bytes
          Result destination: Write locally
-         ->  Limit (actual rows=3 loops=1)
+         ->  Limit (actual rows=N loops=N)
                Output: remote_scan.id
-               ->  Custom Scan (Citus Adaptive) (actual rows=12 loops=1)
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                      Output: remote_scan.id
-                     Task Count: 4
-                     Tuple data received from nodes: 48 bytes
-                     Tasks Shown: One of 4
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
                      ->  Task
-                           Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT '3'::bigint
-                           Tuple data received from node: 12 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=3 loops=1)
+                           Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
                                  Output: id
-                                 ->  Sort (actual rows=3 loops=1)
+                                 ->  Sort (actual rows=N loops=N)
                                        Output: id
                                        Sort Key: sorted_merge_events.id
-                                       Sort Method: top-N heapsort  Memory: 25kB
-                                       ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=56 loops=1)
+                                       Sort Method: top-N heapsort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=N loops=N)
                                              Output: id
-   Task Count: 1
-   Tuple data received from nodes: 40 bytes
+   Task Count: N
+   Tuple data received from nodes: N bytes
    Tasks Shown: All
    ->  Task
-         Query: SELECT foo.id, bar.id AS bar_id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) foo, (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) bar ORDER BY foo.id, bar.id LIMIT 5
-         Tuple data received from node: 40 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Limit (actual rows=5 loops=1)
+         Query: SELECT foo.id, bar.id AS bar_id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) foo, (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) bar ORDER BY foo.id, bar.id LIMIT N
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Limit (actual rows=N loops=N)
                Output: intermediate_result.id, intermediate_result_1.id
-               ->  Sort (actual rows=5 loops=1)
+               ->  Sort (actual rows=N loops=N)
                      Output: intermediate_result.id, intermediate_result_1.id
                      Sort Key: intermediate_result.id, intermediate_result_1.id
-                     Sort Method: quicksort  Memory: 25kB
-                     ->  Nested Loop (actual rows=9 loops=1)
+                     Sort Method: quicksort  Memory: NkB
+                     ->  Nested Loop (actual rows=N loops=N)
                            Output: intermediate_result.id, intermediate_result_1.id
-                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=3 loops=1)
+                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
                                  Output: intermediate_result.id
                                  Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=3 loops=3)
+                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=N loops=N)
                                  Output: intermediate_result_1.id
                                  Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
-(67 rows)
+(69 rows)
 
 -- H7 EXPLAIN — GUC off vs on
 SET citus.enable_sorted_merge TO off;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH cte AS (
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH cte AS (
     SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
 )
-SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
-                                                                                                                                                             QUERY PLAN
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5');
+                                                                                                                                                           explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val, remote_scan.num
    ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 691 bytes
+         Intermediate Data Size: N bytes
          Result destination: Write locally
-         ->  Limit (actual rows=20 loops=1)
+         ->  Limit (actual rows=N loops=N)
                Output: remote_scan.id, remote_scan.val, remote_scan.num
-               ->  Sort (actual rows=20 loops=1)
+               ->  Sort (actual rows=N loops=N)
                      Output: remote_scan.id, remote_scan.val, remote_scan.num
                      Sort Key: remote_scan.id
                      ->  Custom Scan (Citus Adaptive) (never executed)
                            Output: remote_scan.id, remote_scan.val, remote_scan.num
-                           Task Count: 4
-                           Tuple data received from nodes: 1673 bytes
-                           Tasks Shown: One of 4
+                           Task Count: N
+                           Tuple data received from nodes: N bytes
+                           Tasks Shown: One of N
                            ->  Task
-                                 Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                                 Tuple data received from node: 419 bytes
-                                 Node: host=localhost port=xxxxx dbname=regression
-                                 ->  Limit (actual rows=20 loops=1)
+                                 Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                                 Tuple data received from node: N bytes
+                                 Node: host=localhost port=N dbname=regression
+                                 ->  Limit (actual rows=N loops=N)
                                        Output: id, val, num
-                                       ->  Sort (actual rows=20 loops=1)
+                                       ->  Sort (actual rows=N loops=N)
                                              Output: id, val, num
                                              Sort Key: sorted_merge_test.id
-                                             Sort Method: quicksort  Memory: 26kB
-                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                             Sort Method: quicksort  Memory: NkB
+                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                                    Output: id, val, num
-   Task Count: 1
-   Tuple data received from nodes: 103 bytes
+   Task Count: N
+   Tuple data received from nodes: N bytes
    Tasks Shown: All
    ->  Task
-         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (10)::numeric) ORDER BY id LIMIT 5
-         Tuple data received from node: 103 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Limit (actual rows=5 loops=1)
+         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (N)::numeric) ORDER BY id LIMIT N
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Limit (actual rows=N loops=N)
                Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
-               ->  Sort (actual rows=5 loops=1)
+               ->  Sort (actual rows=N loops=N)
                      Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
                      Sort Key: intermediate_result.id
-                     Sort Method: top-N heapsort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=14 loops=1)
+                     Sort Method: top-N heapsort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
                            Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
                            Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-                           Filter: (intermediate_result.num > '10'::numeric)
-                           Rows Removed by Filter: 6
+                           Filter: (intermediate_result.num > 'N'::numeric)
+                           Rows Removed by Filter: N
 (45 rows)
 
 SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH cte AS (
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH cte AS (
     SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
 )
-SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
-                                                                                                                                                             QUERY PLAN
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5');
+                                                                                                                                                           explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.val, remote_scan.num
    ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 699 bytes
+         Intermediate Data Size: N bytes
          Result destination: Write locally
-         ->  Limit (actual rows=20 loops=1)
+         ->  Limit (actual rows=N loops=N)
                Output: remote_scan.id, remote_scan.val, remote_scan.num
-               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                      Output: remote_scan.id, remote_scan.val, remote_scan.num
-                     Task Count: 4
-                     Tuple data received from nodes: 1673 bytes
-                     Tasks Shown: One of 4
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
                      ->  Task
-                           Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                           Tuple data received from node: 419 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=20 loops=1)
+                           Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
                                  Output: id, val, num
-                                 ->  Sort (actual rows=20 loops=1)
+                                 ->  Sort (actual rows=N loops=N)
                                        Output: id, val, num
                                        Sort Key: sorted_merge_test.id
-                                       Sort Method: quicksort  Memory: 26kB
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Sort Method: quicksort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                              Output: id, val, num
-   Task Count: 1
-   Tuple data received from nodes: 101 bytes
+   Task Count: N
+   Tuple data received from nodes: N bytes
    Tasks Shown: All
    ->  Task
-         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (10)::numeric) ORDER BY id LIMIT 5
-         Tuple data received from node: 101 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Limit (actual rows=5 loops=1)
+         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (N)::numeric) ORDER BY id LIMIT N
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Limit (actual rows=N loops=N)
                Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
-               ->  Sort (actual rows=5 loops=1)
+               ->  Sort (actual rows=N loops=N)
                      Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
                      Sort Key: intermediate_result.id
-                     Sort Method: top-N heapsort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=18 loops=1)
+                     Sort Method: top-N heapsort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
                            Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
                            Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-                           Filter: (intermediate_result.num > '10'::numeric)
-                           Rows Removed by Filter: 2
-(42 rows)
+                           Filter: (intermediate_result.num > 'N'::numeric)
+                           Rows Removed by Filter: N
+(43 rows)
 
 -- =================================================================
--- Cleanup
+-- Category I: Distributed Transactions
 -- =================================================================
-SET citus.enable_sorted_merge TO off;
--- Run 2: streaming adapter
-SET citus.enable_streaming_sorted_merge TO on;
-\i sql/multi_orderby_pushdown.sql
---
--- MULTI_SORTED_MERGE
---
--- Tests for the citus.enable_sorted_merge GUC and the sorted merge
--- planner eligibility logic. Verifies that enabling the GUC does not
--- introduce regressions for any query pattern.
---
--- =================================================================
--- 1. GUC basics
--- =================================================================
-SHOW citus.enable_sorted_merge;
- citus.enable_sorted_merge
----------------------------------------------------------------------
- off
-(1 row)
-
+-- Verify sorted merge correctness within multi-statement transactions
+-- where data is modified before the sorted-merge SELECT.
 SET citus.enable_sorted_merge TO on;
-SHOW citus.enable_sorted_merge;
- citus.enable_sorted_merge
+-- I1: INSERT then SELECT within a transaction
+BEGIN;
+INSERT INTO sorted_merge_test (id, val, num) VALUES (900, 'txn_insert', 900.0);
+SELECT id, val FROM sorted_merge_test WHERE id >= 900 ORDER BY id;
+ id  |    val
 ---------------------------------------------------------------------
- on
+ 900 | txn_insert
 (1 row)
 
-SET citus.enable_sorted_merge TO off;
--- =================================================================
--- Category A: Eligibility — sort IS pushed to workers
--- =================================================================
-SET citus.enable_sorted_merge TO on;
--- A1: ORDER BY distribution column
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY id;
-                                                 QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 1027 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
-         Tuple data received from node: 255 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, val
-               Sort Key: sorted_merge_test.id
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, val
-(15 rows)
-
--- A2: ORDER BY DESC
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id DESC;
-                                                 QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id
-   Task Count: 4
-   Tuple data received from nodes: 420 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id DESC
-         Tuple data received from node: 104 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id
-               Sort Key: sorted_merge_test.id DESC
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id
-(15 rows)
-
--- A3: ORDER BY DESC NULLS LAST
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST;
-                                                          QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.num
-   Task Count: 4
-   Tuple data received from nodes: 1556 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY num DESC NULLS LAST
-         Tuple data received from node: 392 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, num
-               Sort Key: sorted_merge_test.num DESC NULLS LAST
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, num
-(15 rows)
-
--- A4: ORDER BY non-distribution column
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY val;
-                                                  QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 1027 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY val
-         Tuple data received from node: 255 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, val
-               Sort Key: sorted_merge_test.val
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, val
-(15 rows)
-
--- A5: Multi-column ORDER BY
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY id, val;
-                                                    QUERY PLAN
+ROLLBACK;
+-- I2: UPDATE then SELECT within a transaction
+BEGIN;
+UPDATE sorted_merge_test SET val = 'updated' WHERE id = 1;
+SELECT id, val FROM sorted_merge_test WHERE id <= 3 ORDER BY id;
+ id |   val
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 1027 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, val
-         Tuple data received from node: 255 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, val
-               Sort Key: sorted_merge_test.id, sorted_merge_test.val
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, val
-(15 rows)
+  1 | updated
+  2 | val_2
+  3 | val_3
+(3 rows)
 
--- A6: Mixed directions
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val, num FROM sorted_merge_test ORDER BY id ASC, num DESC;
-                                                         QUERY PLAN
+ROLLBACK;
+-- I3: DELETE then SELECT within a transaction
+BEGIN;
+DELETE FROM sorted_merge_test WHERE id <= 5;
+SELECT id, val FROM sorted_merge_test WHERE id <= 10 ORDER BY id;
+ id |  val
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.val, remote_scan.num
-   Task Count: 4
-   Tuple data received from nodes: 2163 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, num DESC
-         Tuple data received from node: 543 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, val, num
-               Sort Key: sorted_merge_test.id, sorted_merge_test.num DESC
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, val, num
-(15 rows)
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(5 rows)
 
--- A7: GROUP BY dist_col ORDER BY dist_col
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id;
-                                                              QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.count
-   Task Count: 4
-   Tuple data received from nodes: 1260 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id
-         Tuple data received from node: 312 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, (count(*))
-               Sort Key: sorted_merge_test.id
-               Sort Method: quicksort  Memory: 25kB
-               ->  HashAggregate (actual rows=26 loops=1)
-                     Output: id, count(*)
-                     Group Key: sorted_merge_test.id
-                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                           Output: id, val, num, ts
-(19 rows)
+ROLLBACK;
+-- I4: INSERT + UPDATE + SELECT with multi-column ORDER BY
+BEGIN;
+INSERT INTO sorted_merge_test (id, val, num) VALUES (901, 'txn_a', 1.0);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (902, 'txn_b', 2.0);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (903, 'txn_c', 3.0);
+UPDATE sorted_merge_test SET num = 999.0 WHERE id = 901;
+SELECT id, val, num FROM sorted_merge_test WHERE id >= 900 ORDER BY num, id;
+ id  |  val  |  num
+---------------------------------------------------------------------
+ 902 | txn_b |   2.0
+ 903 | txn_c |   3.0
+ 901 | txn_a | 999.0
+(3 rows)
 
--- A8: WHERE clause + ORDER BY
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test WHERE num > 50 ORDER BY id;
-                                                                    QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=67 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 671 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (num OPERATOR(pg_catalog.>) '50'::numeric) ORDER BY id
-         Tuple data received from node: 130 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=13 loops=1)
-               Output: id, val
-               Sort Key: sorted_merge_test.id
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=13 loops=1)
-                     Output: id, val
-                     Filter: (sorted_merge_test.num > '50'::numeric)
-                     Rows Removed by Filter: 13
-(17 rows)
+ROLLBACK;
+-- I5: Compare results with GUC off vs on in a transaction
+BEGIN;
+INSERT INTO sorted_merge_test (id, val, num) VALUES (910, 'cmp_a', 10.0);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (911, 'cmp_b', 20.0);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (912, 'cmp_c', 30.0);
+SET LOCAL citus.enable_sorted_merge TO off;
+SELECT id, val, num FROM sorted_merge_test WHERE id >= 910 ORDER BY id;
+ id  |  val  | num
+---------------------------------------------------------------------
+ 910 | cmp_a | 10.0
+ 911 | cmp_b | 20.0
+ 912 | cmp_c | 30.0
+(3 rows)
 
--- A9: Expression in ORDER BY (non-aggregate)
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, num FROM sorted_merge_test ORDER BY id + 1;
-                                                                                        QUERY PLAN
+SET LOCAL citus.enable_sorted_merge TO on;
+SELECT id, val, num FROM sorted_merge_test WHERE id >= 910 ORDER BY id;
+ id  |  val  | num
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.num, remote_scan.worker_column_3
-   Task Count: 4
-   Tuple data received from nodes: 1976 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, num, (id OPERATOR(pg_catalog.+) 1) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (id OPERATOR(pg_catalog.+) 1)
-         Tuple data received from node: 496 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, num, ((id + 1))
-               Sort Key: ((sorted_merge_test.id + 1))
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, num, (id + 1)
-(15 rows)
+ 910 | cmp_a | 10.0
+ 911 | cmp_b | 20.0
+ 912 | cmp_c | 30.0
+(3 rows)
 
--- A10: ORDER BY with LIMIT (existing pushdown, verify no regression)
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
-                                                           QUERY PLAN
+ROLLBACK;
+-- I6: DELETE + aggregate in SELECT with ORDER BY
+BEGIN;
+DELETE FROM sorted_merge_test WHERE id > 100 AND id < 200;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
-   Output: remote_scan.id
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-         Output: remote_scan.id
-         Task Count: 4
-         Tuple data received from nodes: 80 bytes
-         Tasks Shown: One of 4
-         ->  Task
-               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
-               Tuple data received from node: 20 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
-                     Output: id
-                     ->  Sort (actual rows=5 loops=1)
-                           Output: id
-                           Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                 Output: id
-(19 rows)
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
 
+ROLLBACK;
 -- =================================================================
--- Category B: Ineligibility — sort NOT pushed for merge
+-- Category J: Coordinator expression evaluation exclusion
 -- =================================================================
+-- Verify that queries with ORDER BY on expressions that need coordinator-side
+-- evaluation are correctly excluded from sorted merge (or handled correctly).
 SET citus.enable_sorted_merge TO on;
--- B1: ORDER BY count(*)
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*);
-                                                           QUERY PLAN
----------------------------------------------------------------------
- Sort (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.count
-   Sort Key: remote_scan.count
-   Sort Method: quicksort  Memory: 28kB
-   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-         Output: remote_scan.id, remote_scan.count
-         Task Count: 4
-         Tuple data received from nodes: 1260 bytes
-         Tasks Shown: One of 4
-         ->  Task
-               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
-               Tuple data received from node: 312 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  HashAggregate (actual rows=26 loops=1)
-                     Output: id, count(*)
-                     Group Key: sorted_merge_test.id
-                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                           Output: id, val, num, ts
-(19 rows)
+-- J1: ORDER BY expression on aggregate result (ordinal reference)
+-- The ORDER BY references position 2 which is an aggregate — sorted merge
+-- must NOT be used because aggregates are rewritten between worker/coordinator.
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, sum(num) AS total FROM sorted_merge_test GROUP BY id ORDER BY 2 LIMIT 5');
+                                                                               explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.total
+   ->  Sort (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.total
+         Sort Key: remote_scan.total
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.total
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT id, sum(num) AS total FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (sum(num)) LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
+                           Output: id, (sum(num))
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: id, (sum(num))
+                                 Sort Key: (sum(sorted_merge_test.num))
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  HashAggregate (actual rows=N loops=N)
+                                       Output: id, sum(num)
+                                       Group Key: sorted_merge_test.id
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val, num, ts
+(27 rows)
 
--- B2: ORDER BY avg(col)
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, avg(num) FROM sorted_merge_test GROUP BY id ORDER BY avg(num);
-                                                          QUERY PLAN
----------------------------------------------------------------------
- Sort (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.avg
-   Sort Key: remote_scan.avg
-   Sort Method: quicksort  Memory: 28kB
-   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-         Output: remote_scan.id, remote_scan.avg
-         Task Count: 4
-         Tuple data received from nodes: 1556 bytes
-         Tasks Shown: One of 4
+-- J2: ORDER BY expression wrapping an aggregate
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, sum(num) + 1 AS total_plus FROM sorted_merge_test GROUP BY id ORDER BY sum(num) + 1 LIMIT 5');
+                                                                                                                       explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.total_plus
+   ->  Sort (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.total_plus
+         Sort Key: remote_scan.total_plus
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.total_plus
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT id, (sum(num) OPERATOR(pg_catalog.+) 'N'::numeric) AS total_plus FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (sum(num) OPERATOR(pg_catalog.+) 'N'::numeric) LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
+                           Output: id, ((sum(num) + 'N'::numeric))
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: id, ((sum(num) + 'N'::numeric))
+                                 Sort Key: ((sum(sorted_merge_test.num) + 'N'::numeric))
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  HashAggregate (actual rows=N loops=N)
+                                       Output: id, (sum(num) + 'N'::numeric)
+                                       Group Key: sorted_merge_test.id
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val, num, ts
+(27 rows)
+
+-- J3: ORDER BY a non-aggregate expression that can be pushed to workers
+-- This should be eligible for sorted merge — the expression is evaluated
+-- on the worker side and sort order is preserved.
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id + 0');
+                                                                                      explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, (id OPERATOR(pg_catalog.+) N) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (id OPERATOR(pg_catalog.+) N)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, ((id + N))
+               Sort Key: ((sorted_merge_test.id + N))
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, (id + N)
+(16 rows)
+
+-- J4: ORDER BY with CASE expression (no aggregates) — eligible
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY CASE WHEN id < 50 THEN 0 ELSE 1 END, id');
+                                                                                                                    explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, CASE WHEN (id OPERATOR(pg_catalog.<) N) THEN N ELSE N END AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY CASE WHEN (id OPERATOR(pg_catalog.<) N) THEN N ELSE N END, id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, (CASE WHEN (id < N) THEN N ELSE N END)
+               Sort Key: (CASE WHEN (sorted_merge_test.id < N) THEN N ELSE N END), sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, CASE WHEN (id < N) THEN N ELSE N END
+(16 rows)
+
+-- J5: ORDER BY on an expression that mixes aggregate and non-aggregate
+-- Should be ineligible because the expression contains an aggregate.
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id + count(*)');
+                                                                                      explain_filter
+---------------------------------------------------------------------
+ Sort (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+   Sort Key: remote_scan.worker_column_3
+   Sort Method: quicksort  Memory: NkB
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+         Task Count: N
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT id, avg(num) AS avg FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
-               Tuple data received from node: 392 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  HashAggregate (actual rows=26 loops=1)
-                     Output: id, avg(num)
+               Query: SELECT id, count(*) AS count, (id OPERATOR(pg_catalog.+) count(*)) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  HashAggregate (actual rows=N loops=N)
+                     Output: id, count(*), (id + count(*))
                      Group Key: sorted_merge_test.id
-                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                            Output: id, val, num, ts
 (19 rows)
 
--- B3: GROUP BY non-dist col, ORDER BY non-dist col
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY val;
-                                                               QUERY PLAN
----------------------------------------------------------------------
- Sort (actual rows=104 loops=1)
-   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-   Sort Key: remote_scan.val
-   Sort Method: quicksort  Memory: 28kB
-   ->  HashAggregate (actual rows=104 loops=1)
-         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
-         Group Key: remote_scan.val
-         ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-               Output: remote_scan.val, remote_scan.count
-               Task Count: 4
-               Tuple data received from nodes: 1447 bytes
-               Tasks Shown: One of 4
-               ->  Task
-                     Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
-                     Tuple data received from node: 359 bytes
-                     Node: host=localhost port=xxxxx dbname=regression
-                     ->  HashAggregate (actual rows=26 loops=1)
-                           Output: val, count(*)
-                           Group Key: sorted_merge_test.val
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                 Output: id, val, num, ts
-(23 rows)
-
--- B4: GROUP BY non-dist col, ORDER BY aggregate
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY count(*);
-                                                               QUERY PLAN
----------------------------------------------------------------------
- Sort (actual rows=104 loops=1)
-   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint))
-   Sort Method: quicksort  Memory: 28kB
-   ->  HashAggregate (actual rows=104 loops=1)
-         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, '0'::bigint)
-         Group Key: remote_scan.val
-         ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-               Output: remote_scan.val, remote_scan.count
-               Task Count: 4
-               Tuple data received from nodes: 1447 bytes
-               Tasks Shown: One of 4
-               ->  Task
-                     Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
-                     Tuple data received from node: 359 bytes
-                     Node: host=localhost port=xxxxx dbname=regression
-                     ->  HashAggregate (actual rows=26 loops=1)
-                           Output: val, count(*)
-                           Group Key: sorted_merge_test.val
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                 Output: id, val, num, ts
-(23 rows)
-
--- =================================================================
--- Category C: Correctness — results match GUC off vs on
--- =================================================================
--- C1: Simple ORDER BY
+-- J6: Correctness comparison — expression ORDER BY, GUC off vs on
 SET citus.enable_sorted_merge TO off;
-SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+SELECT id, val FROM sorted_merge_test ORDER BY id + 0 LIMIT 5;
  id |  val
 ---------------------------------------------------------------------
   1 | val_1
@@ -2311,15 +2393,10 @@ SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
   3 | val_3
   4 | val_4
   5 | val_5
-  6 | val_6
-  7 | val_7
-  8 | val_8
-  9 | val_9
- 10 | val_10
-(10 rows)
+(5 rows)
 
 SET citus.enable_sorted_merge TO on;
-SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+SELECT id, val FROM sorted_merge_test ORDER BY id + 0 LIMIT 5;
  id |  val
 ---------------------------------------------------------------------
   1 | val_1
@@ -2327,50 +2404,91 @@ SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
   3 | val_3
   4 | val_4
   5 | val_5
-  6 | val_6
-  7 | val_7
-  8 | val_8
-  9 | val_9
- 10 | val_10
-(10 rows)
-
--- C2: ORDER BY DESC
-SET citus.enable_sorted_merge TO off;
-SELECT id FROM sorted_merge_test ORDER BY id DESC LIMIT 5;
- id
----------------------------------------------------------------------
- 202
- 201
- 200
- 102
- 101
 (5 rows)
 
+-- -----------------------------------------------------------------
+-- J7–J12: Additional pushable expressions (no aggregates)
+-- -----------------------------------------------------------------
 SET citus.enable_sorted_merge TO on;
-SELECT id FROM sorted_merge_test ORDER BY id DESC LIMIT 5;
- id
+-- J7: ORDER BY function call on column
+SELECT id, val FROM sorted_merge_test ORDER BY upper(val) LIMIT 5;
+ id  |  val
 ---------------------------------------------------------------------
- 202
- 201
- 200
- 102
- 101
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
 (5 rows)
 
--- C3: Multi-column ORDER BY
-SET citus.enable_sorted_merge TO off;
-SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
- id | num
+-- J8: ORDER BY COALESCE
+SELECT id, num FROM sorted_merge_test ORDER BY COALESCE(num, 0) LIMIT 5;
+ id  | num
 ---------------------------------------------------------------------
-  1 | 1.5
-  2 | 3.0
-  3 | 4.5
-  4 | 6.0
-  5 | 7.5
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
 (5 rows)
 
-SET citus.enable_sorted_merge TO on;
-SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+-- J9: ORDER BY negation
+SELECT id, num FROM sorted_merge_test ORDER BY -num LIMIT 5;
+ id  |  num
+---------------------------------------------------------------------
+ 100 | 150.0
+  99 | 148.5
+  98 | 147.0
+  97 | 145.5
+  96 | 144.0
+(5 rows)
+
+-- J10: ORDER BY concatenation
+SELECT id, val FROM sorted_merge_test ORDER BY val || '_suffix' LIMIT 5;
+ id  |   val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+ 100 | val_100
+  10 | val_10
+(5 rows)
+
+-- J11: ORDER BY mathematical function (abs distance)
+SELECT id, num FROM sorted_merge_test ORDER BY abs(num - 25), id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+ 17 | 25.5
+ 16 | 24.0
+ 18 | 27.0
+ 15 | 22.5
+ 19 | 28.5
+(5 rows)
+
+-- J12: ORDER BY expression not in SELECT list
+SELECT id FROM sorted_merge_test ORDER BY num + 1 LIMIT 5;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+  4
+  5
+(5 rows)
+
+-- J13: ORDER BY expression referencing multiple columns
+SELECT id, val FROM sorted_merge_test ORDER BY id * num LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- J14: ORDER BY with type cast
+SELECT id, num FROM sorted_merge_test ORDER BY num::int LIMIT 5;
  id | num
 ---------------------------------------------------------------------
   1 | 1.5
@@ -2380,9 +2498,34 @@ SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
   5 | 7.5
 (5 rows)
 
--- C4: ORDER BY non-distribution column
+-- J15: ORDER BY with subexpression in SELECT and different expression in ORDER BY
+SELECT id, num + 1 as n1 FROM sorted_merge_test ORDER BY num + 2 LIMIT 5;
+ id | n1
+---------------------------------------------------------------------
+  1 | 2.5
+  2 | 4.0
+  3 | 5.5
+  4 | 7.0
+  5 | 8.5
+(5 rows)
+
+-- J16: ORDER BY column alias
+SELECT id, num * 2 as doubled FROM sorted_merge_test ORDER BY doubled LIMIT 5;
+ id | doubled
+---------------------------------------------------------------------
+  1 |     3.0
+  2 |     6.0
+  3 |     9.0
+  4 |    12.0
+  5 |    15.0
+(5 rows)
+
+-- -----------------------------------------------------------------
+-- J17–J21: Correctness — GUC off vs on for expression ORDER BY
+-- -----------------------------------------------------------------
+-- J17: function call
 SET citus.enable_sorted_merge TO off;
-SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5;
+SELECT id, val FROM sorted_merge_test ORDER BY upper(val) LIMIT 5;
  id  |  val
 ---------------------------------------------------------------------
  200 | dup_a
@@ -2393,7 +2536,7 @@ SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5
 (5 rows)
 
 SET citus.enable_sorted_merge TO on;
-SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5;
+SELECT id, val FROM sorted_merge_test ORDER BY upper(val) LIMIT 5;
  id  |  val
 ---------------------------------------------------------------------
  200 | dup_a
@@ -2403,106 +2546,244 @@ SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5
   10 | val_10
 (5 rows)
 
--- C5: GROUP BY dist_col ORDER BY dist_col
+-- J18: CASE expression
 SET citus.enable_sorted_merge TO off;
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
- id | count
----------------------------------------------------------------------
-  1 |     1
-  2 |     1
-  3 |     1
-  4 |     1
-  5 |     1
-(5 rows)
+SELECT id, CASE WHEN num > 50 THEN 'high' ELSE 'low' END as cat
+FROM sorted_merge_test ORDER BY CASE WHEN num > 50 THEN 'high' ELSE 'low' END, id LIMIT 10;
+ id | cat
+---------------------------------------------------------------------
+ 34 | high
+ 35 | high
+ 36 | high
+ 37 | high
+ 38 | high
+ 39 | high
+ 40 | high
+ 41 | high
+ 42 | high
+ 43 | high
+(10 rows)
 
 SET citus.enable_sorted_merge TO on;
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
- id | count
----------------------------------------------------------------------
-  1 |     1
-  2 |     1
-  3 |     1
-  4 |     1
-  5 |     1
-(5 rows)
+SELECT id, CASE WHEN num > 50 THEN 'high' ELSE 'low' END as cat
+FROM sorted_merge_test ORDER BY CASE WHEN num > 50 THEN 'high' ELSE 'low' END, id LIMIT 10;
+ id | cat
+---------------------------------------------------------------------
+ 34 | high
+ 35 | high
+ 36 | high
+ 37 | high
+ 38 | high
+ 39 | high
+ 40 | high
+ 41 | high
+ 42 | high
+ 43 | high
+(10 rows)
 
--- C6: Mixed directions
+-- J19: COALESCE
 SET citus.enable_sorted_merge TO off;
-SELECT id, num FROM sorted_merge_test WHERE num IS NOT NULL ORDER BY id ASC, num DESC LIMIT 5;
- id | num
+SELECT id, num FROM sorted_merge_test ORDER BY COALESCE(num, 0), id LIMIT 5;
+ id  | num
 ---------------------------------------------------------------------
-  1 | 1.5
-  2 | 3.0
-  3 | 4.5
-  4 | 6.0
-  5 | 7.5
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
 (5 rows)
 
 SET citus.enable_sorted_merge TO on;
-SELECT id, num FROM sorted_merge_test WHERE num IS NOT NULL ORDER BY id ASC, num DESC LIMIT 5;
- id | num
+SELECT id, num FROM sorted_merge_test ORDER BY COALESCE(num, 0), id LIMIT 5;
+ id  | num
 ---------------------------------------------------------------------
-  1 | 1.5
-  2 | 3.0
-  3 | 4.5
-  4 | 6.0
-  5 | 7.5
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
 (5 rows)
 
--- C7: WHERE + ORDER BY
+-- J20: abs() distance function
 SET citus.enable_sorted_merge TO off;
-SELECT id, val FROM sorted_merge_test WHERE num > 100 ORDER BY id LIMIT 5;
- id |  val
+SELECT id, num FROM sorted_merge_test ORDER BY abs(num - 25), id LIMIT 5;
+ id | num
 ---------------------------------------------------------------------
- 67 | val_67
- 68 | val_68
- 69 | val_69
- 70 | val_70
- 71 | val_71
+ 17 | 25.5
+ 16 | 24.0
+ 18 | 27.0
+ 15 | 22.5
+ 19 | 28.5
 (5 rows)
 
 SET citus.enable_sorted_merge TO on;
-SELECT id, val FROM sorted_merge_test WHERE num > 100 ORDER BY id LIMIT 5;
- id |  val
+SELECT id, num FROM sorted_merge_test ORDER BY abs(num - 25), id LIMIT 5;
+ id | num
 ---------------------------------------------------------------------
- 67 | val_67
- 68 | val_68
- 69 | val_69
- 70 | val_70
- 71 | val_71
+ 17 | 25.5
+ 16 | 24.0
+ 18 | 27.0
+ 15 | 22.5
+ 19 | 28.5
 (5 rows)
 
--- C8: Aggregates in SELECT, ORDER BY on dist_col (GROUP BY dist_col)
+-- -----------------------------------------------------------------
+-- J21–J22: More ineligibility — aggregate inside expressions
+-- -----------------------------------------------------------------
+SET citus.enable_sorted_merge TO on;
+-- J21: ORDER BY CASE wrapping an aggregate
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY CASE WHEN count(*) > 1 THEN 0 ELSE 1 END, id LIMIT 5');
+                                                                                                                                                      explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+   ->  Sort (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+         Sort Key: remote_scan.worker_column_3, remote_scan.id
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT id, count(*) AS count, CASE WHEN (count(*) OPERATOR(pg_catalog.>) N) THEN N ELSE N END AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY CASE WHEN (count(*) OPERATOR(pg_catalog.>) N) THEN N ELSE N END, id LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
+                           Output: id, (count(*)), (CASE WHEN (count(*) > N) THEN N ELSE N END)
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: id, (count(*)), (CASE WHEN (count(*) > N) THEN N ELSE N END)
+                                 Sort Key: (CASE WHEN (count(*) > N) THEN N ELSE N END), sorted_merge_test.id
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  HashAggregate (actual rows=N loops=N)
+                                       Output: id, count(*), CASE WHEN (count(*) > N) THEN N ELSE N END
+                                       Group Key: sorted_merge_test.id
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val, num, ts
+(27 rows)
+
+-- J22: ORDER BY aggregate expression (sum + 1) — correctness
 SET citus.enable_sorted_merge TO off;
-SELECT id, count(*), sum(num), avg(num) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
- id | count | sum |          avg
----------------------------------------------------------------------
-  1 |     1 | 1.5 | 1.50000000000000000000
-  2 |     1 | 3.0 |     3.0000000000000000
-  3 |     1 | 4.5 |     4.5000000000000000
-  4 |     1 | 6.0 |     6.0000000000000000
-  5 |     1 | 7.5 |     7.5000000000000000
+SELECT id, sum(num) + 1 as s FROM sorted_merge_test GROUP BY id ORDER BY sum(num) + 1 LIMIT 5;
+ id |  s
+---------------------------------------------------------------------
+  1 | 2.5
+  2 | 4.0
+  3 | 5.5
+  4 | 7.0
+  5 | 8.5
 (5 rows)
 
 SET citus.enable_sorted_merge TO on;
-SELECT id, count(*), sum(num), avg(num) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
- id | count | sum |          avg
----------------------------------------------------------------------
-  1 |     1 | 1.5 | 1.50000000000000000000
-  2 |     1 | 3.0 |     3.0000000000000000
-  3 |     1 | 4.5 |     4.5000000000000000
-  4 |     1 | 6.0 |     6.0000000000000000
-  5 |     1 | 7.5 |     7.5000000000000000
+SELECT id, sum(num) + 1 as s FROM sorted_merge_test GROUP BY id ORDER BY sum(num) + 1 LIMIT 5;
+ id |  s
+---------------------------------------------------------------------
+  1 | 2.5
+  2 | 4.0
+  3 | 5.5
+  4 | 7.0
+  5 | 8.5
 (5 rows)
 
+-- -----------------------------------------------------------------
+-- J23–J24: EXPLAIN plans for pushable expression patterns
+-- -----------------------------------------------------------------
+SET citus.enable_sorted_merge TO on;
+-- J23: Does function-call ORDER BY get pushed to workers?
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY upper(val) LIMIT 5');
+                                                                                explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id, val, upper(val) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (upper(val)) LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: id, val, (upper(val))
+                     ->  Sort (actual rows=N loops=N)
+                           Output: id, val, (upper(val))
+                           Sort Key: (upper(sorted_merge_test.val))
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: id, val, upper(val)
+(20 rows)
+
+-- J24: ORDER BY expression not in SELECT list — pushed to workers?
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY num + 1 LIMIT 5');
+                                                                                                           explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.worker_column_2
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.worker_column_2
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id, (num OPERATOR(pg_catalog.+) 'N'::numeric) AS worker_column_2 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (num OPERATOR(pg_catalog.+) 'N'::numeric) LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: id, ((num + 'N'::numeric))
+                     ->  Sort (actual rows=N loops=N)
+                           Output: id, ((num + 'N'::numeric))
+                           Sort Key: ((sorted_merge_test.num + 'N'::numeric))
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: id, (num + 'N'::numeric)
+(20 rows)
+
 -- =================================================================
--- Category D: Complex queries — regression guards
+-- Category K: Index-based sort avoidance
 -- =================================================================
+-- When an index exists on the ORDER BY column, PostgreSQL's worker-side
+-- planner should choose an Index Scan instead of Sort + Seq Scan, making
+-- the worker-side sort essentially free. This is the best-case scenario
+-- for sorted merge: zero worker sort cost + zero coordinator sort cost.
+--
+-- We disable enable_seqscan to force the worker planner to prefer the
+-- index, since the test table is small enough that Seq Scan + Sort
+-- would otherwise be cheaper.
+CREATE INDEX sorted_merge_test_id_idx ON sorted_merge_test(id);
+-- Use a transaction with SET LOCAL to propagate enable_seqscan=off to workers,
+-- forcing the worker planner to use the index instead of Seq Scan + Sort.
+SET citus.propagate_set_commands TO 'local';
+-- K1: EXPLAIN with index — worker uses Index Scan, no Sort node
 SET citus.enable_sorted_merge TO on;
--- D1: Subquery in FROM with ORDER BY
-SELECT * FROM (
-    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5
-) sub ORDER BY id;
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id');
+                                                               explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Index Scan using sorted_merge_test_id_idx_960000 on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+               Output: id, val
+(12 rows)
+
+COMMIT;
+-- K2: Correctness with index — GUC off vs on
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SET LOCAL citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5;
  id |  val
 ---------------------------------------------------------------------
   1 | val_1
@@ -2512,11 +2793,8 @@ SELECT * FROM (
   5 | val_5
 (5 rows)
 
--- D2: CTE with ORDER BY
-WITH top5 AS (
-    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5
-)
-SELECT * FROM top5 ORDER BY id;
+SET LOCAL citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5;
  id |  val
 ---------------------------------------------------------------------
   1 | val_1
@@ -2526,447 +2804,585 @@ SELECT * FROM top5 ORDER BY id;
   5 | val_5
 (5 rows)
 
--- D3: Co-located JOIN + ORDER BY
-SELECT t.id, t.val, e.event_type
-FROM sorted_merge_test t
-JOIN sorted_merge_events e ON t.id = e.id
-WHERE t.id <= 5
-ORDER BY t.id, e.event_type
-LIMIT 10;
- id |  val  | event_type
----------------------------------------------------------------------
-  1 | val_1 | buy
-  1 | val_1 | buy
-  1 | val_1 | click
-  1 | val_1 | view
-  2 | val_2 | buy
-  2 | val_2 | click
-  2 | val_2 | view
-  2 | val_2 | view
-  3 | val_3 | buy
-  3 | val_3 | buy
-(10 rows)
-
--- D4: UNION ALL + ORDER BY
-SELECT id, val FROM sorted_merge_test WHERE id <= 3
-UNION ALL
-SELECT id, val FROM sorted_merge_test WHERE id BETWEEN 98 AND 100
-ORDER BY id;
- id  |   val
+COMMIT;
+-- K3: Multi-column index
+CREATE INDEX sorted_merge_test_num_id_idx ON sorted_merge_test(num, id);
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, num FROM sorted_merge_test ORDER BY num, id');
+                                                                    explain_filter
 ---------------------------------------------------------------------
-   1 | val_1
-   2 | val_2
-   3 | val_3
-  98 | val_98
-  99 | val_99
- 100 | val_100
-(6 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.num
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY num, id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Index Only Scan using sorted_merge_test_num_id_idx_960000 on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+               Output: id, num
+               Heap Fetches: N
+(13 rows)
 
--- D5: DISTINCT + ORDER BY
-SELECT DISTINCT id FROM sorted_merge_test WHERE id <= 10 ORDER BY id;
- id
+COMMIT;
+-- K4: Correctness with multi-column index — GUC off vs on
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SET LOCAL citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
 ---------------------------------------------------------------------
-  1
-  2
-  3
-  4
-  5
-  6
-  7
-  8
-  9
- 10
-(10 rows)
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
 
--- D6: DISTINCT ON + ORDER BY
-SELECT DISTINCT ON (id) id, val, num
-FROM sorted_merge_test
-WHERE id <= 5
-ORDER BY id, num DESC;
- id |  val  | num
+SET LOCAL citus.enable_sorted_merge TO on;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
 ---------------------------------------------------------------------
-  1 | val_1 | 1.5
-  2 | val_2 | 3.0
-  3 | val_3 | 4.5
-  4 | val_4 | 6.0
-  5 | val_5 | 7.5
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
 (5 rows)
 
--- D7: EXISTS subquery + ORDER BY
-SELECT id, val FROM sorted_merge_test t
-WHERE EXISTS (SELECT 1 FROM sorted_merge_events e WHERE e.id = t.id)
-ORDER BY id LIMIT 5;
- id |  val
+COMMIT;
+-- K5: DESC ordering with index
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id DESC');
+                                                                    explain_filter
 ---------------------------------------------------------------------
-  1 | val_1
-  2 | val_2
-  3 | val_3
-  4 | val_4
-  5 | val_5
-(5 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id DESC
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Index Scan Backward using sorted_merge_test_id_idx_960000 on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+               Output: id, val
+(12 rows)
 
--- D8: IN subquery + ORDER BY
-SELECT id, val FROM sorted_merge_test
-WHERE id IN (SELECT id FROM sorted_merge_events WHERE event_type = 'click')
-ORDER BY id LIMIT 5;
- id |  val
----------------------------------------------------------------------
-  1 | val_1
-  2 | val_2
-  3 | val_3
-  4 | val_4
-  5 | val_5
-(5 rows)
+COMMIT;
+RESET citus.propagate_set_commands;
+DROP INDEX sorted_merge_test_id_idx;
+DROP INDEX sorted_merge_test_num_id_idx;
+-- =================================================================
+-- Category L: Volatile and stable functions in ORDER BY
+-- Tests that ORDER BY with functions works correctly with sorted merge.
+-- Volatile functions (random, clock_timestamp, timeofday) are pushed
+-- to workers as computed columns — sorted merge uses the materialized
+-- worker values, which is semantically equivalent to coordinator Sort.
+-- =================================================================
+-- L1: STABLE function — now() in expression with column
+-- now() returns the same value on all workers within a transaction,
+-- so the merge is globally consistent. Sorted merge should be used.
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY now() - ts, id');
+                                                                                            explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, (now() OPERATOR(pg_catalog.-) ts) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (now() OPERATOR(pg_catalog.-) ts), id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, ((now() - ts))
+               Sort Key: ((now() - sorted_merge_test.ts)), sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, (now() - ts)
+(16 rows)
+
+-- L2: VOLATILE function — random() in ORDER BY
+-- random() is pushed to workers as worker_column_3; each worker sorts
+-- by its own random values. The merge interleaves using materialized
+-- values — semantically equivalent to coordinator Sort on worker_column_3.
+-- Test plan shape only (result is non-deterministic).
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY random(), id');
+                                                                    explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, random() AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (random()), id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, (random())
+               Sort Key: (random()), sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, random()
+(16 rows)
+
+-- L3: VOLATILE function — clock_timestamp() in ORDER BY
+-- Same mechanics as random(): pushed to workers, sorted locally, merged.
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY clock_timestamp(), id');
+                                                                             explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, clock_timestamp() AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (clock_timestamp()), id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, (clock_timestamp())
+               Sort Key: (clock_timestamp()), sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, clock_timestamp()
+(16 rows)
+
+-- L4: nextval() in ORDER BY with sorted merge ON — expected ERROR
+-- nextval() cannot be pushed to workers (CanPushDownExpression blocks it).
+-- The sort clause references a target entry missing from the worker target
+-- list, causing a plan-time error. This is a pre-existing Citus limitation.
+CREATE SEQUENCE sorted_merge_test_seq;
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY nextval('sorted_merge_test_seq');
+psql:sql/multi_orderby_pushdown.sql:777: ERROR:  ORDER/GROUP BY expression not found in targetlist
+-- L4b: nextval() in ORDER BY with sorted merge OFF but LIMIT present
+-- Same error — demonstrates this is NOT a sorted merge regression.
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test ORDER BY nextval('sorted_merge_test_seq') LIMIT 5;
+psql:sql/multi_orderby_pushdown.sql:782: ERROR:  ORDER/GROUP BY expression not found in targetlist
+DROP SEQUENCE sorted_merge_test_seq;
+-- L5: STABLE function alone (constant-fold case)
+-- current_timestamp is constant-folded by the planner; the sort key
+-- effectively becomes just 'id'. Sorted merge should be used.
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY current_timestamp, id');
+                                                                            explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, CURRENT_TIMESTAMP AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY CURRENT_TIMESTAMP, id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, (CURRENT_TIMESTAMP)
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, CURRENT_TIMESTAMP
+(16 rows)
 
--- D9: Multiple aggregates, GROUP BY dist_col, ORDER BY dist_col
-SELECT id, count(*), sum(num), avg(num), min(val), max(val)
-FROM sorted_merge_test
-GROUP BY id
-ORDER BY id
-LIMIT 5;
- id | count | sum |          avg           |  min  |  max
+SET citus.enable_sorted_merge TO off;
+-- =================================================================
+-- Cleanup
+-- =================================================================
+SET citus.enable_sorted_merge TO off;
+-- Run 2: streaming adapter
+SET citus.enable_streaming_sorted_merge TO on;
+\i sql/multi_orderby_pushdown.sql
+--
+-- MULTI_SORTED_MERGE
+--
+-- Tests for the citus.enable_sorted_merge GUC and the sorted merge
+-- planner eligibility logic. Verifies that enabling the GUC does not
+-- introduce regressions for any query pattern.
+--
+-- MX verification: this test has been verified to pass with zero diffs
+-- under check-base-mx (MX mode), confirming sorted merge works correctly
+-- when any node in the cluster acts as coordinator.
+--
+-- =================================================================
+-- 1. GUC basics
+-- =================================================================
+SHOW citus.enable_sorted_merge;
+ citus.enable_sorted_merge
 ---------------------------------------------------------------------
-  1 |     1 | 1.5 | 1.50000000000000000000 | val_1 | val_1
-  2 |     1 | 3.0 |     3.0000000000000000 | val_2 | val_2
-  3 |     1 | 4.5 |     4.5000000000000000 | val_3 | val_3
-  4 |     1 | 6.0 |     6.0000000000000000 | val_4 | val_4
-  5 |     1 | 7.5 |     7.5000000000000000 | val_5 | val_5
-(5 rows)
+ off
+(1 row)
 
--- D10: CASE expression in SELECT + ORDER BY
-SELECT id,
-       CASE WHEN num > 75 THEN 'high' WHEN num > 25 THEN 'mid' ELSE 'low' END as bucket
-FROM sorted_merge_test
-WHERE num IS NOT NULL
-ORDER BY id
-LIMIT 10;
- id | bucket
+SET citus.enable_sorted_merge TO on;
+SHOW citus.enable_sorted_merge;
+ citus.enable_sorted_merge
 ---------------------------------------------------------------------
-  1 | low
-  2 | low
-  3 | low
-  4 | low
-  5 | low
-  6 | low
-  7 | low
-  8 | low
-  9 | low
- 10 | low
-(10 rows)
+ on
+(1 row)
 
--- D11: NULL values ordering
-SELECT id, num FROM sorted_merge_test ORDER BY num NULLS FIRST, id LIMIT 5;
- id  | num
+SET citus.enable_sorted_merge TO off;
+-- =================================================================
+-- Category A: Eligibility — sort IS pushed to workers
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- A1: ORDER BY distribution column
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id');
+                                               explain_filter
 ---------------------------------------------------------------------
- 101 |
- 102 |
-   1 | 1.5
-   2 | 3.0
-   3 | 4.5
-(5 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val
+(16 rows)
 
-SELECT id, num FROM sorted_merge_test ORDER BY num NULLS LAST, id LIMIT 5;
- id | num
+-- A2: ORDER BY DESC
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id DESC');
+                                               explain_filter
 ---------------------------------------------------------------------
-  1 | 1.5
-  2 | 3.0
-  3 | 4.5
-  4 | 6.0
-  5 | 7.5
-(5 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id DESC
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id
+               Sort Key: sorted_merge_test.id DESC
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id
+(16 rows)
 
-SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS FIRST, id LIMIT 5;
- id  |  num
+-- A3: ORDER BY DESC NULLS LAST
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST');
+                                                        explain_filter
 ---------------------------------------------------------------------
- 101 |
- 102 |
- 100 | 150.0
-  99 | 148.5
-  98 | 147.0
-(5 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.num
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY num DESC NULLS LAST
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, num
+               Sort Key: sorted_merge_test.num DESC NULLS LAST
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, num
+(16 rows)
 
-SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST, id DESC LIMIT 5;
- id  |  num
+-- A4: ORDER BY non-distribution column
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY val');
+                                                explain_filter
 ---------------------------------------------------------------------
- 100 | 150.0
-  99 | 148.5
-  98 | 147.0
-  97 | 145.5
-  96 | 144.0
-(5 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY val
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val
+               Sort Key: sorted_merge_test.val
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val
+(16 rows)
 
--- D12: Large OFFSET
-SELECT id FROM sorted_merge_test ORDER BY id OFFSET 100 LIMIT 5;
- id
+-- A5: Multi-column ORDER BY
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id, val');
+                                                  explain_filter
 ---------------------------------------------------------------------
- 101
- 102
- 200
- 201
- 202
-(5 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, val
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val
+               Sort Key: sorted_merge_test.id, sorted_merge_test.val
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val
+(16 rows)
 
--- D13: ORDER BY ordinal position
-SELECT id, val FROM sorted_merge_test ORDER BY 2, 1 LIMIT 5;
- id  |  val
+-- A6: Mixed directions
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val, num FROM sorted_merge_test ORDER BY id ASC, num DESC');
+                                                       explain_filter
 ---------------------------------------------------------------------
- 200 | dup_a
- 201 | dup_b
- 202 | dup_c
-   1 | val_1
-  10 | val_10
-(5 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id, num DESC
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, num
+               Sort Key: sorted_merge_test.id, sorted_merge_test.num DESC
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, num
+(16 rows)
 
--- =================================================================
--- Category E: Edge cases
--- =================================================================
-SET citus.enable_sorted_merge TO on;
--- E1: Empty result set
-SELECT id FROM sorted_merge_test WHERE id < 0 ORDER BY id;
- id
+-- A7: GROUP BY dist_col ORDER BY dist_col
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id');
+                                                            explain_filter
 ---------------------------------------------------------------------
-(0 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.count
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  GroupAggregate (actual rows=N loops=N)
+               Output: id, count(*)
+               Group Key: sorted_merge_test.id
+               ->  Sort (actual rows=N loops=N)
+                     Output: id
+                     Sort Key: sorted_merge_test.id
+                     Sort Method: quicksort  Memory: NkB
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                           Output: id
+(19 rows)
 
--- E2: Single row (may go through router planner)
-SELECT id, val FROM sorted_merge_test WHERE id = 42 ORDER BY id;
- id |  val
+-- A8: WHERE clause + ORDER BY
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test WHERE num > 50 ORDER BY id');
+                                                                  explain_filter
 ---------------------------------------------------------------------
- 42 | val_42
-(1 row)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (num OPERATOR(pg_catalog.>) 'N'::numeric) ORDER BY id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val
+                     Filter: (sorted_merge_test.num > 'N'::numeric)
+                     Rows Removed by Filter: N
+(18 rows)
 
--- E3: All rows with same sort value
-SELECT id, num FROM sorted_merge_test WHERE num = 10.5 ORDER BY num, id;
- id  | num
+-- A9: Expression in ORDER BY (non-aggregate)
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, num FROM sorted_merge_test ORDER BY id + 1');
+                                                                                      explain_filter
 ---------------------------------------------------------------------
-   7 | 10.5
- 200 | 10.5
- 201 | 10.5
- 202 | 10.5
-(4 rows)
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.num, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, num, (id OPERATOR(pg_catalog.+) N) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (id OPERATOR(pg_catalog.+) N)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, num, ((id + N))
+               Sort Key: ((sorted_merge_test.id + N))
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, num, (id + N)
+(16 rows)
 
--- E4: Wide sort key (4 columns)
-SELECT id, val, num FROM sorted_merge_test
-WHERE id <= 5
-ORDER BY num, val, id
-LIMIT 5;
- id |  val  | num
+-- A10: ORDER BY with LIMIT (existing pushdown, verify no regression)
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
+                                                         explain_filter
 ---------------------------------------------------------------------
-  1 | val_1 | 1.5
-  2 | val_2 | 3.0
-  3 | val_3 | 4.5
-  4 | val_4 | 6.0
-  5 | val_5 | 7.5
-(5 rows)
-
--- E5: Zero-task defensive path
--- CreatePerTaskDispatchDest handles taskCount=0 gracefully (returns a no-op
--- destination). This cannot be triggered via normal SQL because distributed
--- tables always have at least one shard. The closest we can test is an
--- empty-result query through the sorted merge path to verify no crash.
-SELECT id FROM sorted_merge_test WHERE false ORDER BY id;
- id
----------------------------------------------------------------------
-(0 rows)
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: id
+                     ->  Sort (actual rows=N loops=N)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: id
+(20 rows)
 
 -- =================================================================
--- Category F: Existing LIMIT pushdown stability
+-- Category B: Ineligibility — sort NOT pushed for merge
 -- =================================================================
--- F1: Simple LIMIT + ORDER BY: plan unchanged between GUC off and on
-SET citus.enable_sorted_merge TO off;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
-                                                              QUERY PLAN
----------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
-   Output: remote_scan.id
-   ->  Sort (actual rows=5 loops=1)
-         Output: remote_scan.id
-         Sort Key: remote_scan.id
-         Sort Method: top-N heapsort  Memory: 25kB
-         ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
-               Output: remote_scan.id
-               Task Count: 4
-               Tuple data received from nodes: 80 bytes
-               Tasks Shown: One of 4
-               ->  Task
-                     Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
-                     Tuple data received from node: 20 bytes
-                     Node: host=localhost port=xxxxx dbname=regression
-                     ->  Limit (actual rows=5 loops=1)
-                           Output: id
-                           ->  Sort (actual rows=5 loops=1)
-                                 Output: id
-                                 Sort Key: sorted_merge_test.id
-                                 Sort Method: top-N heapsort  Memory: 25kB
-                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                       Output: id
-(23 rows)
-
 SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
-                                                           QUERY PLAN
----------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
-   Output: remote_scan.id
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-         Output: remote_scan.id
-         Task Count: 4
-         Tuple data received from nodes: 80 bytes
-         Tasks Shown: One of 4
-         ->  Task
-               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
-               Tuple data received from node: 20 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
-                     Output: id
-                     ->  Sort (actual rows=5 loops=1)
-                           Output: id
-                           Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                 Output: id
-(19 rows)
-
--- F2: GROUP BY dist_col + ORDER BY + LIMIT
-SET citus.enable_sorted_merge TO off;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
-                                                                             QUERY PLAN
+-- B1: ORDER BY count(*)
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*)');
+                                                         explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
+ Sort (actual rows=N loops=N)
    Output: remote_scan.id, remote_scan.count
-   ->  Sort (actual rows=5 loops=1)
+   Sort Key: remote_scan.count
+   Sort Method: quicksort  Memory: NkB
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
          Output: remote_scan.id, remote_scan.count
-         Sort Key: remote_scan.id
-         Sort Method: top-N heapsort  Memory: 25kB
-         ->  Custom Scan (Citus Adaptive) (actual rows=20 loops=1)
-               Output: remote_scan.id, remote_scan.count
-               Task Count: 4
-               Tuple data received from nodes: 240 bytes
-               Tasks Shown: One of 4
-               ->  Task
-                     Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT '5'::bigint
-                     Tuple data received from node: 60 bytes
-                     Node: host=localhost port=xxxxx dbname=regression
-                     ->  Limit (actual rows=5 loops=1)
-                           Output: id, (count(*))
-                           ->  Sort (actual rows=5 loops=1)
-                                 Output: id, (count(*))
-                                 Sort Key: sorted_merge_test.id
-                                 Sort Method: top-N heapsort  Memory: 25kB
-                                 ->  HashAggregate (actual rows=26 loops=1)
-                                       Output: id, count(*)
-                                       Group Key: sorted_merge_test.id
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                             Output: id, val, num, ts
-(27 rows)
+         Task Count: N
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  HashAggregate (actual rows=N loops=N)
+                     Output: id, count(*)
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                           Output: id, val, num, ts
+(19 rows)
 
-SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
-                                                                          QUERY PLAN
+-- B2: ORDER BY avg(col)
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, avg(num) FROM sorted_merge_test GROUP BY id ORDER BY avg(num)');
+                                                        explain_filter
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
-   Output: remote_scan.id, remote_scan.count
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-         Output: remote_scan.id, remote_scan.count
-         Task Count: 4
-         Tuple data received from nodes: 240 bytes
-         Tasks Shown: One of 4
+ Sort (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.avg
+   Sort Key: remote_scan.avg
+   Sort Method: quicksort  Memory: NkB
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.avg
+         Task Count: N
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
          ->  Task
-               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT '5'::bigint
-               Tuple data received from node: 60 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
-                     Output: id, (count(*))
-                     ->  Sort (actual rows=5 loops=1)
-                           Output: id, (count(*))
-                           Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  HashAggregate (actual rows=26 loops=1)
-                                 Output: id, count(*)
-                                 Group Key: sorted_merge_test.id
-                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                       Output: id, val, num, ts
-(23 rows)
+               Query: SELECT id, avg(num) AS avg FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  HashAggregate (actual rows=N loops=N)
+                     Output: id, avg(num)
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                           Output: id, val, num, ts
+(19 rows)
 
--- F3: ORDER BY aggregate + LIMIT (not eligible for merge)
-SET citus.enable_sorted_merge TO off;
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 5;
- id | count
+-- B3: GROUP BY non-dist col, ORDER BY non-dist col
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY val');
+                                                             explain_filter
 ---------------------------------------------------------------------
-  1 |     1
-  2 |     1
-  3 |     1
-  4 |     1
-  5 |     1
-(5 rows)
+ Sort (actual rows=N loops=N)
+   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint))
+   Sort Key: remote_scan.val
+   Sort Method: quicksort  Memory: NkB
+   ->  HashAggregate (actual rows=N loops=N)
+         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint)
+         Group Key: remote_scan.val
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.val, remote_scan.count
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  HashAggregate (actual rows=N loops=N)
+                           Output: val, count(*)
+                           Group Key: sorted_merge_test.val
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: id, val, num, ts
+(23 rows)
 
-SET citus.enable_sorted_merge TO on;
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 5;
- id | count
----------------------------------------------------------------------
-  1 |     1
-  2 |     1
-  3 |     1
-  4 |     1
-  5 |     1
-(5 rows)
+-- B4: GROUP BY non-dist col, ORDER BY aggregate
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT val, count(*) FROM sorted_merge_test GROUP BY val ORDER BY count(*)');
+                                                             explain_filter
+---------------------------------------------------------------------
+ Sort (actual rows=N loops=N)
+   Output: remote_scan.val, (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint))
+   Sort Key: (COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint))
+   Sort Method: quicksort  Memory: NkB
+   ->  HashAggregate (actual rows=N loops=N)
+         Output: remote_scan.val, COALESCE((pg_catalog.sum(remote_scan.count))::bigint, 'N'::bigint)
+         Group Key: remote_scan.val
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.val, remote_scan.count
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT val, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY val
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  HashAggregate (actual rows=N loops=N)
+                           Output: val, count(*)
+                           Group Key: sorted_merge_test.val
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: id, val, num, ts
+(23 rows)
 
 -- =================================================================
--- Category G: Phase 4 — Sort elision and advanced scenarios
+-- Category C: Correctness — results match GUC off vs on
 -- =================================================================
--- G1: Sort elision verification — coordinator Sort node absent
+-- C1: Simple ORDER BY
 SET citus.enable_sorted_merge TO off;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY id;
-                                                 QUERY PLAN
----------------------------------------------------------------------
- Sort (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   Sort Key: remote_scan.id
-   Sort Method: quicksort  Memory: 28kB
-   ->  Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-         Output: remote_scan.id, remote_scan.val
-         Task Count: 4
-         Tuple data received from nodes: 1027 bytes
-         Tasks Shown: One of 4
-         ->  Task
-               Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true
-               Tuple data received from node: 255 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, val
-(15 rows)
-
-SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test ORDER BY id;
-                                                 QUERY PLAN
----------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=105 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   Task Count: 4
-   Tuple data received from nodes: 1027 bytes
-   Tasks Shown: One of 4
-   ->  Task
-         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
-         Tuple data received from node: 255 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Sort (actual rows=26 loops=1)
-               Output: id, val
-               Sort Key: sorted_merge_test.id
-               Sort Method: quicksort  Memory: 25kB
-               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                     Output: id, val
-(15 rows)
-
--- G2a: PREPARE with merge ON, EXECUTE after turning OFF
--- Plan-time decision is baked in — cached plan must still merge correctly
-SET citus.enable_sorted_merge TO on;
-PREPARE merge_on_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
-EXECUTE merge_on_stmt;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
  id |  val
 ---------------------------------------------------------------------
   1 | val_1
@@ -2981,8 +3397,8 @@ EXECUTE merge_on_stmt;
  10 | val_10
 (10 rows)
 
-SET citus.enable_sorted_merge TO off;
-EXECUTE merge_on_stmt;
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
  id |  val
 ---------------------------------------------------------------------
   1 | val_1
@@ -2997,133 +3413,236 @@ EXECUTE merge_on_stmt;
  10 | val_10
 (10 rows)
 
-DEALLOCATE merge_on_stmt;
--- G2b: PREPARE with merge OFF, EXECUTE after turning ON
--- Cached plan has Sort node — must still return sorted results
+-- C2: ORDER BY DESC
 SET citus.enable_sorted_merge TO off;
-PREPARE merge_off_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
-EXECUTE merge_off_stmt;
- id |  val
+SELECT id FROM sorted_merge_test ORDER BY id DESC LIMIT 5;
+ id
 ---------------------------------------------------------------------
-  1 | val_1
-  2 | val_2
-  3 | val_3
-  4 | val_4
-  5 | val_5
-  6 | val_6
-  7 | val_7
-  8 | val_8
-  9 | val_9
- 10 | val_10
-(10 rows)
+ 202
+ 201
+ 200
+ 102
+ 101
+(5 rows)
 
 SET citus.enable_sorted_merge TO on;
-EXECUTE merge_off_stmt;
- id |  val
+SELECT id FROM sorted_merge_test ORDER BY id DESC LIMIT 5;
+ id
 ---------------------------------------------------------------------
-  1 | val_1
-  2 | val_2
-  3 | val_3
-  4 | val_4
-  5 | val_5
-  6 | val_6
-  7 | val_7
-  8 | val_8
-  9 | val_9
- 10 | val_10
-(10 rows)
+ 202
+ 201
+ 200
+ 102
+ 101
+(5 rows)
 
-DEALLOCATE merge_off_stmt;
--- G3: Cursor with backward scan
-SET citus.enable_sorted_merge TO on;
-BEGIN;
-DECLARE sorted_cursor CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
-FETCH 3 FROM sorted_cursor;
- id
+-- C3: Multi-column ORDER BY
+SET citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
 ---------------------------------------------------------------------
-  1
-  2
-  3
-(3 rows)
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
 
-FETCH BACKWARD 1 FROM sorted_cursor;
-psql:sql/multi_orderby_pushdown.sql:319: ERROR:  cursor can only scan forward
-HINT:  Declare it with SCROLL option to enable backward scan.
-FETCH 2 FROM sorted_cursor;
-psql:sql/multi_orderby_pushdown.sql:320: ERROR:  current transaction is aborted, commands ignored until end of transaction block
-CLOSE sorted_cursor;
-psql:sql/multi_orderby_pushdown.sql:321: ERROR:  current transaction is aborted, commands ignored until end of transaction block
-COMMIT;
--- G3b: SCROLL cursor with backward scan
 SET citus.enable_sorted_merge TO on;
-BEGIN;
-DECLARE sorted_scroll_cursor SCROLL CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
-FETCH 3 FROM sorted_scroll_cursor;
- id
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
 ---------------------------------------------------------------------
-  1
-  2
-  3
-(3 rows)
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
 
-FETCH BACKWARD 1 FROM sorted_scroll_cursor;
- id
+-- C4: ORDER BY non-distribution column
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5;
+ id  |  val
 ---------------------------------------------------------------------
-  2
-(1 row)
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
 
-FETCH 2 FROM sorted_scroll_cursor;
- id
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test WHERE val IS NOT NULL ORDER BY val LIMIT 5;
+ id  |  val
 ---------------------------------------------------------------------
-  3
-  4
-(2 rows)
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
 
-CLOSE sorted_scroll_cursor;
-COMMIT;
--- G4: EXPLAIN ANALYZE (sorted merge skipped for EXPLAIN ANALYZE)
-SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5;
-                                                           QUERY PLAN
+-- C5: GROUP BY dist_col ORDER BY dist_col
+SET citus.enable_sorted_merge TO off;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
-   Output: remote_scan.id
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-         Output: remote_scan.id
-         Task Count: 4
-         Tuple data received from nodes: 80 bytes
-         Tasks Shown: One of 4
-         ->  Task
-               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '5'::bigint
-               Tuple data received from node: 20 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
-                     Output: id
-                     ->  Sort (actual rows=5 loops=1)
-                           Output: id
-                           Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                 Output: id
-(19 rows)
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
 
--- G5: ORDER BY aggregate + LIMIT — crash regression test
--- Previously caused SIGSEGV when sorted merge was enabled because
--- aggregate ORDER BY was erroneously tagged as merge-eligible.
 SET citus.enable_sorted_merge TO on;
-SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 3;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
  id | count
 ---------------------------------------------------------------------
   1 |     1
   2 |     1
   3 |     1
-(3 rows)
+  4 |     1
+  5 |     1
+(5 rows)
+
+-- C6: Mixed directions
+SET citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test WHERE num IS NOT NULL ORDER BY id ASC, num DESC LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
 
--- G6: Small work_mem with many tasks (32 shards)
 SET citus.enable_sorted_merge TO on;
-SET work_mem TO '64kB';
-SELECT id FROM sorted_merge_test ORDER BY id LIMIT 10;
+SELECT id, num FROM sorted_merge_test WHERE num IS NOT NULL ORDER BY id ASC, num DESC LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+-- C7: WHERE + ORDER BY
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test WHERE num > 100 ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+ 67 | val_67
+ 68 | val_68
+ 69 | val_69
+ 70 | val_70
+ 71 | val_71
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test WHERE num > 100 ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+ 67 | val_67
+ 68 | val_68
+ 69 | val_69
+ 70 | val_70
+ 71 | val_71
+(5 rows)
+
+-- C8: Aggregates in SELECT, ORDER BY on dist_col (GROUP BY dist_col)
+SET citus.enable_sorted_merge TO off;
+SELECT id, count(*), sum(num), avg(num) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count | sum |          avg
+---------------------------------------------------------------------
+  1 |     1 | 1.5 | 1.50000000000000000000
+  2 |     1 | 3.0 |     3.0000000000000000
+  3 |     1 | 4.5 |     4.5000000000000000
+  4 |     1 | 6.0 |     6.0000000000000000
+  5 |     1 | 7.5 |     7.5000000000000000
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*), sum(num), avg(num) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count | sum |          avg
+---------------------------------------------------------------------
+  1 |     1 | 1.5 | 1.50000000000000000000
+  2 |     1 | 3.0 |     3.0000000000000000
+  3 |     1 | 4.5 |     4.5000000000000000
+  4 |     1 | 6.0 |     6.0000000000000000
+  5 |     1 | 7.5 |     7.5000000000000000
+(5 rows)
+
+-- =================================================================
+-- Category D: Complex queries — regression guards
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- D1: Subquery in FROM with ORDER BY
+SELECT * FROM (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5
+) sub ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D2: CTE with ORDER BY
+WITH top5 AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5
+)
+SELECT * FROM top5 ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- D3: Co-located JOIN + ORDER BY
+SELECT t.id, t.val, e.event_type
+FROM sorted_merge_test t
+JOIN sorted_merge_events e ON t.id = e.id
+WHERE t.id <= 5
+ORDER BY t.id, e.event_type
+LIMIT 10;
+ id |  val  | event_type
+---------------------------------------------------------------------
+  1 | val_1 | buy
+  1 | val_1 | buy
+  1 | val_1 | click
+  1 | val_1 | view
+  2 | val_2 | buy
+  2 | val_2 | click
+  2 | val_2 | view
+  2 | val_2 | view
+  3 | val_3 | buy
+  3 | val_3 | buy
+(10 rows)
+
+-- D4: UNION ALL + ORDER BY
+SELECT id, val FROM sorted_merge_test WHERE id <= 3
+UNION ALL
+SELECT id, val FROM sorted_merge_test WHERE id BETWEEN 98 AND 100
+ORDER BY id;
+ id  |   val
+---------------------------------------------------------------------
+   1 | val_1
+   2 | val_2
+   3 | val_3
+  98 | val_98
+  99 | val_99
+ 100 | val_100
+(6 rows)
+
+-- D5: DISTINCT + ORDER BY
+SELECT DISTINCT id FROM sorted_merge_test WHERE id <= 10 ORDER BY id;
  id
 ---------------------------------------------------------------------
   1
@@ -3138,12 +3657,24 @@ SELECT id FROM sorted_merge_test ORDER BY id LIMIT 10;
  10
 (10 rows)
 
-RESET work_mem;
--- G7: max_intermediate_result_size with CTE subplan
-SET citus.enable_sorted_merge TO on;
-SET citus.max_intermediate_result_size TO '4kB';
-WITH cte AS (SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 50)
-SELECT * FROM cte ORDER BY id LIMIT 5;
+-- D6: DISTINCT ON + ORDER BY
+SELECT DISTINCT ON (id) id, val, num
+FROM sorted_merge_test
+WHERE id <= 5
+ORDER BY id, num DESC;
+ id |  val  | num
+---------------------------------------------------------------------
+  1 | val_1 | 1.5
+  2 | val_2 | 3.0
+  3 | val_3 | 4.5
+  4 | val_4 | 6.0
+  5 | val_5 | 7.5
+(5 rows)
+
+-- D7: EXISTS subquery + ORDER BY
+SELECT id, val FROM sorted_merge_test t
+WHERE EXISTS (SELECT 1 FROM sorted_merge_events e WHERE e.id = t.id)
+ORDER BY id LIMIT 5;
  id |  val
 ---------------------------------------------------------------------
   1 | val_1
@@ -3153,17 +3684,10 @@ SELECT * FROM cte ORDER BY id LIMIT 5;
   5 | val_5
 (5 rows)
 
-RESET citus.max_intermediate_result_size;
--- =================================================================
--- Category H: Subplan + Sorted Merge interactions
--- =================================================================
-SET citus.enable_sorted_merge TO on;
--- H1: CTE subplan with simple ORDER BY — eligible for sorted merge
--- The CTE becomes a subplan; its DistributedPlan may have useSortedMerge=true
-WITH ordered_cte AS (
-    SELECT id, val FROM sorted_merge_test ORDER BY id
-)
-SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
+-- D8: IN subquery + ORDER BY
+SELECT id, val FROM sorted_merge_test
+WHERE id IN (SELECT id FROM sorted_merge_events WHERE event_type = 'click')
+ORDER BY id LIMIT 5;
  id |  val
 ---------------------------------------------------------------------
   1 | val_1
@@ -3173,622 +3697,2256 @@ SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
   5 | val_5
 (5 rows)
 
--- H2: Multiple CTEs — one eligible (ORDER BY col), one ineligible (ORDER BY agg)
-WITH eligible_cte AS (
-    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
-),
-ineligible_cte AS (
-    SELECT id, count(*) as cnt FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 15
-)
-SELECT e.id, e.val, i.cnt
-FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
-ORDER BY e.id;
- id |  val   | cnt
+-- D9: Multiple aggregates, GROUP BY dist_col, ORDER BY dist_col
+SELECT id, count(*), sum(num), avg(num), min(val), max(val)
+FROM sorted_merge_test
+GROUP BY id
+ORDER BY id
+LIMIT 5;
+ id | count | sum |          avg           |  min  |  max
 ---------------------------------------------------------------------
-  1 | val_1  |   1
-  2 | val_2  |   1
-  3 | val_3  |   1
-  4 | val_4  |   1
-  5 | val_5  |   1
-  6 | val_6  |   1
-  7 | val_7  |   1
-  8 | val_8  |   1
-  9 | val_9  |   1
- 10 | val_10 |   1
- 11 | val_11 |   1
- 12 | val_12 |   1
- 13 | val_13 |   1
- 14 | val_14 |   1
- 15 | val_15 |   1
-(15 rows)
+  1 |     1 | 1.5 | 1.50000000000000000000 | val_1 | val_1
+  2 |     1 | 3.0 |     3.0000000000000000 | val_2 | val_2
+  3 |     1 | 4.5 |     4.5000000000000000 | val_3 | val_3
+  4 |     1 | 6.0 |     6.0000000000000000 | val_4 | val_4
+  5 |     1 | 7.5 |     7.5000000000000000 | val_5 | val_5
+(5 rows)
 
--- H3: CTE subplan feeding outer ORDER BY — both levels may merge independently
-WITH top_ids AS (
-    SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
-)
-SELECT t.id, t.val
-FROM sorted_merge_test t
-JOIN top_ids ON t.id = top_ids.id
-ORDER BY t.id
+-- D10: CASE expression in SELECT + ORDER BY
+SELECT id,
+       CASE WHEN num > 75 THEN 'high' WHEN num > 25 THEN 'mid' ELSE 'low' END as bucket
+FROM sorted_merge_test
+WHERE num IS NOT NULL
+ORDER BY id
 LIMIT 10;
- id |  val
+ id | bucket
 ---------------------------------------------------------------------
-  1 | val_1
-  2 | val_2
-  3 | val_3
-  4 | val_4
-  5 | val_5
-  6 | val_6
-  7 | val_7
-  8 | val_8
-  9 | val_9
- 10 | val_10
+  1 | low
+  2 | low
+  3 | low
+  4 | low
+  5 | low
+  6 | low
+  7 | low
+  8 | low
+  9 | low
+ 10 | low
 (10 rows)
 
--- H4: Subquery in WHERE with ORDER BY + LIMIT — becomes subplan with merge
-SELECT id, val FROM sorted_merge_test
-WHERE id IN (
-    SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
-)
-ORDER BY id
-LIMIT 5;
- id |  val
+-- D11: NULL values ordering
+SELECT id, num FROM sorted_merge_test ORDER BY num NULLS FIRST, id LIMIT 5;
+ id  | num
 ---------------------------------------------------------------------
-  1 | val_1
-  2 | val_2
-  3 | val_3
-(3 rows)
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
+(5 rows)
 
--- H5: CTE subplan with max_intermediate_result_size enforcement
--- Tests that EnsureIntermediateSizeLimitNotExceeded works through per-task dispatch
-SET citus.max_intermediate_result_size TO '4kB';
-WITH small_cte AS (
-    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
-)
-SELECT * FROM small_cte ORDER BY id LIMIT 5;
- id |  val
+SELECT id, num FROM sorted_merge_test ORDER BY num NULLS LAST, id LIMIT 5;
+ id | num
 ---------------------------------------------------------------------
-  1 | val_1
-  2 | val_2
-  3 | val_3
-  4 | val_4
-  5 | val_5
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
 (5 rows)
 
-RESET citus.max_intermediate_result_size;
--- H6: Cross-join subplan with non-aggregate ORDER BY (crash regression variant)
--- Similar pattern to subquery_complex_target_list but without aggregate ORDER BY
-SELECT foo.id, bar.id as bar_id
-FROM
-    (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
-    (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
-ORDER BY foo.id, bar.id
-LIMIT 5;
- id | bar_id
+SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS FIRST, id LIMIT 5;
+ id  |  num
 ---------------------------------------------------------------------
-  1 |      1
-  1 |      1
-  1 |      1
-  2 |      1
-  2 |      1
+ 101 |
+ 102 |
+ 100 | 150.0
+  99 | 148.5
+  98 | 147.0
 (5 rows)
 
--- H7: CTE correctness comparison — GUC off vs on must produce identical results
-SET citus.enable_sorted_merge TO off;
-WITH cte AS (
-    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
-)
-SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
- id |  val   | num
+SELECT id, num FROM sorted_merge_test ORDER BY num DESC NULLS LAST, id DESC LIMIT 5;
+ id  |  num
 ---------------------------------------------------------------------
-  7 | val_7  | 10.5
-  8 | val_8  | 12.0
-  9 | val_9  | 13.5
- 10 | val_10 | 15.0
- 11 | val_11 | 16.5
+ 100 | 150.0
+  99 | 148.5
+  98 | 147.0
+  97 | 145.5
+  96 | 144.0
 (5 rows)
 
-SET citus.enable_sorted_merge TO on;
-WITH cte AS (
-    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
-)
-SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
- id |  val   | num
+-- D12: Large OFFSET
+SELECT id FROM sorted_merge_test ORDER BY id OFFSET 100 LIMIT 5;
+ id
 ---------------------------------------------------------------------
-  7 | val_7  | 10.5
-  8 | val_8  | 12.0
-  9 | val_9  | 13.5
- 10 | val_10 | 15.0
- 11 | val_11 | 16.5
+ 101
+ 102
+ 200
+ 201
+ 202
+(5 rows)
+
+-- D13: ORDER BY ordinal position
+SELECT id, val FROM sorted_merge_test ORDER BY 2, 1 LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
 (5 rows)
 
 -- =================================================================
--- Category H EXPLAIN: Query plans for subplan + sorted merge
+-- Category E: Edge cases
 -- =================================================================
 SET citus.enable_sorted_merge TO on;
--- H1 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH ordered_cte AS (
-    SELECT id, val FROM sorted_merge_test ORDER BY id
-)
-SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
-                                                                                                                                                                                     QUERY PLAN
+-- E1: Empty result set
+SELECT id FROM sorted_merge_test WHERE id < 0 ORDER BY id;
+ id
 ---------------------------------------------------------------------
- Limit (actual rows=5 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   ->  Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-         Output: remote_scan.id, remote_scan.val
-         Task Count: 4
-         Tuple data received from nodes: 191 bytes
-         Tasks Shown: One of 4
-         ->  Task
-               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT ordered_cte.id AS worker_column_1, ordered_cte.val AS worker_column_2 FROM (SELECT sorted_merge_test.id, sorted_merge_test.val FROM public.sorted_merge_test_960000 sorted_merge_test ORDER BY sorted_merge_test.id) ordered_cte) worker_subquery ORDER BY worker_column_1 LIMIT '5'::bigint
-               Tuple data received from node: 47 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=5 loops=1)
-                     Output: sorted_merge_test.id, sorted_merge_test.val
-                     ->  Sort (actual rows=5 loops=1)
-                           Output: sorted_merge_test.id, sorted_merge_test.val
-                           Sort Key: sorted_merge_test.id
-                           Sort Method: top-N heapsort  Memory: 25kB
-                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                 Output: sorted_merge_test.id, sorted_merge_test.val
-(19 rows)
+(0 rows)
 
--- H2 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH eligible_cte AS (
-    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
-),
-ineligible_cte AS (
-    SELECT id, count(*) as cnt FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 15
-)
-SELECT e.id, e.val, i.cnt
-FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
-ORDER BY e.id;
-                                                                                                                                                                                                                                 QUERY PLAN
+-- E2: Single row (may go through router planner)
+SELECT id, val FROM sorted_merge_test WHERE id = 42 ORDER BY id;
+ id |  val
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-   Output: remote_scan.id, remote_scan.val, remote_scan.cnt
-   ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 397 bytes
-         Result destination: Write locally
-         ->  Limit (actual rows=20 loops=1)
-               Output: remote_scan.id, remote_scan.val
-               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
-                     Output: remote_scan.id, remote_scan.val
-                     Task Count: 4
-                     Tuple data received from nodes: 791 bytes
-                     Tasks Shown: One of 4
-                     ->  Task
-                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                           Tuple data received from node: 197 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=20 loops=1)
-                                 Output: id, val
-                                 ->  Sort (actual rows=20 loops=1)
-                                       Output: id, val
-                                       Sort Key: sorted_merge_test.id
-                                       Sort Method: quicksort  Memory: 26kB
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                             Output: id, val
-   ->  Distributed Subplan XXX_2
-         Intermediate Data Size: 330 bytes
-         Result destination: Write locally
-         ->  Limit (actual rows=15 loops=1)
-               Output: remote_scan.id, remote_scan.cnt
-               ->  Sort (actual rows=15 loops=1)
-                     Output: remote_scan.id, remote_scan.cnt
-                     Sort Key: remote_scan.cnt DESC, remote_scan.id
-                     ->  Custom Scan (Citus Adaptive) (never executed)
-                           Output: remote_scan.id, remote_scan.cnt
-                           Task Count: 4
-                           Tuple data received from nodes: 720 bytes
-                           Tasks Shown: One of 4
-                           ->  Task
-                                 Query: SELECT id, count(*) AS cnt FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (count(*)) DESC, id LIMIT '15'::bigint
-                                 Tuple data received from node: 180 bytes
-                                 Node: host=localhost port=xxxxx dbname=regression
-                                 ->  Limit (actual rows=15 loops=1)
-                                       Output: id, (count(*))
-                                       ->  Sort (actual rows=15 loops=1)
-                                             Output: id, (count(*))
-                                             Sort Key: (count(*)) DESC, sorted_merge_test.id
-                                             Sort Method: quicksort  Memory: 26kB
-                                             ->  HashAggregate (actual rows=26 loops=1)
-                                                   Output: id, count(*)
-                                                   Group Key: sorted_merge_test.id
-                                                   ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                                         Output: id, val, num, ts
-   Task Count: 1
-   Tuple data received from nodes: 87 bytes
-   Tasks Shown: All
-   ->  Task
-         Query: SELECT e.id, e.val, i.cnt FROM ((SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) e JOIN (SELECT intermediate_result.id, intermediate_result.cnt FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer, cnt bigint)) i ON ((e.id OPERATOR(pg_catalog.=) i.id))) ORDER BY e.id
-         Tuple data received from node: 87 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Merge Join (actual rows=5 loops=1)
-               Output: intermediate_result.id, intermediate_result.val, intermediate_result_1.cnt
-               Merge Cond: (intermediate_result.id = intermediate_result_1.id)
-               ->  Sort (actual rows=6 loops=1)
-                     Output: intermediate_result.id, intermediate_result.val
-                     Sort Key: intermediate_result.id
-                     Sort Method: quicksort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
-                           Output: intermediate_result.id, intermediate_result.val
-                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-               ->  Sort (actual rows=15 loops=1)
-                     Output: intermediate_result_1.cnt, intermediate_result_1.id
-                     Sort Key: intermediate_result_1.id
-                     Sort Method: quicksort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=15 loops=1)
-                           Output: intermediate_result_1.cnt, intermediate_result_1.id
-                           Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
-(77 rows)
+ 42 | val_42
+(1 row)
 
--- H3 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH top_ids AS (
-    SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
-)
-SELECT t.id, t.val
-FROM sorted_merge_test t
-JOIN top_ids ON t.id = top_ids.id
-ORDER BY t.id
-LIMIT 10;
-                                                                                                                                                                                                                     QUERY PLAN
+-- E3: All rows with same sort value
+SELECT id, num FROM sorted_merge_test WHERE num = 10.5 ORDER BY num, id;
+ id  | num
 ---------------------------------------------------------------------
- Limit (actual rows=10 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   ->  Custom Scan (Citus Adaptive) (actual rows=10 loops=1)
-         Output: remote_scan.id, remote_scan.val
-         ->  Distributed Subplan XXX_1
-               Intermediate Data Size: 200 bytes
-               Result destination: Send to 2 nodes
-               ->  Limit (actual rows=20 loops=1)
-                     Output: remote_scan.id
-                     ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
-                           Output: remote_scan.id
-                           Task Count: 4
-                           Tuple data received from nodes: 320 bytes
-                           Tasks Shown: One of 4
-                           ->  Task
-                                 Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                                 Tuple data received from node: 80 bytes
-                                 Node: host=localhost port=xxxxx dbname=regression
-                                 ->  Limit (actual rows=20 loops=1)
-                                       Output: id
-                                       ->  Sort (actual rows=20 loops=1)
-                                             Output: id
-                                             Sort Key: sorted_merge_test.id
-                                             Sort Method: quicksort  Memory: 25kB
-                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                                   Output: id
-         Task Count: 4
-         Tuple data received from nodes: 97 bytes
-         Tasks Shown: One of 4
-         ->  Task
-               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT t.id AS worker_column_1, t.val AS worker_column_2 FROM (public.sorted_merge_test_960000 t JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) top_ids ON ((t.id OPERATOR(pg_catalog.=) top_ids.id)))) worker_subquery ORDER BY worker_column_1 LIMIT '10'::bigint
-               Tuple data received from node: 97 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=10 loops=1)
-                     Output: t.id, t.val
-                     ->  Merge Join (actual rows=10 loops=1)
-                           Output: t.id, t.val
-                           Merge Cond: (intermediate_result.id = t.id)
-                           ->  Sort (actual rows=10 loops=1)
-                                 Output: intermediate_result.id
-                                 Sort Key: intermediate_result.id
-                                 Sort Method: quicksort  Memory: 25kB
-                                 ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
-                                       Output: intermediate_result.id
-                                       Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-                           ->  Sort (actual rows=10 loops=1)
-                                 Output: t.id, t.val
-                                 Sort Key: t.id
-                                 Sort Method: quicksort  Memory: 25kB
-                                 ->  Seq Scan on public.sorted_merge_test_960000 t (actual rows=26 loops=1)
-                                       Output: t.id, t.val
-(51 rows)
+   7 | 10.5
+ 200 | 10.5
+ 201 | 10.5
+ 202 | 10.5
+(4 rows)
 
--- H4 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT id, val FROM sorted_merge_test
-WHERE id IN (
-    SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
-)
-ORDER BY id
+-- E4: Wide sort key (4 columns)
+SELECT id, val, num FROM sorted_merge_test
+WHERE id <= 5
+ORDER BY num, val, id
 LIMIT 5;
-                                                                                                                                                                                                                                          QUERY PLAN
+ id |  val  | num
 ---------------------------------------------------------------------
- Limit (actual rows=3 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   ->  Custom Scan (Citus Adaptive) (actual rows=3 loops=1)
-         Output: remote_scan.id, remote_scan.val
-         ->  Distributed Subplan XXX_1
-               Intermediate Data Size: 100 bytes
-               Result destination: Send to 2 nodes
-               ->  Limit (actual rows=10 loops=1)
-                     Output: remote_scan.id
-                     ->  Custom Scan (Citus Adaptive) (actual rows=40 loops=1)
-                           Output: remote_scan.id
-                           Task Count: 4
-                           Tuple data received from nodes: 160 bytes
-                           Tasks Shown: One of 4
-                           ->  Task
-                                 Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT '10'::bigint
-                                 Tuple data received from node: 40 bytes
-                                 Node: host=localhost port=xxxxx dbname=regression
-                                 ->  Limit (actual rows=10 loops=1)
-                                       Output: id
-                                       ->  Sort (actual rows=10 loops=1)
-                                             Output: id
-                                             Sort Key: sorted_merge_events.id
-                                             Sort Method: top-N heapsort  Memory: 25kB
-                                             ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=56 loops=1)
-                                                   Output: id
-         Task Count: 4
-         Tuple data received from nodes: 27 bytes
-         Tasks Shown: One of 4
-         ->  Task
-               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT sorted_merge_test.id AS worker_column_1, sorted_merge_test.val AS worker_column_2 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (sorted_merge_test.id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))) worker_subquery ORDER BY worker_column_1 LIMIT '5'::bigint
-               Tuple data received from node: 27 bytes
-               Node: host=localhost port=xxxxx dbname=regression
-               ->  Limit (actual rows=3 loops=1)
-                     Output: sorted_merge_test.id, sorted_merge_test.val
-                     ->  Sort (actual rows=3 loops=1)
-                           Output: sorted_merge_test.id, sorted_merge_test.val
-                           Sort Key: sorted_merge_test.id
-                           Sort Method: quicksort  Memory: 25kB
-                           ->  Hash Semi Join (actual rows=3 loops=1)
-                                 Output: sorted_merge_test.id, sorted_merge_test.val
-                                 Hash Cond: (sorted_merge_test.id = intermediate_result.id)
-                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                       Output: sorted_merge_test.id, sorted_merge_test.val, sorted_merge_test.num, sorted_merge_test.ts
-                                 ->  Hash (actual rows=10 loops=1)
-                                       Output: intermediate_result.id
-                                       ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=10 loops=1)
-                                             Output: intermediate_result.id
-                                             Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-(50 rows)
+  1 | val_1 | 1.5
+  2 | val_2 | 3.0
+  3 | val_3 | 4.5
+  4 | val_4 | 6.0
+  5 | val_5 | 7.5
+(5 rows)
 
--- H5 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH small_cte AS (
-    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
-)
-SELECT * FROM small_cte ORDER BY id LIMIT 5;
-                                                                                                                  QUERY PLAN
+-- E5: Zero-task defensive path
+-- CreatePerTaskDispatchDest handles taskCount=0 gracefully (returns a no-op
+-- destination). This cannot be triggered via normal SQL because distributed
+-- tables always have at least one shard. The closest we can test is an
+-- empty-result query through the sorted merge path to verify no crash.
+SELECT id FROM sorted_merge_test WHERE false ORDER BY id;
+ id
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-   Output: remote_scan.id, remote_scan.val
-   ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 397 bytes
-         Result destination: Write locally
-         ->  Limit (actual rows=20 loops=1)
-               Output: remote_scan.id, remote_scan.val
-               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
-                     Output: remote_scan.id, remote_scan.val
-                     Task Count: 4
-                     Tuple data received from nodes: 791 bytes
-                     Tasks Shown: One of 4
-                     ->  Task
-                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                           Tuple data received from node: 197 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=20 loops=1)
-                                 Output: id, val
-                                 ->  Sort (actual rows=20 loops=1)
-                                       Output: id, val
-                                       Sort Key: sorted_merge_test.id
-                                       Sort Method: quicksort  Memory: 26kB
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                             Output: id, val
-   Task Count: 1
-   Tuple data received from nodes: 47 bytes
-   Tasks Shown: All
-   ->  Task
-         Query: SELECT id, val FROM (SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) small_cte ORDER BY id LIMIT 5
-         Tuple data received from node: 47 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Limit (actual rows=5 loops=1)
-               Output: intermediate_result.id, intermediate_result.val
-               ->  Sort (actual rows=5 loops=1)
-                     Output: intermediate_result.id, intermediate_result.val
-                     Sort Key: intermediate_result.id
-                     Sort Method: top-N heapsort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=20 loops=1)
-                           Output: intermediate_result.id, intermediate_result.val
-                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-(40 rows)
+(0 rows)
 
--- H6 EXPLAIN
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-SELECT foo.id, bar.id as bar_id
-FROM
-    (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
-    (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
-ORDER BY foo.id, bar.id
-LIMIT 5;
-                                                                                                                                                                                    QUERY PLAN
+-- =================================================================
+-- Category F: Existing LIMIT pushdown stability
+-- =================================================================
+-- F1: Simple LIMIT + ORDER BY: plan unchanged between GUC off and on
+SET citus.enable_sorted_merge TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
+                                                            explain_filter
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-   Output: remote_scan.id, remote_scan.bar_id
-   ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 30 bytes
-         Result destination: Write locally
-         ->  Limit (actual rows=3 loops=1)
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id
+   ->  Sort (actual rows=N loops=N)
+         Output: remote_scan.id
+         Sort Key: remote_scan.id
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
                Output: remote_scan.id
-               ->  Custom Scan (Citus Adaptive) (actual rows=12 loops=1)
-                     Output: remote_scan.id
-                     Task Count: 4
-                     Tuple data received from nodes: 48 bytes
-                     Tasks Shown: One of 4
-                     ->  Task
-                           Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '3'::bigint
-                           Tuple data received from node: 12 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=3 loops=1)
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
+                           Output: id
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: id
+                                 Sort Key: sorted_merge_test.id
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                       Output: id
+(23 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
+                                                         explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: id
+                     ->  Sort (actual rows=N loops=N)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                  Output: id
-                                 ->  Sort (actual rows=3 loops=1)
+(20 rows)
+
+-- F2: GROUP BY dist_col + ORDER BY + LIMIT
+SET citus.enable_sorted_merge TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5');
+                                                                           explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.count
+   ->  Sort (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.count
+         Sort Key: remote_scan.id
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.count
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
+                           Output: id, (count(*))
+                           ->  GroupAggregate (actual rows=N loops=N)
+                                 Output: id, count(*)
+                                 Group Key: sorted_merge_test.id
+                                 ->  Sort (actual rows=N loops=N)
                                        Output: id
                                        Sort Key: sorted_merge_test.id
-                                       Sort Method: top-N heapsort  Memory: 25kB
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
+                                       Sort Method: quicksort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                              Output: id
-   ->  Distributed Subplan XXX_2
-         Intermediate Data Size: 30 bytes
-         Result destination: Write locally
-         ->  Limit (actual rows=3 loops=1)
-               Output: remote_scan.id
-               ->  Custom Scan (Citus Adaptive) (actual rows=12 loops=1)
-                     Output: remote_scan.id
-                     Task Count: 4
-                     Tuple data received from nodes: 48 bytes
-                     Tasks Shown: One of 4
-                     ->  Task
-                           Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT '3'::bigint
-                           Tuple data received from node: 12 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=3 loops=1)
+(26 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5');
+                                                                        explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.count
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.count
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id, count(*) AS count FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY id LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: id, (count(*))
+                     ->  GroupAggregate (actual rows=N loops=N)
+                           Output: id, count(*)
+                           Group Key: sorted_merge_test.id
+                           ->  Sort (actual rows=N loops=N)
                                  Output: id
-                                 ->  Sort (actual rows=3 loops=1)
+                                 Sort Key: sorted_merge_test.id
+                                 Sort Method: quicksort  Memory: NkB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
                                        Output: id
-                                       Sort Key: sorted_merge_events.id
-                                       Sort Method: top-N heapsort  Memory: 25kB
-                                       ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=56 loops=1)
-                                             Output: id
-   Task Count: 1
-   Tuple data received from nodes: 40 bytes
-   Tasks Shown: All
-   ->  Task
-         Query: SELECT foo.id, bar.id AS bar_id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) foo, (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) bar ORDER BY foo.id, bar.id LIMIT 5
-         Tuple data received from node: 40 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Limit (actual rows=5 loops=1)
-               Output: intermediate_result.id, intermediate_result_1.id
-               ->  Sort (actual rows=5 loops=1)
-                     Output: intermediate_result.id, intermediate_result_1.id
-                     Sort Key: intermediate_result.id, intermediate_result_1.id
-                     Sort Method: quicksort  Memory: 25kB
-                     ->  Nested Loop (actual rows=9 loops=1)
-                           Output: intermediate_result.id, intermediate_result_1.id
-                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=3 loops=1)
-                                 Output: intermediate_result.id
-                                 Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=3 loops=3)
-                                 Output: intermediate_result_1.id
-                                 Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
-(67 rows)
+(23 rows)
 
--- H7 EXPLAIN — GUC off vs on
+-- F3: ORDER BY aggregate + LIMIT (not eligible for merge)
 SET citus.enable_sorted_merge TO off;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH cte AS (
-    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
-)
-SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
-                                                                                                                                                             QUERY PLAN
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 5;
+ id | count
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-   Output: remote_scan.id, remote_scan.val, remote_scan.num
-   ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 691 bytes
-         Result destination: Write locally
-         ->  Limit (actual rows=20 loops=1)
-               Output: remote_scan.id, remote_scan.val, remote_scan.num
-               ->  Sort (actual rows=20 loops=1)
-                     Output: remote_scan.id, remote_scan.val, remote_scan.num
-                     Sort Key: remote_scan.id
-                     ->  Custom Scan (Citus Adaptive) (never executed)
-                           Output: remote_scan.id, remote_scan.val, remote_scan.num
-                           Task Count: 4
-                           Tuple data received from nodes: 1673 bytes
-                           Tasks Shown: One of 4
-                           ->  Task
-                                 Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                                 Tuple data received from node: 419 bytes
-                                 Node: host=localhost port=xxxxx dbname=regression
-                                 ->  Limit (actual rows=20 loops=1)
-                                       Output: id, val, num
-                                       ->  Sort (actual rows=20 loops=1)
-                                             Output: id, val, num
-                                             Sort Key: sorted_merge_test.id
-                                             Sort Method: quicksort  Memory: 26kB
-                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                                   Output: id, val, num
-   Task Count: 1
-   Tuple data received from nodes: 103 bytes
-   Tasks Shown: All
-   ->  Task
-         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (10)::numeric) ORDER BY id LIMIT 5
-         Tuple data received from node: 103 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Limit (actual rows=5 loops=1)
-               Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
-               ->  Sort (actual rows=5 loops=1)
-                     Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
-                     Sort Key: intermediate_result.id
-                     Sort Method: top-N heapsort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=14 loops=1)
-                           Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
-                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-                           Filter: (intermediate_result.num > '10'::numeric)
-                           Rows Removed by Filter: 6
-(45 rows)
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
 
 SET citus.enable_sorted_merge TO on;
-EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF)
-WITH cte AS (
-    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
-)
-SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
-                                                                                                                                                             QUERY PLAN
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 5;
+ id | count
 ---------------------------------------------------------------------
- Custom Scan (Citus Adaptive) (actual rows=5 loops=1)
-   Output: remote_scan.id, remote_scan.val, remote_scan.num
-   ->  Distributed Subplan XXX_1
-         Intermediate Data Size: 699 bytes
-         Result destination: Write locally
-         ->  Limit (actual rows=20 loops=1)
-               Output: remote_scan.id, remote_scan.val, remote_scan.num
-               ->  Custom Scan (Citus Adaptive) (actual rows=80 loops=1)
-                     Output: remote_scan.id, remote_scan.val, remote_scan.num
-                     Task Count: 4
-                     Tuple data received from nodes: 1673 bytes
-                     Tasks Shown: One of 4
-                     ->  Task
-                           Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT '20'::bigint
-                           Tuple data received from node: 419 bytes
-                           Node: host=localhost port=xxxxx dbname=regression
-                           ->  Limit (actual rows=20 loops=1)
-                                 Output: id, val, num
-                                 ->  Sort (actual rows=20 loops=1)
-                                       Output: id, val, num
-                                       Sort Key: sorted_merge_test.id
-                                       Sort Method: quicksort  Memory: 26kB
-                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=26 loops=1)
-                                             Output: id, val, num
-   Task Count: 1
-   Tuple data received from nodes: 101 bytes
-   Tasks Shown: All
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+-- =================================================================
+-- Category G: Phase 4 — Sort elision and advanced scenarios
+-- =================================================================
+-- G1: Sort elision verification — coordinator Sort node absent
+SET citus.enable_sorted_merge TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id');
+                                              explain_filter
+---------------------------------------------------------------------
+ Sort (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Sort Key: remote_scan.id
+   Sort Method: quicksort  Memory: NkB
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.val
+         Task Count: N
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val
+(15 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id');
+                                               explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
    ->  Task
-         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (10)::numeric) ORDER BY id LIMIT 5
-         Tuple data received from node: 101 bytes
-         Node: host=localhost port=xxxxx dbname=regression
-         ->  Limit (actual rows=5 loops=1)
-               Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
-               ->  Sort (actual rows=5 loops=1)
-                     Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
-                     Sort Key: intermediate_result.id
-                     Sort Method: top-N heapsort  Memory: 25kB
-                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=18 loops=1)
-                           Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
-                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
-                           Filter: (intermediate_result.num > '10'::numeric)
-                           Rows Removed by Filter: 2
-(42 rows)
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val
+(16 rows)
 
+-- G2a: PREPARE with merge ON, EXECUTE after turning OFF
+-- Plan-time decision is baked in — cached plan must still merge correctly.
+-- Execute 6+ times to trigger PostgreSQL's generic plan caching, then
+-- verify the plan shape is preserved after toggling the GUC.
+SET citus.enable_sorted_merge TO on;
+PREPARE merge_on_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- Verify plan shape after caching — no Sort above CustomScan
+EXPLAIN (COSTS OFF) EXECUTE merge_on_stmt;
+                                      QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Custom Scan (Citus Adaptive)
+         Task Count: 4
+         Merge Method: sorted merge
+         Tasks Shown: One of 4
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit
+                     ->  Sort
+                           Sort Key: id
+                           ->  Seq Scan on sorted_merge_test_960000 sorted_merge_test
+(11 rows)
+
+SET citus.enable_sorted_merge TO off;
+-- Cached plan retains the sorted merge decision from planning time
+EXECUTE merge_on_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXPLAIN (COSTS OFF) EXECUTE merge_on_stmt;
+                                      QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Custom Scan (Citus Adaptive)
+         Task Count: 4
+         Merge Method: sorted merge
+         Tasks Shown: One of 4
+         ->  Task
+               Node: host=localhost port=xxxxx dbname=regression
+               ->  Limit
+                     ->  Sort
+                           Sort Key: id
+                           ->  Seq Scan on sorted_merge_test_960000 sorted_merge_test
+(11 rows)
+
+DEALLOCATE merge_on_stmt;
+-- G2b: PREPARE with merge OFF, EXECUTE after turning ON
+-- Cached plan has Sort node — must still return sorted results.
+SET citus.enable_sorted_merge TO off;
+PREPARE merge_off_stmt AS SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 10;
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- Verify plan shape after caching — Sort above CustomScan
+EXPLAIN (COSTS OFF) EXECUTE merge_off_stmt;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.id
+         ->  Custom Scan (Citus Adaptive)
+               Task Count: 4
+               Tasks Shown: One of 4
+               ->  Task
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit
+                           ->  Sort
+                                 Sort Key: id
+                                 ->  Seq Scan on sorted_merge_test_960000 sorted_merge_test
+(12 rows)
+
+SET citus.enable_sorted_merge TO on;
+-- Cached plan retains the non-merge decision from planning time
+EXECUTE merge_off_stmt;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+EXPLAIN (COSTS OFF) EXECUTE merge_off_stmt;
+                                         QUERY PLAN
+---------------------------------------------------------------------
+ Limit
+   ->  Sort
+         Sort Key: remote_scan.id
+         ->  Custom Scan (Citus Adaptive)
+               Task Count: 4
+               Tasks Shown: One of 4
+               ->  Task
+                     Node: host=localhost port=xxxxx dbname=regression
+                     ->  Limit
+                           ->  Sort
+                                 Sort Key: id
+                                 ->  Seq Scan on sorted_merge_test_960000 sorted_merge_test
+(12 rows)
+
+DEALLOCATE merge_off_stmt;
+-- G3: Cursor with backward scan
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+DECLARE sorted_cursor CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
+FETCH 3 FROM sorted_cursor;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+(3 rows)
+
+FETCH BACKWARD 1 FROM sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:323: ERROR:  cursor can only scan forward
+HINT:  Declare it with SCROLL option to enable backward scan.
+FETCH 2 FROM sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:324: ERROR:  current transaction is aborted, commands ignored until end of transaction block
+CLOSE sorted_cursor;
+psql:sql/multi_orderby_pushdown.sql:325: ERROR:  current transaction is aborted, commands ignored until end of transaction block
+COMMIT;
+-- G3b: SCROLL cursor with backward scan
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+DECLARE sorted_scroll_cursor SCROLL CURSOR FOR SELECT id FROM sorted_merge_test ORDER BY id;
+FETCH 3 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+(3 rows)
+
+FETCH BACKWARD 1 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  2
+(1 row)
+
+FETCH 2 FROM sorted_scroll_cursor;
+ id
+---------------------------------------------------------------------
+  3
+  4
+(2 rows)
+
+CLOSE sorted_scroll_cursor;
+COMMIT;
+-- G4: EXPLAIN ANALYZE (sorted merge skipped for EXPLAIN ANALYZE)
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY id LIMIT 5');
+                                                         explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: id
+                     ->  Sort (actual rows=N loops=N)
+                           Output: id
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: id
+(20 rows)
+
+-- G5: ORDER BY aggregate + LIMIT — crash regression test
+-- Previously caused SIGSEGV when sorted merge was enabled because
+-- aggregate ORDER BY was erroneously tagged as merge-eligible.
+SET citus.enable_sorted_merge TO on;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 3;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+(3 rows)
+
+-- G6: Small work_mem with many tasks (32 shards)
+SET citus.enable_sorted_merge TO on;
+SET work_mem TO '64kB';
+SELECT id FROM sorted_merge_test ORDER BY id LIMIT 10;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+ 10
+(10 rows)
+
+RESET work_mem;
+-- G7: max_intermediate_result_size with CTE subplan
+SET citus.enable_sorted_merge TO on;
+SET citus.max_intermediate_result_size TO '4kB';
+WITH cte AS (SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 50)
+SELECT * FROM cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+RESET citus.max_intermediate_result_size;
+-- =================================================================
+-- Category H: Subplan + Sorted Merge interactions
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- H1: CTE subplan with simple ORDER BY — eligible for sorted merge
+-- The CTE becomes a subplan; its DistributedPlan may have useSortedMerge=true
+WITH ordered_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id
+)
+SELECT * FROM ordered_cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- H2: Multiple CTEs — one eligible (ORDER BY col), one ineligible (ORDER BY agg)
+WITH eligible_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+),
+ineligible_cte AS (
+    SELECT id, count(*) as cnt FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 15
+)
+SELECT e.id, e.val, i.cnt
+FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
+ORDER BY e.id;
+ id |  val   | cnt
+---------------------------------------------------------------------
+  1 | val_1  |   1
+  2 | val_2  |   1
+  3 | val_3  |   1
+  4 | val_4  |   1
+  5 | val_5  |   1
+  6 | val_6  |   1
+  7 | val_7  |   1
+  8 | val_8  |   1
+  9 | val_9  |   1
+ 10 | val_10 |   1
+ 11 | val_11 |   1
+ 12 | val_12 |   1
+ 13 | val_13 |   1
+ 14 | val_14 |   1
+ 15 | val_15 |   1
+(15 rows)
+
+-- H3: CTE subplan feeding outer ORDER BY — both levels may merge independently
+WITH top_ids AS (
+    SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT t.id, t.val
+FROM sorted_merge_test t
+JOIN top_ids ON t.id = top_ids.id
+ORDER BY t.id
+LIMIT 10;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(10 rows)
+
+-- H4: Subquery in WHERE with ORDER BY + LIMIT — becomes subplan with merge
+SELECT id, val FROM sorted_merge_test
+WHERE id IN (
+    SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
+)
+ORDER BY id
+LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+(3 rows)
+
+-- H5: CTE subplan with max_intermediate_result_size enforcement
+-- Tests that EnsureIntermediateSizeLimitNotExceeded works through per-task dispatch
+SET citus.max_intermediate_result_size TO '4kB';
+WITH small_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM small_cte ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+RESET citus.max_intermediate_result_size;
+-- H6: Cross-join subplan with non-aggregate ORDER BY (crash regression variant)
+-- Similar pattern to subquery_complex_target_list but without aggregate ORDER BY
+SELECT foo.id, bar.id as bar_id
+FROM
+    (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
+    (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
+ORDER BY foo.id, bar.id
+LIMIT 5;
+ id | bar_id
+---------------------------------------------------------------------
+  1 |      1
+  1 |      1
+  1 |      1
+  2 |      1
+  2 |      1
+(5 rows)
+
+-- H7: CTE correctness comparison — GUC off vs on must produce identical results
+SET citus.enable_sorted_merge TO off;
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+ id |  val   | num
+---------------------------------------------------------------------
+  7 | val_7  | 10.5
+  8 | val_8  | 12.0
+  9 | val_9  | 13.5
+ 10 | val_10 | 15.0
+ 11 | val_11 | 16.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5;
+ id |  val   | num
+---------------------------------------------------------------------
+  7 | val_7  | 10.5
+  8 | val_8  | 12.0
+  9 | val_9  | 13.5
+ 10 | val_10 | 15.0
+ 11 | val_11 | 16.5
+(5 rows)
+
+-- =================================================================
+-- Category H EXPLAIN: Query plans for subplan + sorted merge
+-- =================================================================
+SET citus.enable_sorted_merge TO on;
+-- H1 EXPLAIN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH ordered_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id
+)
+SELECT * FROM ordered_cte ORDER BY id LIMIT 5');
+                                                                                                                                                                                   explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.val
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT ordered_cte.id AS worker_column_1, ordered_cte.val AS worker_column_2 FROM (SELECT sorted_merge_test.id, sorted_merge_test.val FROM public.sorted_merge_test_960000 sorted_merge_test ORDER BY sorted_merge_test.id) ordered_cte) worker_subquery ORDER BY worker_column_1 LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: sorted_merge_test.id, sorted_merge_test.val
+                     ->  Sort (actual rows=N loops=N)
+                           Output: sorted_merge_test.id, sorted_merge_test.val
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: sorted_merge_test.id, sorted_merge_test.val
+(20 rows)
+
+-- H2 EXPLAIN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH eligible_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+),
+ineligible_cte AS (
+    SELECT id, count(*) as cnt FROM sorted_merge_test GROUP BY id ORDER BY count(*) DESC, id LIMIT 15
+)
+SELECT e.id, e.val, i.cnt
+FROM eligible_cte e JOIN ineligible_cte i ON e.id = i.id
+ORDER BY e.id');
+                                                                                                                                                                                                                               explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.cnt
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: N bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.val
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+                     Output: remote_scan.id, remote_scan.val
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
+                     ->  Task
+                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
+                                 Output: id, val
+                                 ->  Sort (actual rows=N loops=N)
+                                       Output: id, val
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val
+   ->  Distributed Subplan XXX_2
+         Intermediate Data Size: N bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.cnt
+               ->  Sort (actual rows=N loops=N)
+                     Output: remote_scan.id, remote_scan.cnt
+                     Sort Key: remote_scan.cnt DESC, remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (never executed)
+                           Output: remote_scan.id, remote_scan.cnt
+                           Task Count: N
+                           Tuple data received from nodes: N bytes
+                           Tasks Shown: One of N
+                           ->  Task
+                                 Query: SELECT id, count(*) AS cnt FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (count(*)) DESC, id LIMIT 'N'::bigint
+                                 Tuple data received from node: N bytes
+                                 Node: host=localhost port=N dbname=regression
+                                 ->  Limit (actual rows=N loops=N)
+                                       Output: id, (count(*))
+                                       ->  Sort (actual rows=N loops=N)
+                                             Output: id, (count(*))
+                                             Sort Key: (count(*)) DESC, sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: NkB
+                                             ->  HashAggregate (actual rows=N loops=N)
+                                                   Output: id, count(*)
+                                                   Group Key: sorted_merge_test.id
+                                                   ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                                         Output: id, val, num, ts
+   Task Count: N
+   Tuple data received from nodes: N bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT e.id, e.val, i.cnt FROM ((SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) e JOIN (SELECT intermediate_result.id, intermediate_result.cnt FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer, cnt bigint)) i ON ((e.id OPERATOR(pg_catalog.=) i.id))) ORDER BY e.id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Merge Join (actual rows=N loops=N)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result_1.cnt
+               Merge Cond: (intermediate_result.id = intermediate_result_1.id)
+               ->  Sort (actual rows=N loops=N)
+                     Output: intermediate_result.id, intermediate_result.val
+                     Sort Key: intermediate_result.id
+                     Sort Method: quicksort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
+                           Output: intermediate_result.id, intermediate_result.val
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+               ->  Sort (actual rows=N loops=N)
+                     Output: intermediate_result_1.cnt, intermediate_result_1.id
+                     Sort Key: intermediate_result_1.id
+                     Sort Method: quicksort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=N loops=N)
+                           Output: intermediate_result_1.cnt, intermediate_result_1.id
+                           Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
+(78 rows)
+
+-- H3 EXPLAIN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH top_ids AS (
+    SELECT id FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT t.id, t.val
+FROM sorted_merge_test t
+JOIN top_ids ON t.id = top_ids.id
+ORDER BY t.id
+LIMIT 10');
+                                                                                                                                                                                                                   explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.val
+         ->  Distributed Subplan XXX_1
+               Intermediate Data Size: N bytes
+               Result destination: Send to N nodes
+               ->  Limit (actual rows=N loops=N)
+                     Output: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+                           Output: remote_scan.id
+                           Task Count: N
+                           Merge Method: sorted merge
+                           Tuple data received from nodes: N bytes
+                           Tasks Shown: One of N
+                           ->  Task
+                                 Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                                 Tuple data received from node: N bytes
+                                 Node: host=localhost port=N dbname=regression
+                                 ->  Limit (actual rows=N loops=N)
+                                       Output: id
+                                       ->  Sort (actual rows=N loops=N)
+                                             Output: id
+                                             Sort Key: sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: NkB
+                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                                   Output: id
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT t.id AS worker_column_1, t.val AS worker_column_2 FROM (public.sorted_merge_test_960000 t JOIN (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) top_ids ON ((t.id OPERATOR(pg_catalog.=) top_ids.id)))) worker_subquery ORDER BY worker_column_1 LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: t.id, t.val
+                     ->  Merge Join (actual rows=N loops=N)
+                           Output: t.id, t.val
+                           Merge Cond: (t.id = intermediate_result.id)
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: t.id, t.val
+                                 Sort Key: t.id
+                                 Sort Method: quicksort  Memory: NkB
+                                 ->  Seq Scan on public.sorted_merge_test_960000 t (actual rows=N loops=N)
+                                       Output: t.id, t.val
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: intermediate_result.id
+                                 Sort Key: intermediate_result.id
+                                 Sort Method: quicksort  Memory: NkB
+                                 ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
+                                       Output: intermediate_result.id
+                                       Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+(53 rows)
+
+-- H4 EXPLAIN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test
+WHERE id IN (
+    SELECT id FROM sorted_merge_events ORDER BY id LIMIT 10
+)
+ORDER BY id
+LIMIT 5');
+                                                                                                                                                                                                                                        explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.val
+         ->  Distributed Subplan XXX_1
+               Intermediate Data Size: N bytes
+               Result destination: Send to N nodes
+               ->  Limit (actual rows=N loops=N)
+                     Output: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+                           Output: remote_scan.id
+                           Task Count: N
+                           Merge Method: sorted merge
+                           Tuple data received from nodes: N bytes
+                           Tasks Shown: One of N
+                           ->  Task
+                                 Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT 'N'::bigint
+                                 Tuple data received from node: N bytes
+                                 Node: host=localhost port=N dbname=regression
+                                 ->  Limit (actual rows=N loops=N)
+                                       Output: id
+                                       ->  Sort (actual rows=N loops=N)
+                                             Output: id
+                                             Sort Key: sorted_merge_events.id
+                                             Sort Method: top-N heapsort  Memory: NkB
+                                             ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=N loops=N)
+                                                   Output: id
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT worker_column_1 AS id, worker_column_2 AS val FROM (SELECT sorted_merge_test.id AS worker_column_1, sorted_merge_test.val AS worker_column_2 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE (sorted_merge_test.id OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)))) worker_subquery ORDER BY worker_column_1 LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: sorted_merge_test.id, sorted_merge_test.val
+                     ->  Sort (actual rows=N loops=N)
+                           Output: sorted_merge_test.id, sorted_merge_test.val
+                           Sort Key: sorted_merge_test.id
+                           Sort Method: quicksort  Memory: NkB
+                           ->  Hash Semi Join (actual rows=N loops=N)
+                                 Output: sorted_merge_test.id, sorted_merge_test.val
+                                 Hash Cond: (sorted_merge_test.id = intermediate_result.id)
+                                 ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                       Output: sorted_merge_test.id, sorted_merge_test.val, sorted_merge_test.num, sorted_merge_test.ts
+                                 ->  Hash (actual rows=N loops=N)
+                                       Output: intermediate_result.id
+                                       ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
+                                             Output: intermediate_result.id
+                                             Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+(52 rows)
+
+-- H5 EXPLAIN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH small_cte AS (
+    SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM small_cte ORDER BY id LIMIT 5');
+                                                                                                                explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: N bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.val
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+                     Output: remote_scan.id, remote_scan.val
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
+                     ->  Task
+                           Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
+                                 Output: id, val
+                                 ->  Sort (actual rows=N loops=N)
+                                       Output: id, val
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val
+   Task Count: N
+   Tuple data received from nodes: N bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val FROM (SELECT intermediate_result.id, intermediate_result.val FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text)) small_cte ORDER BY id LIMIT N
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Limit (actual rows=N loops=N)
+               Output: intermediate_result.id, intermediate_result.val
+               ->  Sort (actual rows=N loops=N)
+                     Output: intermediate_result.id, intermediate_result.val
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
+                           Output: intermediate_result.id, intermediate_result.val
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+(41 rows)
+
+-- H6 EXPLAIN
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT foo.id, bar.id as bar_id
+FROM
+    (SELECT id FROM sorted_merge_test ORDER BY id LIMIT 3) as foo,
+    (SELECT id FROM sorted_merge_events ORDER BY id LIMIT 3) as bar
+ORDER BY foo.id, bar.id
+LIMIT 5');
+                                                                                                                                                                                  explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.bar_id
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: N bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=N loops=N)
+               Output: remote_scan.id
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+                     Output: remote_scan.id
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
+                     ->  Task
+                           Query: SELECT id FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
+                                 Output: id
+                                 ->  Sort (actual rows=N loops=N)
+                                       Output: id
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: top-N heapsort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id
+   ->  Distributed Subplan XXX_2
+         Intermediate Data Size: N bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=N loops=N)
+               Output: remote_scan.id
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+                     Output: remote_scan.id
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
+                     ->  Task
+                           Query: SELECT id FROM public.sorted_merge_events_960004 sorted_merge_events WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
+                                 Output: id
+                                 ->  Sort (actual rows=N loops=N)
+                                       Output: id
+                                       Sort Key: sorted_merge_events.id
+                                       Sort Method: top-N heapsort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_events_960004 sorted_merge_events (actual rows=N loops=N)
+                                             Output: id
+   Task Count: N
+   Tuple data received from nodes: N bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT foo.id, bar.id AS bar_id FROM (SELECT intermediate_result.id FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) foo, (SELECT intermediate_result.id FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(id integer)) bar ORDER BY foo.id, bar.id LIMIT N
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Limit (actual rows=N loops=N)
+               Output: intermediate_result.id, intermediate_result_1.id
+               ->  Sort (actual rows=N loops=N)
+                     Output: intermediate_result.id, intermediate_result_1.id
+                     Sort Key: intermediate_result.id, intermediate_result_1.id
+                     Sort Method: quicksort  Memory: NkB
+                     ->  Nested Loop (actual rows=N loops=N)
+                           Output: intermediate_result.id, intermediate_result_1.id
+                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
+                                 Output: intermediate_result.id
+                                 Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result_1 (actual rows=N loops=N)
+                                 Output: intermediate_result_1.id
+                                 Function Call: read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format)
+(69 rows)
+
+-- H7 EXPLAIN — GUC off vs on
+SET citus.enable_sorted_merge TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5');
+                                                                                                                                                           explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: N bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.val, remote_scan.num
+               ->  Sort (actual rows=N loops=N)
+                     Output: remote_scan.id, remote_scan.val, remote_scan.num
+                     Sort Key: remote_scan.id
+                     ->  Custom Scan (Citus Adaptive) (never executed)
+                           Output: remote_scan.id, remote_scan.val, remote_scan.num
+                           Task Count: N
+                           Tuple data received from nodes: N bytes
+                           Tasks Shown: One of N
+                           ->  Task
+                                 Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                                 Tuple data received from node: N bytes
+                                 Node: host=localhost port=N dbname=regression
+                                 ->  Limit (actual rows=N loops=N)
+                                       Output: id, val, num
+                                       ->  Sort (actual rows=N loops=N)
+                                             Output: id, val, num
+                                             Sort Key: sorted_merge_test.id
+                                             Sort Method: quicksort  Memory: NkB
+                                             ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                                   Output: id, val, num
+   Task Count: N
+   Tuple data received from nodes: N bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (N)::numeric) ORDER BY id LIMIT N
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Limit (actual rows=N loops=N)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+               ->  Sort (actual rows=N loops=N)
+                     Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
+                           Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           Filter: (intermediate_result.num > 'N'::numeric)
+                           Rows Removed by Filter: N
+(45 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) WITH cte AS (
+    SELECT id, val, num FROM sorted_merge_test ORDER BY id LIMIT 20
+)
+SELECT * FROM cte WHERE num > 10 ORDER BY id LIMIT 5');
+                                                                                                                                                           explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.num
+   ->  Distributed Subplan XXX_1
+         Intermediate Data Size: N bytes
+         Result destination: Write locally
+         ->  Limit (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.val, remote_scan.num
+               ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+                     Output: remote_scan.id, remote_scan.val, remote_scan.num
+                     Task Count: N
+                     Merge Method: sorted merge
+                     Tuple data received from nodes: N bytes
+                     Tasks Shown: One of N
+                     ->  Task
+                           Query: SELECT id, val, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id LIMIT 'N'::bigint
+                           Tuple data received from node: N bytes
+                           Node: host=localhost port=N dbname=regression
+                           ->  Limit (actual rows=N loops=N)
+                                 Output: id, val, num
+                                 ->  Sort (actual rows=N loops=N)
+                                       Output: id, val, num
+                                       Sort Key: sorted_merge_test.id
+                                       Sort Method: quicksort  Memory: NkB
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val, num
+   Task Count: N
+   Tuple data received from nodes: N bytes
+   Tasks Shown: All
+   ->  Task
+         Query: SELECT id, val, num FROM (SELECT intermediate_result.id, intermediate_result.val, intermediate_result.num FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(id integer, val text, num numeric)) cte WHERE (num OPERATOR(pg_catalog.>) (N)::numeric) ORDER BY id LIMIT N
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Limit (actual rows=N loops=N)
+               Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+               ->  Sort (actual rows=N loops=N)
+                     Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                     Sort Key: intermediate_result.id
+                     Sort Method: top-N heapsort  Memory: NkB
+                     ->  Function Scan on pg_catalog.read_intermediate_result intermediate_result (actual rows=N loops=N)
+                           Output: intermediate_result.id, intermediate_result.val, intermediate_result.num
+                           Function Call: read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format)
+                           Filter: (intermediate_result.num > 'N'::numeric)
+                           Rows Removed by Filter: N
+(43 rows)
+
+-- =================================================================
+-- Category I: Distributed Transactions
+-- =================================================================
+-- Verify sorted merge correctness within multi-statement transactions
+-- where data is modified before the sorted-merge SELECT.
+SET citus.enable_sorted_merge TO on;
+-- I1: INSERT then SELECT within a transaction
+BEGIN;
+INSERT INTO sorted_merge_test (id, val, num) VALUES (900, 'txn_insert', 900.0);
+SELECT id, val FROM sorted_merge_test WHERE id >= 900 ORDER BY id;
+ id  |    val
+---------------------------------------------------------------------
+ 900 | txn_insert
+(1 row)
+
+ROLLBACK;
+-- I2: UPDATE then SELECT within a transaction
+BEGIN;
+UPDATE sorted_merge_test SET val = 'updated' WHERE id = 1;
+SELECT id, val FROM sorted_merge_test WHERE id <= 3 ORDER BY id;
+ id |   val
+---------------------------------------------------------------------
+  1 | updated
+  2 | val_2
+  3 | val_3
+(3 rows)
+
+ROLLBACK;
+-- I3: DELETE then SELECT within a transaction
+BEGIN;
+DELETE FROM sorted_merge_test WHERE id <= 5;
+SELECT id, val FROM sorted_merge_test WHERE id <= 10 ORDER BY id;
+ id |  val
+---------------------------------------------------------------------
+  6 | val_6
+  7 | val_7
+  8 | val_8
+  9 | val_9
+ 10 | val_10
+(5 rows)
+
+ROLLBACK;
+-- I4: INSERT + UPDATE + SELECT with multi-column ORDER BY
+BEGIN;
+INSERT INTO sorted_merge_test (id, val, num) VALUES (901, 'txn_a', 1.0);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (902, 'txn_b', 2.0);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (903, 'txn_c', 3.0);
+UPDATE sorted_merge_test SET num = 999.0 WHERE id = 901;
+SELECT id, val, num FROM sorted_merge_test WHERE id >= 900 ORDER BY num, id;
+ id  |  val  |  num
+---------------------------------------------------------------------
+ 902 | txn_b |   2.0
+ 903 | txn_c |   3.0
+ 901 | txn_a | 999.0
+(3 rows)
+
+ROLLBACK;
+-- I5: Compare results with GUC off vs on in a transaction
+BEGIN;
+INSERT INTO sorted_merge_test (id, val, num) VALUES (910, 'cmp_a', 10.0);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (911, 'cmp_b', 20.0);
+INSERT INTO sorted_merge_test (id, val, num) VALUES (912, 'cmp_c', 30.0);
+SET LOCAL citus.enable_sorted_merge TO off;
+SELECT id, val, num FROM sorted_merge_test WHERE id >= 910 ORDER BY id;
+ id  |  val  | num
+---------------------------------------------------------------------
+ 910 | cmp_a | 10.0
+ 911 | cmp_b | 20.0
+ 912 | cmp_c | 30.0
+(3 rows)
+
+SET LOCAL citus.enable_sorted_merge TO on;
+SELECT id, val, num FROM sorted_merge_test WHERE id >= 910 ORDER BY id;
+ id  |  val  | num
+---------------------------------------------------------------------
+ 910 | cmp_a | 10.0
+ 911 | cmp_b | 20.0
+ 912 | cmp_c | 30.0
+(3 rows)
+
+ROLLBACK;
+-- I6: DELETE + aggregate in SELECT with ORDER BY
+BEGIN;
+DELETE FROM sorted_merge_test WHERE id > 100 AND id < 200;
+SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id LIMIT 5;
+ id | count
+---------------------------------------------------------------------
+  1 |     1
+  2 |     1
+  3 |     1
+  4 |     1
+  5 |     1
+(5 rows)
+
+ROLLBACK;
+-- =================================================================
+-- Category J: Coordinator expression evaluation exclusion
+-- =================================================================
+-- Verify that queries with ORDER BY on expressions that need coordinator-side
+-- evaluation are correctly excluded from sorted merge (or handled correctly).
+SET citus.enable_sorted_merge TO on;
+-- J1: ORDER BY expression on aggregate result (ordinal reference)
+-- The ORDER BY references position 2 which is an aggregate — sorted merge
+-- must NOT be used because aggregates are rewritten between worker/coordinator.
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, sum(num) AS total FROM sorted_merge_test GROUP BY id ORDER BY 2 LIMIT 5');
+                                                                               explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.total
+   ->  Sort (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.total
+         Sort Key: remote_scan.total
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.total
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT id, sum(num) AS total FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (sum(num)) LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
+                           Output: id, (sum(num))
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: id, (sum(num))
+                                 Sort Key: (sum(sorted_merge_test.num))
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  HashAggregate (actual rows=N loops=N)
+                                       Output: id, sum(num)
+                                       Group Key: sorted_merge_test.id
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val, num, ts
+(27 rows)
+
+-- J2: ORDER BY expression wrapping an aggregate
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, sum(num) + 1 AS total_plus FROM sorted_merge_test GROUP BY id ORDER BY sum(num) + 1 LIMIT 5');
+                                                                                                                       explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.total_plus
+   ->  Sort (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.total_plus
+         Sort Key: remote_scan.total_plus
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.total_plus
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT id, (sum(num) OPERATOR(pg_catalog.+) 'N'::numeric) AS total_plus FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY (sum(num) OPERATOR(pg_catalog.+) 'N'::numeric) LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
+                           Output: id, ((sum(num) + 'N'::numeric))
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: id, ((sum(num) + 'N'::numeric))
+                                 Sort Key: ((sum(sorted_merge_test.num) + 'N'::numeric))
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  HashAggregate (actual rows=N loops=N)
+                                       Output: id, (sum(num) + 'N'::numeric)
+                                       Group Key: sorted_merge_test.id
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val, num, ts
+(27 rows)
+
+-- J3: ORDER BY a non-aggregate expression that can be pushed to workers
+-- This should be eligible for sorted merge — the expression is evaluated
+-- on the worker side and sort order is preserved.
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id + 0');
+                                                                                      explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, (id OPERATOR(pg_catalog.+) N) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (id OPERATOR(pg_catalog.+) N)
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, ((id + N))
+               Sort Key: ((sorted_merge_test.id + N))
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, (id + N)
+(16 rows)
+
+-- J4: ORDER BY with CASE expression (no aggregates) — eligible
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY CASE WHEN id < 50 THEN 0 ELSE 1 END, id');
+                                                                                                                    explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, CASE WHEN (id OPERATOR(pg_catalog.<) N) THEN N ELSE N END AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY CASE WHEN (id OPERATOR(pg_catalog.<) N) THEN N ELSE N END, id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, (CASE WHEN (id < N) THEN N ELSE N END)
+               Sort Key: (CASE WHEN (sorted_merge_test.id < N) THEN N ELSE N END), sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, CASE WHEN (id < N) THEN N ELSE N END
+(16 rows)
+
+-- J5: ORDER BY on an expression that mixes aggregate and non-aggregate
+-- Should be ineligible because the expression contains an aggregate.
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY id + count(*)');
+                                                                                      explain_filter
+---------------------------------------------------------------------
+ Sort (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+   Sort Key: remote_scan.worker_column_3
+   Sort Method: quicksort  Memory: NkB
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+         Task Count: N
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id, count(*) AS count, (id OPERATOR(pg_catalog.+) count(*)) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  HashAggregate (actual rows=N loops=N)
+                     Output: id, count(*), (id + count(*))
+                     Group Key: sorted_merge_test.id
+                     ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                           Output: id, val, num, ts
+(19 rows)
+
+-- J6: Correctness comparison — expression ORDER BY, GUC off vs on
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test ORDER BY id + 0 LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY id + 0 LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- -----------------------------------------------------------------
+-- J7–J12: Additional pushable expressions (no aggregates)
+-- -----------------------------------------------------------------
+SET citus.enable_sorted_merge TO on;
+-- J7: ORDER BY function call on column
+SELECT id, val FROM sorted_merge_test ORDER BY upper(val) LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+-- J8: ORDER BY COALESCE
+SELECT id, num FROM sorted_merge_test ORDER BY COALESCE(num, 0) LIMIT 5;
+ id  | num
+---------------------------------------------------------------------
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
+(5 rows)
+
+-- J9: ORDER BY negation
+SELECT id, num FROM sorted_merge_test ORDER BY -num LIMIT 5;
+ id  |  num
+---------------------------------------------------------------------
+ 100 | 150.0
+  99 | 148.5
+  98 | 147.0
+  97 | 145.5
+  96 | 144.0
+(5 rows)
+
+-- J10: ORDER BY concatenation
+SELECT id, val FROM sorted_merge_test ORDER BY val || '_suffix' LIMIT 5;
+ id  |   val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+ 100 | val_100
+  10 | val_10
+(5 rows)
+
+-- J11: ORDER BY mathematical function (abs distance)
+SELECT id, num FROM sorted_merge_test ORDER BY abs(num - 25), id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+ 17 | 25.5
+ 16 | 24.0
+ 18 | 27.0
+ 15 | 22.5
+ 19 | 28.5
+(5 rows)
+
+-- J12: ORDER BY expression not in SELECT list
+SELECT id FROM sorted_merge_test ORDER BY num + 1 LIMIT 5;
+ id
+---------------------------------------------------------------------
+  1
+  2
+  3
+  4
+  5
+(5 rows)
+
+-- J13: ORDER BY expression referencing multiple columns
+SELECT id, val FROM sorted_merge_test ORDER BY id * num LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+-- J14: ORDER BY with type cast
+SELECT id, num FROM sorted_merge_test ORDER BY num::int LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+-- J15: ORDER BY with subexpression in SELECT and different expression in ORDER BY
+SELECT id, num + 1 as n1 FROM sorted_merge_test ORDER BY num + 2 LIMIT 5;
+ id | n1
+---------------------------------------------------------------------
+  1 | 2.5
+  2 | 4.0
+  3 | 5.5
+  4 | 7.0
+  5 | 8.5
+(5 rows)
+
+-- J16: ORDER BY column alias
+SELECT id, num * 2 as doubled FROM sorted_merge_test ORDER BY doubled LIMIT 5;
+ id | doubled
+---------------------------------------------------------------------
+  1 |     3.0
+  2 |     6.0
+  3 |     9.0
+  4 |    12.0
+  5 |    15.0
+(5 rows)
+
+-- -----------------------------------------------------------------
+-- J17–J21: Correctness — GUC off vs on for expression ORDER BY
+-- -----------------------------------------------------------------
+-- J17: function call
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test ORDER BY upper(val) LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY upper(val) LIMIT 5;
+ id  |  val
+---------------------------------------------------------------------
+ 200 | dup_a
+ 201 | dup_b
+ 202 | dup_c
+   1 | val_1
+  10 | val_10
+(5 rows)
+
+-- J18: CASE expression
+SET citus.enable_sorted_merge TO off;
+SELECT id, CASE WHEN num > 50 THEN 'high' ELSE 'low' END as cat
+FROM sorted_merge_test ORDER BY CASE WHEN num > 50 THEN 'high' ELSE 'low' END, id LIMIT 10;
+ id | cat
+---------------------------------------------------------------------
+ 34 | high
+ 35 | high
+ 36 | high
+ 37 | high
+ 38 | high
+ 39 | high
+ 40 | high
+ 41 | high
+ 42 | high
+ 43 | high
+(10 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, CASE WHEN num > 50 THEN 'high' ELSE 'low' END as cat
+FROM sorted_merge_test ORDER BY CASE WHEN num > 50 THEN 'high' ELSE 'low' END, id LIMIT 10;
+ id | cat
+---------------------------------------------------------------------
+ 34 | high
+ 35 | high
+ 36 | high
+ 37 | high
+ 38 | high
+ 39 | high
+ 40 | high
+ 41 | high
+ 42 | high
+ 43 | high
+(10 rows)
+
+-- J19: COALESCE
+SET citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test ORDER BY COALESCE(num, 0), id LIMIT 5;
+ id  | num
+---------------------------------------------------------------------
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, num FROM sorted_merge_test ORDER BY COALESCE(num, 0), id LIMIT 5;
+ id  | num
+---------------------------------------------------------------------
+ 101 |
+ 102 |
+   1 | 1.5
+   2 | 3.0
+   3 | 4.5
+(5 rows)
+
+-- J20: abs() distance function
+SET citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test ORDER BY abs(num - 25), id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+ 17 | 25.5
+ 16 | 24.0
+ 18 | 27.0
+ 15 | 22.5
+ 19 | 28.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, num FROM sorted_merge_test ORDER BY abs(num - 25), id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+ 17 | 25.5
+ 16 | 24.0
+ 18 | 27.0
+ 15 | 22.5
+ 19 | 28.5
+(5 rows)
+
+-- -----------------------------------------------------------------
+-- J21–J22: More ineligibility — aggregate inside expressions
+-- -----------------------------------------------------------------
+SET citus.enable_sorted_merge TO on;
+-- J21: ORDER BY CASE wrapping an aggregate
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, count(*) FROM sorted_merge_test GROUP BY id ORDER BY CASE WHEN count(*) > 1 THEN 0 ELSE 1 END, id LIMIT 5');
+                                                                                                                                                      explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+   ->  Sort (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+         Sort Key: remote_scan.worker_column_3, remote_scan.id
+         Sort Method: top-N heapsort  Memory: NkB
+         ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+               Output: remote_scan.id, remote_scan.count, remote_scan.worker_column_3
+               Task Count: N
+               Tuple data received from nodes: N bytes
+               Tasks Shown: One of N
+               ->  Task
+                     Query: SELECT id, count(*) AS count, CASE WHEN (count(*) OPERATOR(pg_catalog.>) N) THEN N ELSE N END AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true GROUP BY id ORDER BY CASE WHEN (count(*) OPERATOR(pg_catalog.>) N) THEN N ELSE N END, id LIMIT 'N'::bigint
+                     Tuple data received from node: N bytes
+                     Node: host=localhost port=N dbname=regression
+                     ->  Limit (actual rows=N loops=N)
+                           Output: id, (count(*)), (CASE WHEN (count(*) > N) THEN N ELSE N END)
+                           ->  Sort (actual rows=N loops=N)
+                                 Output: id, (count(*)), (CASE WHEN (count(*) > N) THEN N ELSE N END)
+                                 Sort Key: (CASE WHEN (count(*) > N) THEN N ELSE N END), sorted_merge_test.id
+                                 Sort Method: top-N heapsort  Memory: NkB
+                                 ->  HashAggregate (actual rows=N loops=N)
+                                       Output: id, count(*), CASE WHEN (count(*) > N) THEN N ELSE N END
+                                       Group Key: sorted_merge_test.id
+                                       ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                             Output: id, val, num, ts
+(27 rows)
+
+-- J22: ORDER BY aggregate expression (sum + 1) — correctness
+SET citus.enable_sorted_merge TO off;
+SELECT id, sum(num) + 1 as s FROM sorted_merge_test GROUP BY id ORDER BY sum(num) + 1 LIMIT 5;
+ id |  s
+---------------------------------------------------------------------
+  1 | 2.5
+  2 | 4.0
+  3 | 5.5
+  4 | 7.0
+  5 | 8.5
+(5 rows)
+
+SET citus.enable_sorted_merge TO on;
+SELECT id, sum(num) + 1 as s FROM sorted_merge_test GROUP BY id ORDER BY sum(num) + 1 LIMIT 5;
+ id |  s
+---------------------------------------------------------------------
+  1 | 2.5
+  2 | 4.0
+  3 | 5.5
+  4 | 7.0
+  5 | 8.5
+(5 rows)
+
+-- -----------------------------------------------------------------
+-- J23–J24: EXPLAIN plans for pushable expression patterns
+-- -----------------------------------------------------------------
+SET citus.enable_sorted_merge TO on;
+-- J23: Does function-call ORDER BY get pushed to workers?
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY upper(val) LIMIT 5');
+                                                                                explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id, val, upper(val) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (upper(val)) LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: id, val, (upper(val))
+                     ->  Sort (actual rows=N loops=N)
+                           Output: id, val, (upper(val))
+                           Sort Key: (upper(sorted_merge_test.val))
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: id, val, upper(val)
+(20 rows)
+
+-- J24: ORDER BY expression not in SELECT list — pushed to workers?
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id FROM sorted_merge_test ORDER BY num + 1 LIMIT 5');
+                                                                                                           explain_filter
+---------------------------------------------------------------------
+ Limit (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.worker_column_2
+   ->  Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+         Output: remote_scan.id, remote_scan.worker_column_2
+         Task Count: N
+         Merge Method: sorted merge
+         Tuple data received from nodes: N bytes
+         Tasks Shown: One of N
+         ->  Task
+               Query: SELECT id, (num OPERATOR(pg_catalog.+) 'N'::numeric) AS worker_column_2 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (num OPERATOR(pg_catalog.+) 'N'::numeric) LIMIT 'N'::bigint
+               Tuple data received from node: N bytes
+               Node: host=localhost port=N dbname=regression
+               ->  Limit (actual rows=N loops=N)
+                     Output: id, ((num + 'N'::numeric))
+                     ->  Sort (actual rows=N loops=N)
+                           Output: id, ((num + 'N'::numeric))
+                           Sort Key: ((sorted_merge_test.num + 'N'::numeric))
+                           Sort Method: top-N heapsort  Memory: NkB
+                           ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                                 Output: id, (num + 'N'::numeric)
+(20 rows)
+
+-- =================================================================
+-- Category K: Index-based sort avoidance
+-- =================================================================
+-- When an index exists on the ORDER BY column, PostgreSQL's worker-side
+-- planner should choose an Index Scan instead of Sort + Seq Scan, making
+-- the worker-side sort essentially free. This is the best-case scenario
+-- for sorted merge: zero worker sort cost + zero coordinator sort cost.
+--
+-- We disable enable_seqscan to force the worker planner to prefer the
+-- index, since the test table is small enough that Seq Scan + Sort
+-- would otherwise be cheaper.
+CREATE INDEX sorted_merge_test_id_idx ON sorted_merge_test(id);
+-- Use a transaction with SET LOCAL to propagate enable_seqscan=off to workers,
+-- forcing the worker planner to use the index instead of Seq Scan + Sort.
+SET citus.propagate_set_commands TO 'local';
+-- K1: EXPLAIN with index — worker uses Index Scan, no Sort node
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id');
+                                                               explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Index Scan using sorted_merge_test_id_idx_960000 on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+               Output: id, val
+(12 rows)
+
+COMMIT;
+-- K2: Correctness with index — GUC off vs on
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SET LOCAL citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+SET LOCAL citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY id LIMIT 5;
+ id |  val
+---------------------------------------------------------------------
+  1 | val_1
+  2 | val_2
+  3 | val_3
+  4 | val_4
+  5 | val_5
+(5 rows)
+
+COMMIT;
+-- K3: Multi-column index
+CREATE INDEX sorted_merge_test_num_id_idx ON sorted_merge_test(num, id);
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, num FROM sorted_merge_test ORDER BY num, id');
+                                                                    explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.num
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, num FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY num, id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Index Only Scan using sorted_merge_test_num_id_idx_960000 on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+               Output: id, num
+               Heap Fetches: N
+(13 rows)
+
+COMMIT;
+-- K4: Correctness with multi-column index — GUC off vs on
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SET LOCAL citus.enable_sorted_merge TO off;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+SET LOCAL citus.enable_sorted_merge TO on;
+SELECT id, num FROM sorted_merge_test ORDER BY num, id LIMIT 5;
+ id | num
+---------------------------------------------------------------------
+  1 | 1.5
+  2 | 3.0
+  3 | 4.5
+  4 | 6.0
+  5 | 7.5
+(5 rows)
+
+COMMIT;
+-- K5: DESC ordering with index
+SET citus.enable_sorted_merge TO on;
+BEGIN;
+SET LOCAL enable_seqscan TO off;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY id DESC');
+                                                                    explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY id DESC
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Index Scan Backward using sorted_merge_test_id_idx_960000 on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+               Output: id, val
+(12 rows)
+
+COMMIT;
+RESET citus.propagate_set_commands;
+DROP INDEX sorted_merge_test_id_idx;
+DROP INDEX sorted_merge_test_num_id_idx;
+-- =================================================================
+-- Category L: Volatile and stable functions in ORDER BY
+-- Tests that ORDER BY with functions works correctly with sorted merge.
+-- Volatile functions (random, clock_timestamp, timeofday) are pushed
+-- to workers as computed columns — sorted merge uses the materialized
+-- worker values, which is semantically equivalent to coordinator Sort.
+-- =================================================================
+-- L1: STABLE function — now() in expression with column
+-- now() returns the same value on all workers within a transaction,
+-- so the merge is globally consistent. Sorted merge should be used.
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY now() - ts, id');
+                                                                                            explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, (now() OPERATOR(pg_catalog.-) ts) AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (now() OPERATOR(pg_catalog.-) ts), id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, ((now() - ts))
+               Sort Key: ((now() - sorted_merge_test.ts)), sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, (now() - ts)
+(16 rows)
+
+-- L2: VOLATILE function — random() in ORDER BY
+-- random() is pushed to workers as worker_column_3; each worker sorts
+-- by its own random values. The merge interleaves using materialized
+-- values — semantically equivalent to coordinator Sort on worker_column_3.
+-- Test plan shape only (result is non-deterministic).
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY random(), id');
+                                                                    explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, random() AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (random()), id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, (random())
+               Sort Key: (random()), sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, random()
+(16 rows)
+
+-- L3: VOLATILE function — clock_timestamp() in ORDER BY
+-- Same mechanics as random(): pushed to workers, sorted locally, merged.
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY clock_timestamp(), id');
+                                                                             explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, clock_timestamp() AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY (clock_timestamp()), id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, (clock_timestamp())
+               Sort Key: (clock_timestamp()), sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, clock_timestamp()
+(16 rows)
+
+-- L4: nextval() in ORDER BY with sorted merge ON — expected ERROR
+-- nextval() cannot be pushed to workers (CanPushDownExpression blocks it).
+-- The sort clause references a target entry missing from the worker target
+-- list, causing a plan-time error. This is a pre-existing Citus limitation.
+CREATE SEQUENCE sorted_merge_test_seq;
+SET citus.enable_sorted_merge TO on;
+SELECT id, val FROM sorted_merge_test ORDER BY nextval('sorted_merge_test_seq');
+psql:sql/multi_orderby_pushdown.sql:777: ERROR:  ORDER/GROUP BY expression not found in targetlist
+-- L4b: nextval() in ORDER BY with sorted merge OFF but LIMIT present
+-- Same error — demonstrates this is NOT a sorted merge regression.
+SET citus.enable_sorted_merge TO off;
+SELECT id, val FROM sorted_merge_test ORDER BY nextval('sorted_merge_test_seq') LIMIT 5;
+psql:sql/multi_orderby_pushdown.sql:782: ERROR:  ORDER/GROUP BY expression not found in targetlist
+DROP SEQUENCE sorted_merge_test_seq;
+-- L5: STABLE function alone (constant-fold case)
+-- current_timestamp is constant-folded by the planner; the sort key
+-- effectively becomes just 'id'. Sorted merge should be used.
+SET citus.enable_sorted_merge TO on;
+SELECT public.explain_filter('EXPLAIN (ANALYZE ON, VERBOSE ON, COSTS OFF, TIMING OFF, BUFFERS OFF, SUMMARY OFF) SELECT id, val FROM sorted_merge_test ORDER BY current_timestamp, id');
+                                                                            explain_filter
+---------------------------------------------------------------------
+ Custom Scan (Citus Adaptive) (actual rows=N loops=N)
+   Output: remote_scan.id, remote_scan.val, remote_scan.worker_column_3
+   Task Count: N
+   Merge Method: sorted merge
+   Tuple data received from nodes: N bytes
+   Tasks Shown: One of N
+   ->  Task
+         Query: SELECT id, val, CURRENT_TIMESTAMP AS worker_column_3 FROM public.sorted_merge_test_960000 sorted_merge_test WHERE true ORDER BY CURRENT_TIMESTAMP, id
+         Tuple data received from node: N bytes
+         Node: host=localhost port=N dbname=regression
+         ->  Sort (actual rows=N loops=N)
+               Output: id, val, (CURRENT_TIMESTAMP)
+               Sort Key: sorted_merge_test.id
+               Sort Method: quicksort  Memory: NkB
+               ->  Seq Scan on public.sorted_merge_test_960000 sorted_merge_test (actual rows=N loops=N)
+                     Output: id, val, CURRENT_TIMESTAMP
+(16 rows)
+
+SET citus.enable_sorted_merge TO off;
 -- =================================================================
 -- Cleanup
 -- =================================================================

From 1c11bf9da3889d8c1617d7f696fe46846cd1441f Mon Sep 17 00:00:00 2001
From: Neil Deshpande <ndeshpan@microsoft.com>
Date: Tue, 14 Apr 2026 21:47:42 +0000
Subject: [PATCH 7/7] Make style checks happy

---
 .../distributed/executor/sorted_merge.c       |  2 +-
 src/backend/distributed/shared_library_init.c | 30 +++++++++----------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/src/backend/distributed/executor/sorted_merge.c b/src/backend/distributed/executor/sorted_merge.c
index 75e09f92baa..397b1cc9620 100644
--- a/src/backend/distributed/executor/sorted_merge.c
+++ b/src/backend/distributed/executor/sorted_merge.c
@@ -382,7 +382,7 @@ CreateSortedMergeAdapter(Tuplestorestate **perTaskStores,
  * On each call after the first, we advance the previous winner's store
  * and update the heap before selecting the new winner. This matches the
  * MergeAppend pattern in nodeMergeAppend.c.
- * 
+ *
  * Possible perf optimizations to explore in the future:
  * Avoid copying the winning tuple into the scan slot by returning a pointer to the winner's slot instead.
  * This would require changes to the caller to not modify the returned slot and to understand that it's owned by the adapter until the next call.
diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c
index 8b4b366ace3..039e8ee5c51 100644
--- a/src/backend/distributed/shared_library_init.c
+++ b/src/backend/distributed/shared_library_init.c
@@ -1617,21 +1617,6 @@ RegisterCitusConfigVariables(void)
 		GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
 		NULL, NULL, NULL);
 
-	DefineCustomBoolVariable(
-		"citus.enable_streaming_sorted_merge",
-		gettext_noop("Use streaming adapter instead of eager merge for sorted merge."),
-		gettext_noop("When enabled alongside citus.enable_sorted_merge, the coordinator "
-					 "streams merged tuples directly from per-task stores via a binary "
-					 "heap instead of eagerly copying all tuples into a final tuplestore. "
-					 "This reduces memory usage and improves time-to-first-tuple, "
-					 "especially for LIMIT queries. Requires citus.enable_sorted_merge "
-					 "to also be enabled. This is an experimental feature."),
-		&EnableStreamingSortedMerge,
-		false,
-		PGC_USERSET,
-		GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
-		NULL, NULL, NULL);
-
 	DefineCustomBoolVariable(
 		"citus.enable_stat_counters",
 		gettext_noop("Enables the collection of statistic counters for Citus."),
@@ -1656,6 +1641,21 @@ RegisterCitusConfigVariables(void)
 		GUC_SUPERUSER_ONLY,
 		NULL, NULL, NULL);
 
+	DefineCustomBoolVariable(
+		"citus.enable_streaming_sorted_merge",
+		gettext_noop("Use streaming adapter instead of eager merge for sorted merge."),
+		gettext_noop("When enabled alongside citus.enable_sorted_merge, the coordinator "
+					 "streams merged tuples directly from per-task stores via a binary "
+					 "heap instead of eagerly copying all tuples into a final tuplestore. "
+					 "This reduces memory usage and improves time-to-first-tuple, "
+					 "especially for LIMIT queries. Requires citus.enable_sorted_merge "
+					 "to also be enabled. This is an experimental feature."),
+		&EnableStreamingSortedMerge,
+		false,
+		PGC_USERSET,
+		GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
+		NULL, NULL, NULL);
+
 	DefineCustomBoolVariable(
 		"citus.enable_unique_job_ids",
 		gettext_noop("Enables unique job IDs by prepending the local process ID and "