From 4e29a81e8dec829dc3f1a5896db7b57de101ecbf Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei.huang@jsessh.com>
Date: Fri, 12 Jun 2026 17:52:12 +0800
Subject: [PATCH 01/14] bench(parquet): add row filter baseline cases

---
 parquet/benches/arrow_reader_row_filter.rs | 1444 +++++++++++++++++++-
 parquet/benches/row_selection_cursor.rs    |  148 ++
 2 files changed, 1559 insertions(+), 33 deletions(-)
diff --git a/parquet/benches/arrow_reader_row_filter.rs b/parquet/benches/arrow_reader_row_filter.rs
index 2b5a09eebcb3..6ae7c816e56b 100644
--- a/parquet/benches/arrow_reader_row_filter.rs
+++ b/parquet/benches/arrow_reader_row_filter.rs
@@ -52,20 +52,24 @@
 //!   - unsel_clustered: for Unselective Clustered – in each 10K-row block, rows with an offset >= 1000 are "unsel_clustered".
 //!
 
-use arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, TimestampMillisecondArray};
-use arrow::compute::and;
+use arrow::array::{
+    ArrayRef, BooleanArray, Float64Array, Int64Array, StructArray, TimestampMillisecondArray,
+};
 use arrow::compute::kernels::cmp::{eq, gt, lt, neq};
+use arrow::compute::{and, or};
 use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
 use arrow::record_batch::RecordBatch;
 use arrow_array::StringViewArray;
 use arrow_array::builder::{ArrayBuilder, StringViewBuilder};
-use arrow_cast::pretty::pretty_format_batches;
 use bytes::Bytes;
-use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use criterion::{
+    BenchmarkGroup, BenchmarkId, Criterion, criterion_group, criterion_main, measurement::WallTime,
+};
 use futures::future::BoxFuture;
 use futures::{FutureExt, StreamExt};
 use parquet::arrow::arrow_reader::{
     ArrowPredicateFn, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowFilter,
+    RowSelectionPolicy,
 };
 use parquet::arrow::async_reader::AsyncFileReader;
 use parquet::arrow::{ArrowWriter, ParquetRecordBatchStreamBuilder, ProjectionMask};
@@ -76,6 +80,9 @@ use rand::{Rng, SeedableRng, rngs::StdRng};
 use std::ops::Range;
 use std::sync::Arc;
 
+const COLUMN_NAMES: [&str; 4] = ["int64", "float64", "utf8View", "ts"];
+const UTF8_VIEW_MISSING_VALUE: &str = "__arrow_rs_missing__";
+
 /// Generates a random string. Has a 50% chance to generate a short string (3–11 characters)
 /// or a long string (13–20 characters).
 fn random_string(rng: &mut StdRng) -> String {
@@ -188,32 +195,77 @@ const ROW_GROUP_SIZE: usize = 100_000;
 
 /// Writes the RecordBatch to an in memory buffer, returning the buffer
 fn write_parquet_file() -> Vec<u8> {
-    let batch = create_record_batch(TOTAL_ROWS);
-    println!("Batch created with {TOTAL_ROWS} rows, row group size = {ROW_GROUP_SIZE}");
-    println!(
-        "First 100 rows:\n{}",
-        pretty_format_batches(&[batch.clone().slice(0, 100)]).unwrap()
-    );
+    write_parquet_file_with_rows(TOTAL_ROWS, ROW_GROUP_SIZE)
+}
+
+/// Writes a RecordBatch with a configurable shape to an in memory buffer,
+/// returning the buffer.
+fn write_parquet_file_with_rows(total_rows: usize, row_group_size: usize) -> Vec<u8> {
+    let batch = create_record_batch(total_rows);
+    write_record_batch_to_parquet(&batch, row_group_size)
+}
+
+fn write_record_batch_to_parquet(batch: &RecordBatch, row_group_size: usize) -> Vec<u8> {
     let schema = batch.schema();
     let props = WriterProperties::builder()
         .set_compression(Compression::SNAPPY)
-        .set_max_row_group_row_count(Some(ROW_GROUP_SIZE))
+        .set_max_row_group_row_count(Some(row_group_size))
         .build();
     let mut buffer = vec![];
     {
         let mut writer = ArrowWriter::try_new(&mut buffer, schema.clone(), Some(props)).unwrap();
-        writer.write(&batch).unwrap();
+        writer.write(batch).unwrap();
         writer.close().unwrap();
     }
     buffer
 }
 
+fn create_nested_record_batch(size: usize) -> RecordBatch {
+    let tag = Arc::new(StringViewArray::from_iter_values(
+        (0..size).map(|idx| format!("tag_{}", idx % 7)),
+    )) as ArrayRef;
+    let payload = StructArray::from(vec![
+        (
+            Arc::new(Field::new("id", DataType::Int64, false)),
+            Arc::new(Int64Array::from_iter_values(
+                (0..size).map(|idx| idx as i64 + 1_000),
+            )) as ArrayRef,
+        ),
+        (
+            Arc::new(Field::new("label", DataType::Utf8View, false)),
+            Arc::new(StringViewArray::from_iter_values(
+                (0..size).map(|idx| format!("payload_{idx}")),
+            )) as ArrayRef,
+        ),
+    ]);
+    let payload = Arc::new(payload) as ArrayRef;
+    let value = Arc::new(Int64Array::from_iter_values(
+        (0..size).map(|idx| idx as i64 + 10_000),
+    )) as ArrayRef;
+
+    RecordBatch::try_from_iter(vec![("tag", tag), ("payload", payload), ("value", value)]).unwrap()
+}
+
+fn write_nested_parquet_file_with_rows(total_rows: usize, row_group_size: usize) -> Vec<u8> {
+    let batch = create_nested_record_batch(total_rows);
+    write_record_batch_to_parquet(&batch, row_group_size)
+}
+
 /// ProjectionCase defines the projection mode for the benchmark:
 /// either projecting all columns or excluding the column that is used for filtering.
-#[derive(Clone)]
+#[derive(Clone, Copy)]
 enum ProjectionCase {
     AllColumns,
     ExcludeFilterColumn,
+    FilterColumnsOnly,
+    CountOnly,
+    FixedColumns,
+    Float64AndTs,
+    Float64Only,
+    Int64AndFloat64,
+    Int64AndUtf8,
+    TsAndUtf8,
+    Utf8Only,
 }
 
 impl std::fmt::Display for ProjectionCase {
@@ -221,6 +273,53 @@ impl std::fmt::Display for ProjectionCase {
         match self {
             ProjectionCase::AllColumns => write!(f, "all_columns"),
             ProjectionCase::ExcludeFilterColumn => write!(f, "exclude_filter_column"),
+            ProjectionCase::FilterColumnsOnly => write!(f, "filter_columns_only"),
+            ProjectionCase::CountOnly => write!(f, "count_only"),
+            ProjectionCase::FixedColumns => write!(f, "fixed_columns"),
+            ProjectionCase::Float64AndTs => write!(f, "float64_and_ts"),
+            ProjectionCase::Float64Only => write!(f, "float64_only"),
+            ProjectionCase::Int64AndFloat64 => write!(f, "int64_and_float64"),
+            ProjectionCase::Int64AndUtf8 => write!(f, "int64_and_utf8"),
+            ProjectionCase::TsAndUtf8 => write!(f, "ts_and_utf8"),
+            ProjectionCase::Utf8Only => write!(f, "utf8_only"),
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+enum SyncStrategy {
+    FullPostFilter,
+    PushdownAuto,
+    PushdownSelectors,
+    PushdownMask,
+}
+
+impl std::fmt::Display for SyncStrategy {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SyncStrategy::FullPostFilter => write!(f, "full_post_filter"),
+            SyncStrategy::PushdownAuto => write!(f, "pushdown_auto"),
+            SyncStrategy::PushdownSelectors => write!(f, "pushdown_selectors"),
+            SyncStrategy::PushdownMask => write!(f, "pushdown_mask"),
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+enum AsyncStrategy {
+    FullPostFilter,
+    PushdownAuto,
+    PushdownSelectors,
+    PushdownMask,
+}
+
+impl std::fmt::Display for AsyncStrategy {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            AsyncStrategy::FullPostFilter => write!(f, "full_post_filter"),
+            AsyncStrategy::PushdownAuto => write!(f, "pushdown_auto"),
+            AsyncStrategy::PushdownSelectors => write!(f, "pushdown_selectors"),
+            AsyncStrategy::PushdownMask => write!(f, "pushdown_mask"),
         }
     }
 }
@@ -334,6 +433,71 @@ enum FilterType {
     /// [ClickBench]: https://github.com/ClickHouse/ClickBench
     /// [Q21-Q27]: https://github.com/apache/datafusion/blob/b7177234e65cbbb2dcc04c252f6acd80bb026362/benchmarks/queries/clickbench/queries.sql#L22-L28
     Utf8ViewNonEmpty,
+    /// Sparse variable-width predicate shaped like TPC-DS Q83 dynamic
+    /// `i_item_id` filters, where the predicate column is also projected.
+    Utf8ViewMissing,
+    /// Scalar-only part of ClickBench Q37:
+    ///
+    /// ```sql
+    /// WHERE CounterID = 62
+    ///   AND EventDate BETWEEN ...
+    ///   AND DontCountHits = 0
+    ///   AND IsRefresh = 0
+    ///   AND Title <> ''
+    /// ```
+    ///
+    /// DataFusion `Auto` does not push down the `Title <> ''` string predicate,
+    /// but it can push down the scalar prefix to defer decoding `Title`.
+    /// This synthetic predicate keeps that reader-level shape: cheap scalar
+    /// filter columns protect an expensive `Utf8View` output column.
+    ClickBenchQ37ScalarPrefix,
+    /// Shape of ClickBench extended Q6 under DataFusion row-filter pushdown:
+    /// an early cheap fixed-width predicate can prune almost all rows before a
+    /// later unprojected variable-width predicate is decoded.
+    ClickBenchQ6MixedPredicates,
+    /// Same scalar + variable-width predicate columns as [`Self::ClickBenchQ6MixedPredicates`],
+    /// but with the variable-width predicate evaluated first. This anchors the
+    /// static post-filter gate against predicate-order drift.
+    ClickBenchQ6VarWidthFirst,
+    /// Shape of ClickBench Q41-like fixed-width filters: sparse fragmented
+    /// scalar predicates with a cheap fixed-width output projection.
+    ClickBenchQ41SparseFixedOutput,
+    /// Shape of ClickBench Q40: multiple cheap scalar predicates, very small
+    /// output, and one projected predicate column used later by grouping.
+    ClickBenchQ40ScalarGroupBy,
+    /// Shape of TPC-DS Q41: a complex OR predicate over dictionary/string-like
+    /// and scalar columns where predicate evaluation dominates reader time.
+    TpcdsQ41ComplexOr,
+    /// Shape of TPC-DS Q20 catalog_sales after dynamic filters: multiple
+    /// fixed-width predicates where predicate columns are also projected.
+    TpcdsQ20ProjectedDynamicFilters,
+    /// Shape of TPC-DS Q21 after dynamic-filter pruning: sparse fragmented
+    /// fixed-width predicates where the final projection still includes the
+    /// predicate columns. This protects against choosing selectors for columns
+    /// that were already decoded/cached by predicate evaluation.
+    TpcdsQ21ProjectedFixedOutput,
+    /// Shape of TPC-DS Q2 fact scans: the dynamic filter applies to the date
+    /// key, the same date key is projected, and an additional fixed-width sales
+    /// value can still be deferred by predicate pushdown.
+    TpcdsQ2ProjectedPredicate5Pct,
+    TpcdsQ2ProjectedPredicate8Pct,
+    TpcdsQ2ProjectedPredicate10Pct,
+    TpcdsQ2ProjectedPredicate20Pct,
+    TpcdsQ2ProjectedPredicate30Pct,
+    TpcdsQ2ProjectedPredicate40Pct,
+    TpcdsQ2ProjectedPredicate50Pct,
+    /// Scalar range predicate shaped like TPC-DS Q9 `ss_quantity BETWEEN ...`
+    /// subqueries. The selected rows are random and moderately selective, and
+    /// benchmark projections cover both count-only and numeric aggregate cases.
+    TpcdsQ9QuantityRange,
+    /// Exact shape for the projected-predicate moderate-selectivity gate:
+    /// a clustered 20% timestamp predicate where the predicate column is
+    /// projected and the deferred output is variable-width.
+    ProjectedTs8PctClustered,
+    ProjectedTs20PctClustered,
+    /// Very sparse projected fixed-width scan shaped like TPC-DS fact-table
+    /// filters where the predicate column is also needed in the output projection.
+    TpcdsSparseProjectedFactScan,
 }
 
 impl std::fmt::Display for FilterType {
@@ -347,6 +511,50 @@ impl std::fmt::Display for FilterType {
             FilterType::UnselectiveClustered => "ts < 9000",
             FilterType::Composite => "float64 > 99.0 AND ts >= 9000",
             FilterType::Utf8ViewNonEmpty => "utf8View <> ''",
+            FilterType::Utf8ViewMissing => "utf8View == '<missing>'",
+            FilterType::ClickBenchQ37ScalarPrefix => "int64 == 62 AND ts < 9000",
+            FilterType::ClickBenchQ6MixedPredicates => "int64 == 9999 AND utf8View <> ''",
+            FilterType::ClickBenchQ6VarWidthFirst => "utf8View <> '' AND int64 == 9999",
+            FilterType::ClickBenchQ41SparseFixedOutput => "int64 < 8 AND ts < 9000",
+            FilterType::ClickBenchQ40ScalarGroupBy => {
+                "int64 == 62 AND float64 > 10.0 AND ts < 9000"
+            }
+            FilterType::TpcdsQ41ComplexOr => {
+                "(utf8View <> '' AND int64 < 8) OR (ts < 100 AND float64 > 95.0)"
+            }
+            FilterType::TpcdsQ20ProjectedDynamicFilters => {
+                "int64 < 12 AND ts < 9000 projected dynamic filters"
+            }
+            FilterType::TpcdsQ21ProjectedFixedOutput => {
+                "int64 < 8 AND ts < 9000 projected predicates"
+            }
+            FilterType::TpcdsQ2ProjectedPredicate10Pct => {
+                "int64 < 10 projected predicate with fixed output"
+            }
+            FilterType::TpcdsQ2ProjectedPredicate5Pct => {
+                "int64 < 5 projected predicate with fixed output"
+            }
+            FilterType::TpcdsQ2ProjectedPredicate8Pct => {
+                "int64 < 8 projected predicate with fixed output"
+            }
+            FilterType::TpcdsQ2ProjectedPredicate20Pct => {
+                "int64 < 20 projected predicate with fixed output"
+            }
+            FilterType::TpcdsQ2ProjectedPredicate30Pct => {
+                "int64 < 30 projected predicate with fixed output"
+            }
+            FilterType::TpcdsQ2ProjectedPredicate40Pct => {
+                "int64 < 40 projected predicate with fixed output"
+            }
+            FilterType::TpcdsQ2ProjectedPredicate50Pct => {
+                "int64 < 50 projected predicate with fixed output"
+            }
+            FilterType::TpcdsQ9QuantityRange => "int64 > 0 AND int64 < 21",
+            FilterType::ProjectedTs20PctClustered => {
+                "ts < 2000 projected predicate with utf8 output"
+            }
+            FilterType::ProjectedTs8PctClustered => "ts < 800 projected predicate with utf8 output",
+            FilterType::TpcdsSparseProjectedFactScan => "ts % 1000 == 0",
         };
         write!(f, "{s}")
     }
@@ -401,6 +609,113 @@ impl FilterType {
                 let scalar = StringViewArray::new_scalar("");
                 neq(array, &scalar)
             }
+            FilterType::Utf8ViewMissing => {
+                let array = batch.column(batch.schema().index_of("utf8View")?);
+                let scalar = StringViewArray::new_scalar(UTF8_VIEW_MISSING_VALUE);
+                eq(array, &scalar)
+            }
+            // ClickBenchQ37ScalarPrefix: a cheap fragmented scalar predicate
+            // evaluated before decoding a variable-width output column.
+            FilterType::ClickBenchQ37ScalarPrefix => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let counter_match = eq(int64, &Int64Array::new_scalar(62))?;
+                let date_like_range = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
+                and(&counter_match, &date_like_range)
+            }
+            FilterType::ClickBenchQ6MixedPredicates | FilterType::ClickBenchQ6VarWidthFirst => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let utf8 = batch.column(batch.schema().index_of("utf8View")?);
+                let cheap_prefix = eq(int64, &Int64Array::new_scalar(9999))?;
+                let string_suffix = neq(utf8, &StringViewArray::new_scalar(""))?;
+                and(&cheap_prefix, &string_suffix)
+            }
+            FilterType::ClickBenchQ41SparseFixedOutput
+            | FilterType::TpcdsQ21ProjectedFixedOutput => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let counter_like = lt(int64, &Int64Array::new_scalar(8))?;
+                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
+                and(&counter_like, &date_like)
+            }
+            FilterType::ClickBenchQ40ScalarGroupBy => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let float64 = batch.column(batch.schema().index_of("float64")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let counter_match = eq(int64, &Int64Array::new_scalar(62))?;
+                let width_match = gt(float64, &Float64Array::new_scalar(10.0))?;
+                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
+                and(&and(&counter_match, &width_match)?, &date_like)
+            }
+            FilterType::TpcdsQ41ComplexOr => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let float64 = batch.column(batch.schema().index_of("float64")?);
+                let utf8 = batch.column(batch.schema().index_of("utf8View")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let string_branch = and(
+                    &neq(utf8, &StringViewArray::new_scalar(""))?,
+                    &lt(int64, &Int64Array::new_scalar(8))?,
+                )?;
+                let scalar_branch = and(
+                    &lt(ts, &TimestampMillisecondArray::new_scalar(100))?,
+                    &gt(float64, &Float64Array::new_scalar(95.0))?,
+                )?;
+                or(&string_branch, &scalar_branch)
+            }
+            FilterType::TpcdsQ20ProjectedDynamicFilters => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let item_like = lt(int64, &Int64Array::new_scalar(12))?;
+                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
+                and(&item_like, &date_like)
+            }
+            FilterType::TpcdsQ2ProjectedPredicate5Pct
+            | FilterType::TpcdsQ2ProjectedPredicate8Pct
+            | FilterType::TpcdsQ2ProjectedPredicate10Pct
+            | FilterType::TpcdsQ2ProjectedPredicate20Pct
+            | FilterType::TpcdsQ2ProjectedPredicate30Pct
+            | FilterType::TpcdsQ2ProjectedPredicate40Pct
+            | FilterType::TpcdsQ2ProjectedPredicate50Pct => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let threshold = match self {
+                    FilterType::TpcdsQ2ProjectedPredicate5Pct => 5,
+                    FilterType::TpcdsQ2ProjectedPredicate8Pct => 8,
+                    FilterType::TpcdsQ2ProjectedPredicate10Pct => 10,
+                    FilterType::TpcdsQ2ProjectedPredicate20Pct => 20,
+                    FilterType::TpcdsQ2ProjectedPredicate30Pct => 30,
+                    FilterType::TpcdsQ2ProjectedPredicate40Pct => 40,
+                    FilterType::TpcdsQ2ProjectedPredicate50Pct => 50,
+                    _ => unreachable!(),
+                };
+                lt(int64, &Int64Array::new_scalar(threshold))
+            }
+            FilterType::TpcdsQ9QuantityRange => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let lower = gt(int64, &Int64Array::new_scalar(0))?;
+                let upper = lt(int64, &Int64Array::new_scalar(21))?;
+                and(&lower, &upper)
+            }
+            FilterType::ProjectedTs8PctClustered => {
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                lt(ts, &TimestampMillisecondArray::new_scalar(800))
+            }
+            FilterType::ProjectedTs20PctClustered => {
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                lt(ts, &TimestampMillisecondArray::new_scalar(2000))
+            }
+            FilterType::TpcdsSparseProjectedFactScan => {
+                let ts = batch
+                    .column(batch.schema().index_of("ts")?)
+                    .as_any()
+                    .downcast_ref::<TimestampMillisecondArray>()
+                    .unwrap();
+                Ok(BooleanArray::from(
+                    ts.values()
+                        .iter()
+                        .map(|value| value % 1000 == 0)
+                        .collect::<Vec<_>>(),
+                ))
+            }
         }
     }
 
@@ -414,7 +729,26 @@ impl FilterType {
             FilterType::UnselectiveUnclustered => &[1],
             FilterType::UnselectiveClustered => &[3],
             FilterType::Composite => &[1, 3], // Use float64 column and ts column as representative for composite
-            FilterType::Utf8ViewNonEmpty => &[2],
+            FilterType::Utf8ViewNonEmpty | FilterType::Utf8ViewMissing => &[2],
+            FilterType::ClickBenchQ37ScalarPrefix => &[0, 3],
+            FilterType::ClickBenchQ6MixedPredicates | FilterType::ClickBenchQ6VarWidthFirst => {
+                &[0, 2]
+            }
+            FilterType::ClickBenchQ40ScalarGroupBy => &[0, 1, 3],
+            FilterType::ClickBenchQ41SparseFixedOutput
+            | FilterType::TpcdsQ20ProjectedDynamicFilters
+            | FilterType::TpcdsQ21ProjectedFixedOutput => &[0, 3],
+            FilterType::TpcdsQ41ComplexOr => &[0, 1, 2, 3],
+            FilterType::TpcdsQ2ProjectedPredicate5Pct
+            | FilterType::TpcdsQ2ProjectedPredicate8Pct
+            | FilterType::TpcdsQ2ProjectedPredicate10Pct
+            | FilterType::TpcdsQ2ProjectedPredicate20Pct
+            | FilterType::TpcdsQ2ProjectedPredicate30Pct
+            | FilterType::TpcdsQ2ProjectedPredicate40Pct
+            | FilterType::TpcdsQ2ProjectedPredicate50Pct => &[0],
+            FilterType::TpcdsQ9QuantityRange => &[0],
+            FilterType::ProjectedTs8PctClustered | FilterType::ProjectedTs20PctClustered => &[3],
+            FilterType::TpcdsSparseProjectedFactScan => &[3],
         }
     }
 }
@@ -449,17 +783,8 @@ fn benchmark_filters_and_projections(c: &mut Criterion) {
 
     for filter_type in filter_types {
         for proj_case in &projection_cases {
-            // All indices corresponding to the 10 columns.
-            let all_indices = vec![0, 1, 2, 3];
             let filter_col = filter_type.filter_projection().to_vec();
-            // For the projection, either select all columns or exclude the filter column(s).
-            let output_projection: Vec<usize> = match proj_case {
-                ProjectionCase::AllColumns => all_indices.clone(),
-                ProjectionCase::ExcludeFilterColumn => all_indices
-                    .into_iter()
-                    .filter(|i| !filter_col.contains(i))
-                    .collect(),
-            };
+            let output_projection = output_projection_for(filter_type, proj_case);
 
             let reader = InMemoryReader::try_new(&parquet_file).unwrap();
             let metadata = Arc::clone(reader.metadata());
@@ -510,6 +835,820 @@ fn benchmark_filters_and_projections(c: &mut Criterion) {
     }
 }
 
+/// Compare full scan plus post-filtering against row-level pushdown strategies.
+///
+/// This group is intentionally sync-only and smaller than
+/// [`benchmark_filters_and_projections`]. It tracks the cases most likely to
+/// inform a future default `Auto` policy: selective random filters, clustered
+/// filters, ClickBench-like string filters, and the forced selector strategy
+/// that originally motivated apache/arrow-rs#8565.
+fn benchmark_sync_strategy_matrix(c: &mut Criterion) {
+    let parquet_file = Bytes::from(write_parquet_file());
+    let filter_types = [
+        FilterType::SelectiveUnclustered,
+        FilterType::ModeratelySelectiveClustered,
+        FilterType::ModeratelySelectiveUnclustered,
+        FilterType::Utf8ViewNonEmpty,
+    ];
+    let strategies = [
+        SyncStrategy::FullPostFilter,
+        SyncStrategy::PushdownAuto,
+        SyncStrategy::PushdownSelectors,
+        SyncStrategy::PushdownMask,
+    ];
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_strategy_matrix");
+
+    for filter_type in filter_types {
+        for projection_case in [
+            ProjectionCase::AllColumns,
+            ProjectionCase::ExcludeFilterColumn,
+        ] {
+            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+            let metadata = Arc::clone(reader.metadata());
+            let schema_descr = metadata.file_metadata().schema_descr();
+            let output_projection = output_projection_for(filter_type, &projection_case);
+            let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
+            let output_column_names = projection_names(&output_projection);
+            let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
+            let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
+            let pred_mask = ProjectionMask::roots(
+                schema_descr,
+                filter_type.filter_projection().iter().copied(),
+            );
+
+            for strategy in strategies {
+                let bench_id = BenchmarkId::new(
+                    format!("{filter_type}/{projection_case}"),
+                    strategy.to_string(),
+                );
+
+                group.bench_function(bench_id, |b| {
+                    b.iter(|| {
+                        let reader = reader.clone();
+                        let pred_mask = pred_mask.clone();
+                        let projection_mask = projection_mask.clone();
+                        let read_projection_mask = read_projection_mask.clone();
+                        let output_column_names = output_column_names.clone();
+
+                        match strategy {
+                            SyncStrategy::FullPostFilter => benchmark_sync_reader_post_filter(
+                                reader,
+                                read_projection_mask,
+                                output_column_names,
+                                filter_type,
+                            ),
+                            SyncStrategy::PushdownAuto => {
+                                let row_filter = row_filter_for(filter_type, pred_mask);
+                                benchmark_sync_reader_with_policy(
+                                    reader,
+                                    projection_mask,
+                                    row_filter,
+                                    RowSelectionPolicy::default(),
+                                )
+                            }
+                            SyncStrategy::PushdownSelectors => {
+                                let row_filter = row_filter_for(filter_type, pred_mask);
+                                benchmark_sync_reader_with_policy(
+                                    reader,
+                                    projection_mask,
+                                    row_filter,
+                                    RowSelectionPolicy::Selectors,
+                                )
+                            }
+                            SyncStrategy::PushdownMask => {
+                                let row_filter = row_filter_for(filter_type, pred_mask);
+                                benchmark_sync_reader_with_policy(
+                                    reader,
+                                    projection_mask,
+                                    row_filter,
+                                    RowSelectionPolicy::Mask,
+                                )
+                            }
+                        }
+                    });
+                });
+            }
+        }
+    }
+}
+
+/// Compare async full scan plus post-filtering against async row-level pushdown
+/// strategies. This is the matrix that exercises the current reader `Auto`
+/// policy through the async stream backed by the push decoder row-group pipeline.
+fn benchmark_async_strategy_matrix(c: &mut Criterion) {
+    let parquet_file = Bytes::from(write_parquet_file());
+    let filter_types = [
+        FilterType::SelectiveUnclustered,
+        FilterType::ModeratelySelectiveClustered,
+        FilterType::ModeratelySelectiveUnclustered,
+        FilterType::Utf8ViewNonEmpty,
+    ];
+    let strategies = [
+        AsyncStrategy::FullPostFilter,
+        AsyncStrategy::PushdownAuto,
+        AsyncStrategy::PushdownSelectors,
+        AsyncStrategy::PushdownMask,
+    ];
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_async_strategy_matrix");
+
+    for filter_type in filter_types {
+        for projection_case in [
+            ProjectionCase::AllColumns,
+            ProjectionCase::ExcludeFilterColumn,
+        ] {
+            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+            let metadata = Arc::clone(reader.metadata());
+            let schema_descr = metadata.file_metadata().schema_descr();
+            let output_projection = output_projection_for(filter_type, &projection_case);
+            let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
+            let output_column_names = projection_names(&output_projection);
+            let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
+            let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
+            let pred_mask = ProjectionMask::roots(
+                schema_descr,
+                filter_type.filter_projection().iter().copied(),
+            );
+
+            for strategy in strategies {
+                let bench_id = BenchmarkId::new(
+                    format!("{filter_type}/{projection_case}"),
+                    strategy.to_string(),
+                );
+                let rt_captured = rt.handle().clone();
+
+                group.bench_function(bench_id, |b| {
+                    b.iter(|| {
+                        let reader = reader.clone();
+                        let pred_mask = pred_mask.clone();
+                        let projection_mask = projection_mask.clone();
+                        let read_projection_mask = read_projection_mask.clone();
+                        let output_column_names = output_column_names.clone();
+
+                        rt_captured.block_on(async {
+                            match strategy {
+                                AsyncStrategy::FullPostFilter => {
+                                    benchmark_async_reader_post_filter(
+                                        reader,
+                                        read_projection_mask,
+                                        output_column_names,
+                                        filter_type,
+                                    )
+                                    .await
+                                }
+                                AsyncStrategy::PushdownAuto => {
+                                    let row_filter = row_filter_for(filter_type, pred_mask);
+                                    benchmark_async_reader_with_policy(
+                                        reader,
+                                        projection_mask,
+                                        row_filter,
+                                        RowSelectionPolicy::default(),
+                                    )
+                                    .await
+                                }
+                                AsyncStrategy::PushdownSelectors => {
+                                    let row_filter = row_filter_for(filter_type, pred_mask);
+                                    benchmark_async_reader_with_policy(
+                                        reader,
+                                        projection_mask,
+                                        row_filter,
+                                        RowSelectionPolicy::Selectors,
+                                    )
+                                    .await
+                                }
+                                AsyncStrategy::PushdownMask => {
+                                    let row_filter = row_filter_for(filter_type, pred_mask);
+                                    benchmark_async_reader_with_policy(
+                                        reader,
+                                        projection_mask,
+                                        row_filter,
+                                        RowSelectionPolicy::Mask,
+                                    )
+                                    .await
+                                }
+                            }
+                        })
+                    });
+                });
+            }
+        }
+    }
+}
+
+/// A small async-only matrix that isolates the cases most relevant to the
+/// row-filter Auto policy. This is intentionally narrower than
+/// [`benchmark_async_strategy_matrix`]: it keeps the benchmark output focused
+/// on cases where later PRs may teach `Auto` to switch execution modes or
+/// explicitly keep predicate pushdown.
+///
+/// The `profile_*` cases are derived from DataFusion ClickBench and TPC-DS
+/// comparisons. They keep the reader-level shapes worth tracking while
+/// excluding query regressions that did not construct a Parquet `RowFilter`.
+fn benchmark_async_auto_policy_focus(c: &mut Criterion) {
+    const SMALL_TOTAL_ROWS: usize = 20_000;
+    const SMALL_ROW_GROUP_SIZE: usize = 5_000;
+
+    let parquet_file = Bytes::from(write_parquet_file());
+    let small_parquet_file = Bytes::from(write_parquet_file_with_rows(
+        SMALL_TOTAL_ROWS,
+        SMALL_ROW_GROUP_SIZE,
+    ));
+    let cases = [
+        AsyncFocusCase::new(
+            "utf8_non_empty",
+            parquet_file.clone(),
+            FilterType::Utf8ViewNonEmpty,
+            ProjectionCase::ExcludeFilterColumn,
+        ),
+        AsyncFocusCase::new(
+            "utf8_non_empty",
+            parquet_file.clone(),
+            FilterType::Utf8ViewNonEmpty,
+            ProjectionCase::AllColumns,
+        ),
+        AsyncFocusCase::new(
+            "high_selectivity_float64",
+            parquet_file.clone(),
+            FilterType::UnselectiveUnclustered,
+            ProjectionCase::ExcludeFilterColumn,
+        ),
+        AsyncFocusCase::new(
+            "high_selectivity_ts_clustered",
+            parquet_file.clone(),
+            FilterType::UnselectiveClustered,
+            ProjectionCase::ExcludeFilterColumn,
+        ),
+        AsyncFocusCase::new(
+            "fragmented_int64_10pct",
+            parquet_file.clone(),
+            FilterType::ModeratelySelectiveUnclustered,
+            ProjectionCase::ExcludeFilterColumn,
+        ),
+        AsyncFocusCase::new(
+            "selective_float64_1pct",
+            parquet_file.clone(),
+            FilterType::SelectiveUnclustered,
+            ProjectionCase::ExcludeFilterColumn,
+        ),
+        AsyncFocusCase::new(
+            "profile_q37_scalar_utf8",
+            parquet_file.clone(),
+            FilterType::ClickBenchQ37ScalarPrefix,
+            ProjectionCase::Utf8Only,
+        ),
+        // Historical Q6 focus case: cheap fixed-width predicate before the
+        // unprojected variable-width predicate.
+        AsyncFocusCase::new(
+            "profile_q6_mixed_predicates",
+            parquet_file.clone(),
+            FilterType::ClickBenchQ6MixedPredicates,
+            ProjectionCase::Float64Only,
+        ),
+        AsyncFocusCase::new(
+            "profile_varwidth_then_fixed_prefix",
+            parquet_file.clone(),
+            FilterType::ClickBenchQ6VarWidthFirst,
+            ProjectionCase::Float64Only,
+        ),
+        AsyncFocusCase::new(
+            "profile_q40_scalar_group_by",
+            parquet_file.clone(),
+            FilterType::ClickBenchQ40ScalarGroupBy,
+            ProjectionCase::Float64AndTs,
+        ),
+        AsyncFocusCase::new(
+            "profile_q41_sparse_fixed_output",
+            parquet_file.clone(),
+            FilterType::ClickBenchQ41SparseFixedOutput,
+            ProjectionCase::Float64Only,
+        ),
+        AsyncFocusCase::new(
+            "profile_tpcds_q41_complex_or",
+            parquet_file.clone(),
+            FilterType::TpcdsQ41ComplexOr,
+            ProjectionCase::Float64Only,
+        ),
+        AsyncFocusCase::new(
+            "profile_tpcds_q20_projected_dynamic_filters",
+            parquet_file.clone(),
+            FilterType::TpcdsQ20ProjectedDynamicFilters,
+            ProjectionCase::FixedColumns,
+        ),
+        AsyncFocusCase::new(
+            "profile_q21_projected_predicate_fixed_output",
+            parquet_file.clone(),
+            FilterType::TpcdsQ21ProjectedFixedOutput,
+            ProjectionCase::FixedColumns,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_5pct",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate5Pct,
+            ProjectionCase::Int64AndFloat64,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_8pct_filter_only",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate8Pct,
+            ProjectionCase::FilterColumnsOnly,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_8pct_fixed_output",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate8Pct,
+            ProjectionCase::Int64AndFloat64,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_8pct_varwidth_output",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate8Pct,
+            ProjectionCase::Int64AndUtf8,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_10pct",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate10Pct,
+            ProjectionCase::Int64AndFloat64,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_20pct",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate20Pct,
+            ProjectionCase::Int64AndFloat64,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_20pct_varwidth_output",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate20Pct,
+            ProjectionCase::Int64AndUtf8,
+        ),
+        AsyncFocusCase::new(
+            "profile_projected_ts_8pct_fixed_output",
+            parquet_file.clone(),
+            FilterType::ProjectedTs8PctClustered,
+            ProjectionCase::Float64AndTs,
+        ),
+        AsyncFocusCase::new(
+            "profile_projected_ts_8pct_varwidth_output",
+            parquet_file.clone(),
+            FilterType::ProjectedTs8PctClustered,
+            ProjectionCase::TsAndUtf8,
+        ),
+        AsyncFocusCase::new(
+            "profile_projected_ts_20pct_fixed_output",
+            parquet_file.clone(),
+            FilterType::ProjectedTs20PctClustered,
+            ProjectionCase::Float64AndTs,
+        ),
+        AsyncFocusCase::new(
+            "profile_projected_ts_20pct_varwidth_output",
+            parquet_file.clone(),
+            FilterType::ProjectedTs20PctClustered,
+            ProjectionCase::TsAndUtf8,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_30pct",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate30Pct,
+            ProjectionCase::Int64AndFloat64,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_40pct",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate40Pct,
+            ProjectionCase::Int64AndFloat64,
+        ),
+        AsyncFocusCase::new(
+            "profile_q2_projected_predicate_50pct",
+            parquet_file.clone(),
+            FilterType::TpcdsQ2ProjectedPredicate50Pct,
+            ProjectionCase::Int64AndFloat64,
+        ),
+        AsyncFocusCase::new(
+            "profile_q1_count_only",
+            parquet_file.clone(),
+            FilterType::ClickBenchQ41SparseFixedOutput,
+            ProjectionCase::CountOnly,
+        ),
+        AsyncFocusCase::new(
+            "profile_q19_no_defer",
+            parquet_file.clone(),
+            FilterType::PointLookup,
+            ProjectionCase::FilterColumnsOnly,
+        ),
+        AsyncFocusCase::new(
+            "profile_sparse_fixed_deferred_output",
+            parquet_file.clone(),
+            FilterType::PointLookup,
+            ProjectionCase::Float64Only,
+        ),
+        AsyncFocusCase::new(
+            "profile_tpcds_sparse_projected_fact_scan",
+            parquet_file.clone(),
+            FilterType::TpcdsSparseProjectedFactScan,
+            ProjectionCase::FixedColumns,
+        ),
+        AsyncFocusCase::new(
+            "profile_q83_sparse_utf8_projected",
+            parquet_file.clone(),
+            FilterType::Utf8ViewMissing,
+            ProjectionCase::AllColumns,
+        ),
+        AsyncFocusCase::new(
+            "profile_small_scalar_no_defer",
+            small_parquet_file.clone(),
+            FilterType::ModeratelySelectiveUnclustered,
+            ProjectionCase::FilterColumnsOnly,
+        ),
+        AsyncFocusCase::new(
+            "profile_small_q37_scalar_utf8",
+            small_parquet_file,
+            FilterType::ClickBenchQ37ScalarPrefix,
+            ProjectionCase::Utf8Only,
+        ),
+        AsyncFocusCase::new(
+            "profile_q9_quantity_count",
+            parquet_file.clone(),
+            FilterType::TpcdsQ9QuantityRange,
+            ProjectionCase::FilterColumnsOnly,
+        ),
+        AsyncFocusCase::new(
+            "profile_q9_quantity_avg",
+            parquet_file,
+            FilterType::TpcdsQ9QuantityRange,
+            ProjectionCase::Float64Only,
+        ),
+    ];
+    let strategies = [
+        AsyncStrategy::FullPostFilter,
+        AsyncStrategy::PushdownAuto,
+        AsyncStrategy::PushdownMask,
+        AsyncStrategy::PushdownSelectors,
+    ];
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_async_auto_policy_focus");
+
+    for case in cases {
+        benchmark_async_focus_case(&mut group, &rt, case, &strategies);
+    }
+}
+
+/// Isolate projected scans that do not construct a [`RowFilter`].
+///
+/// This tracks the reader-level shape seen in TPC-DS Q83 return-table scans:
+/// a narrow primitive projection where row-level pushdown metrics are zero.
+/// It deliberately lives outside the adaptive-materialization matrix because there is no
+/// filter strategy to choose.
+fn benchmark_projection_scan_focus(c: &mut Criterion) {
+    let parquet_file = Bytes::from(write_parquet_file());
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_projection_scan_focus");
+
+    let case_name = "profile_q83_return_scan_primitives";
+    let projection = vec![0, 1, 3];
+    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+    let metadata = Arc::clone(reader.metadata());
+    let schema_descr = metadata.file_metadata().schema_descr();
+    let projection_mask = ProjectionMask::roots(schema_descr, projection);
+
+    let bench_id = BenchmarkId::new(case_name, "async");
+    let rt_captured = rt.handle().clone();
+    group.bench_function(bench_id, |b| {
+        b.iter(|| {
+            let reader = reader.clone();
+            let projection_mask = projection_mask.clone();
+            rt_captured.block_on(benchmark_async_reader_projected(reader, projection_mask));
+        });
+    });
+
+    let bench_id = BenchmarkId::new(case_name, "sync");
+    group.bench_function(bench_id, |b| {
+        b.iter(|| {
+            let reader = reader.clone();
+            let projection_mask = projection_mask.clone();
+            benchmark_sync_reader_projected(reader, projection_mask);
+        });
+    });
+}
+
+struct AsyncFocusCase {
+    case_name: &'static str,
+    parquet_file: Bytes,
+    filter_type: FilterType,
+    projection_case: ProjectionCase,
+}
+
+impl AsyncFocusCase {
+    fn new(
+        case_name: &'static str,
+        parquet_file: Bytes,
+        filter_type: FilterType,
+        projection_case: ProjectionCase,
+    ) -> Self {
+        Self {
+            case_name,
+            parquet_file,
+            filter_type,
+            projection_case,
+        }
+    }
+}
+
+fn benchmark_async_focus_case(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    rt: &tokio::runtime::Runtime,
+    case: AsyncFocusCase,
+    strategies: &[AsyncStrategy],
+) {
+    let AsyncFocusCase {
+        case_name,
+        parquet_file,
+        filter_type,
+        projection_case,
+    } = case;
+
+    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+    let metadata = Arc::clone(reader.metadata());
+    let schema_descr = metadata.file_metadata().schema_descr();
+    let output_projection = output_projection_for(filter_type, &projection_case);
+    let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
+    let output_column_names = projection_names(&output_projection);
+    let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
+    let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
+    let pred_mask = ProjectionMask::roots(
+        schema_descr,
+        filter_type.filter_projection().iter().copied(),
+    );
+    let q6_int64_pred_mask = ProjectionMask::roots(schema_descr, [0]);
+    let q6_utf8_pred_mask = ProjectionMask::roots(schema_descr, [2]);
+    let q41_int64_pred_mask = ProjectionMask::roots(schema_descr, [0]);
+    let q41_ts_pred_mask = ProjectionMask::roots(schema_descr, [3]);
+    let q40_float64_pred_mask = ProjectionMask::roots(schema_descr, [1]);
+
+    for strategy in strategies.iter().copied() {
+        let bench_id = BenchmarkId::new(
+            format!("{case_name}/{projection_case}"),
+            strategy.to_string(),
+        );
+        let rt_captured = rt.handle().clone();
+
+        group.bench_function(bench_id, |b| {
+            b.iter(|| {
+                let reader = reader.clone();
+                let pred_mask = pred_mask.clone();
+                let q6_int64_pred_mask = q6_int64_pred_mask.clone();
+                let q6_utf8_pred_mask = q6_utf8_pred_mask.clone();
+                let q41_int64_pred_mask = q41_int64_pred_mask.clone();
+                let q41_ts_pred_mask = q41_ts_pred_mask.clone();
+                let q40_float64_pred_mask = q40_float64_pred_mask.clone();
+                let projection_mask = projection_mask.clone();
+                let read_projection_mask = read_projection_mask.clone();
+                let output_column_names = output_column_names.clone();
+
+                rt_captured.block_on(async {
+                    match strategy {
+                        AsyncStrategy::FullPostFilter => {
+                            benchmark_async_reader_post_filter(
+                                reader,
+                                read_projection_mask,
+                                output_column_names,
+                                filter_type,
+                            )
+                            .await
+                        }
+                        AsyncStrategy::PushdownAuto => {
+                            let row_filter = row_filter_for_focus_case(
+                                filter_type,
+                                pred_mask,
+                                q6_int64_pred_mask,
+                                q6_utf8_pred_mask,
+                                q41_int64_pred_mask,
+                                q41_ts_pred_mask,
+                                q40_float64_pred_mask,
+                            );
+                            benchmark_async_reader_with_policy(
+                                reader,
+                                projection_mask,
+                                row_filter,
+                                RowSelectionPolicy::default(),
+                            )
+                            .await
+                        }
+                        AsyncStrategy::PushdownSelectors => {
+                            let row_filter = row_filter_for_focus_case(
+                                filter_type,
+                                pred_mask,
+                                q6_int64_pred_mask,
+                                q6_utf8_pred_mask,
+                                q41_int64_pred_mask,
+                                q41_ts_pred_mask,
+                                q40_float64_pred_mask,
+                            );
+                            benchmark_async_reader_with_policy(
+                                reader,
+                                projection_mask,
+                                row_filter,
+                                RowSelectionPolicy::Selectors,
+                            )
+                            .await
+                        }
+                        AsyncStrategy::PushdownMask => {
+                            let row_filter = row_filter_for_focus_case(
+                                filter_type,
+                                pred_mask,
+                                q6_int64_pred_mask,
+                                q6_utf8_pred_mask,
+                                q41_int64_pred_mask,
+                                q41_ts_pred_mask,
+                                q40_float64_pred_mask,
+                            );
+                            benchmark_async_reader_with_policy(
+                                reader,
+                                projection_mask,
+                                row_filter,
+                                RowSelectionPolicy::Mask,
+                            )
+                            .await
+                        }
+                    }
+                })
+            });
+        });
+    }
+}
+
+fn output_projection_for(filter_type: FilterType, projection_case: &ProjectionCase) -> Vec<usize> {
+    let filter_columns = filter_type.filter_projection();
+    match projection_case {
+        ProjectionCase::AllColumns | ProjectionCase::ExcludeFilterColumn => COLUMN_NAMES
+            .iter()
+            .enumerate()
+            .map(|(idx, _)| idx)
+            .filter(move |idx| {
+                matches!(projection_case, ProjectionCase::AllColumns)
+                    || !filter_columns.contains(idx)
+            })
+            .collect(),
+        ProjectionCase::FilterColumnsOnly => filter_columns.to_vec(),
+        ProjectionCase::CountOnly => vec![],
+        ProjectionCase::FixedColumns => vec![0, 1, 3],
+        ProjectionCase::Float64AndTs => vec![1, 3],
+        ProjectionCase::Float64Only => vec![1],
+        ProjectionCase::Int64AndFloat64 => vec![0, 1],
+        ProjectionCase::Int64AndUtf8 => vec![0, 2],
+        ProjectionCase::TsAndUtf8 => vec![2, 3],
+        ProjectionCase::Utf8Only => vec![2],
+    }
+}
+
+fn full_post_filter_read_projection(
+    filter_type: FilterType,
+    output_projection: &[usize],
+) -> Vec<usize> {
+    let mut read_projection = output_projection.to_vec();
+    for filter_idx in filter_type.filter_projection() {
+        if !read_projection.contains(filter_idx) {
+            read_projection.push(*filter_idx);
+        }
+    }
+    read_projection.sort_unstable();
+    read_projection
+}
+
+fn projection_names(projection: &[usize]) -> Vec<&'static str> {
+    projection.iter().map(|idx| COLUMN_NAMES[*idx]).collect()
+}
+
+fn row_filter_for(filter_type: FilterType, pred_mask: ProjectionMask) -> RowFilter {
+    let filter = ArrowPredicateFn::new(pred_mask, move |batch| filter_type.filter_batch(&batch));
+    RowFilter::new(vec![Box::new(filter)])
+}
+
+fn row_filter_for_focus_case(
+    filter_type: FilterType,
+    pred_mask: ProjectionMask,
+    q6_int64_pred_mask: ProjectionMask,
+    q6_utf8_pred_mask: ProjectionMask,
+    q41_int64_pred_mask: ProjectionMask,
+    q41_ts_pred_mask: ProjectionMask,
+    q40_float64_pred_mask: ProjectionMask,
+) -> RowFilter {
+    match filter_type {
+        FilterType::ClickBenchQ6MixedPredicates | FilterType::ClickBenchQ6VarWidthFirst => {
+            let int64_filter =
+                ArrowPredicateFn::new(q6_int64_pred_mask, move |batch: RecordBatch| {
+                    let int64 = batch.column(batch.schema().index_of("int64")?);
+                    eq(int64, &Int64Array::new_scalar(9999))
+                });
+            let utf8_filter =
+                ArrowPredicateFn::new(q6_utf8_pred_mask, move |batch: RecordBatch| {
+                    let utf8 = batch.column(batch.schema().index_of("utf8View")?);
+                    neq(utf8, &StringViewArray::new_scalar(""))
+                });
+
+            match filter_type {
+                FilterType::ClickBenchQ6MixedPredicates => {
+                    RowFilter::new(vec![Box::new(int64_filter), Box::new(utf8_filter)])
+                }
+                FilterType::ClickBenchQ6VarWidthFirst => {
+                    RowFilter::new(vec![Box::new(utf8_filter), Box::new(int64_filter)])
+                }
+                _ => unreachable!(),
+            }
+        }
+        FilterType::ClickBenchQ40ScalarGroupBy => {
+            let int64_filter =
+                ArrowPredicateFn::new(q41_int64_pred_mask, move |batch: RecordBatch| {
+                    let int64 = batch.column(batch.schema().index_of("int64")?);
+                    eq(int64, &Int64Array::new_scalar(62))
+                });
+            let float64_filter =
+                ArrowPredicateFn::new(q40_float64_pred_mask, move |batch: RecordBatch| {
+                    let float64 = batch.column(batch.schema().index_of("float64")?);
+                    gt(float64, &Float64Array::new_scalar(10.0))
+                });
+            let ts_filter = ArrowPredicateFn::new(q41_ts_pred_mask, move |batch: RecordBatch| {
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                lt(ts, &TimestampMillisecondArray::new_scalar(9000))
+            });
+
+            RowFilter::new(vec![
+                Box::new(int64_filter),
+                Box::new(float64_filter),
+                Box::new(ts_filter),
+            ])
+        }
+        FilterType::ClickBenchQ41SparseFixedOutput
+        | FilterType::TpcdsQ20ProjectedDynamicFilters
+        | FilterType::TpcdsQ21ProjectedFixedOutput => {
+            let int64_filter =
+                ArrowPredicateFn::new(q41_int64_pred_mask, move |batch: RecordBatch| {
+                    let int64 = batch.column(batch.schema().index_of("int64")?);
+                    let scalar = match filter_type {
+                        FilterType::TpcdsQ20ProjectedDynamicFilters => 12,
+                        _ => 8,
+                    };
+                    lt(int64, &Int64Array::new_scalar(scalar))
+                });
+            let ts_filter = ArrowPredicateFn::new(q41_ts_pred_mask, move |batch: RecordBatch| {
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                lt(ts, &TimestampMillisecondArray::new_scalar(9000))
+            });
+
+            RowFilter::new(vec![Box::new(int64_filter), Box::new(ts_filter)])
+        }
+        _ => row_filter_for(filter_type, pred_mask),
+    }
+}
+
+#[derive(Clone, Copy)]
+enum NestedFilterType {
+    AlwaysTrueTag,
+    TagNotZero,
+}
+
+impl std::fmt::Display for NestedFilterType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::AlwaysTrueTag => write!(f, "always_true_tag"),
+            Self::TagNotZero => write!(f, "tag_not_zero"),
+        }
+    }
+}
+
+impl NestedFilterType {
+    fn filter_batch(self, batch: &RecordBatch) -> arrow::error::Result<BooleanArray> {
+        match self {
+            Self::AlwaysTrueTag => Ok(BooleanArray::from(vec![true; batch.num_rows()])),
+            Self::TagNotZero => {
+                let tag = batch.column(batch.schema().index_of("tag")?);
+                let scalar = StringViewArray::new_scalar("tag_0");
+                neq(tag, &scalar)
+            }
+        }
+    }
+}
+
+fn nested_row_filter_for(filter_type: NestedFilterType, pred_mask: ProjectionMask) -> RowFilter {
+    let filter = ArrowPredicateFn::new(pred_mask, move |batch| filter_type.filter_batch(&batch));
+    RowFilter::new(vec![Box::new(filter)])
+}
+
 /// Use async API
 async fn benchmark_async_reader(
     reader: InMemoryReader,
@@ -529,6 +1668,94 @@ async fn benchmark_async_reader(
     }
 }
 
+async fn benchmark_async_reader_with_policy(
+    reader: InMemoryReader,
+    projection_mask: ProjectionMask,
+    row_filter: RowFilter,
+    row_selection_policy: RowSelectionPolicy,
+) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .with_row_filter(row_filter)
+        .with_row_selection_policy(row_selection_policy)
+        .build()
+        .unwrap();
+    while let Some(b) = stream.next().await {
+        b.unwrap(); // consume the batches, no buffering
+    }
+}
+
+async fn benchmark_async_reader_post_filter(
+    reader: InMemoryReader,
+    read_projection: ProjectionMask,
+    output_column_names: Vec<&'static str>,
+    filter_type: FilterType,
+) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(read_projection)
+        .build()
+        .unwrap();
+
+    while let Some(b) = stream.next().await {
+        let batch = b.unwrap();
+        let filter = filter_type.filter_batch(&batch).unwrap();
+        let filtered = arrow_select::filter::filter_record_batch(&batch, &filter).unwrap();
+        let output_projection = output_column_names
+            .iter()
+            .map(|name| filtered.schema().index_of(name).unwrap())
+            .collect::<Vec<_>>();
+        let output = filtered.project(&output_projection).unwrap();
+        std::hint::black_box(output.num_rows());
+    }
+}
+
+async fn benchmark_async_reader_post_filter_nested(
+    reader: InMemoryReader,
+    read_projection: ProjectionMask,
+    output_column_names: &[&str],
+    filter_type: NestedFilterType,
+) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(read_projection)
+        .build()
+        .unwrap();
+
+    while let Some(b) = stream.next().await {
+        let batch = b.unwrap();
+        let filter = filter_type.filter_batch(&batch).unwrap();
+        let filtered = arrow_select::filter::filter_record_batch(&batch, &filter).unwrap();
+        let output_projection = output_column_names
+            .iter()
+            .map(|name| filtered.schema().index_of(name).unwrap())
+            .collect::<Vec<_>>();
+        let output = filtered.project(&output_projection).unwrap();
+        std::hint::black_box(output.num_rows());
+    }
+}
+
+async fn benchmark_async_reader_projected(reader: InMemoryReader, projection_mask: ProjectionMask) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .build()
+        .unwrap();
+    while let Some(b) = stream.next().await {
+        let batch = b.unwrap();
+        std::hint::black_box(batch.num_rows());
+    }
+}
+
 /// Like [`benchmark_async_reader`] but also threads `with_limit(limit)` into
 /// the stream builder. Used by the `LIMIT` benchmark below.
 async fn benchmark_async_reader_with_limit(
@@ -569,6 +1796,65 @@ fn benchmark_sync_reader(
     }
 }
 
+fn benchmark_sync_reader_with_policy(
+    reader: InMemoryReader,
+    projection_mask: ProjectionMask,
+    row_filter: RowFilter,
+    row_selection_policy: RowSelectionPolicy,
+) {
+    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .with_row_filter(row_filter)
+        .with_row_selection_policy(row_selection_policy)
+        .build()
+        .unwrap();
+    for b in stream {
+        b.unwrap(); // consume the batches, no buffering
+    }
+}
+
+fn benchmark_sync_reader_post_filter(
+    reader: InMemoryReader,
+    read_projection: ProjectionMask,
+    output_column_names: Vec<&'static str>,
+    filter_type: FilterType,
+) {
+    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(read_projection)
+        .build()
+        .unwrap();
+
+    for b in stream {
+        let batch = b.unwrap();
+        let filter = filter_type.filter_batch(&batch).unwrap();
+        let filtered = arrow_select::filter::filter_record_batch(&batch, &filter).unwrap();
+        let output_projection = output_column_names
+            .iter()
+            .map(|name| filtered.schema().index_of(name).unwrap())
+            .collect::<Vec<_>>();
+        let output = filtered.project(&output_projection).unwrap();
+        std::hint::black_box(output.num_rows());
+    }
+}
+
+fn benchmark_sync_reader_projected(reader: InMemoryReader, projection_mask: ProjectionMask) {
+    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .build()
+        .unwrap();
+
+    for b in stream {
+        let batch = b.unwrap();
+        std::hint::black_box(batch.num_rows());
+    }
+}
+
 /// Adapter to read asynchronously from in memory bytes and always loads the
 /// metadata with page indexes.
 #[derive(Debug, Clone)]
@@ -636,7 +1922,6 @@ fn benchmark_filters_with_limit(c: &mut Criterion) {
         ProjectionCase::AllColumns,
         ProjectionCase::ExcludeFilterColumn,
     ];
-    let all_indices = vec![0, 1, 2, 3];
 
     let rt = tokio::runtime::Builder::new_multi_thread()
         .enable_all()
@@ -648,14 +1933,7 @@ fn benchmark_filters_with_limit(c: &mut Criterion) {
     for filter_type in filter_types {
         for proj_case in &projection_cases {
             let filter_col = filter_type.filter_projection().to_vec();
-            let output_projection: Vec<usize> = match proj_case {
-                ProjectionCase::AllColumns => all_indices.clone(),
-                ProjectionCase::ExcludeFilterColumn => all_indices
-                    .iter()
-                    .copied()
-                    .filter(|i| !filter_col.contains(i))
-                    .collect(),
-            };
+            let output_projection = output_projection_for(filter_type, proj_case);
 
             let reader = InMemoryReader::try_new(&parquet_file).unwrap();
             let metadata = Arc::clone(reader.metadata());
@@ -693,9 +1971,109 @@ fn benchmark_filters_with_limit(c: &mut Criterion) {
     }
 }
 
+/// Focused nested-output case for comparing manual post-filtering against
+/// row-filter pushdown policies.
+///
+/// The predicate column is an unprojected variable-width scalar column, and the
+/// output is a whole nested `Struct` root. This isolates the reader case enabled
+/// by root-aware post-filter projection without requiring recursive nested-child
+/// projection.
+fn benchmark_async_nested_post_filter_focus(c: &mut Criterion) {
+    let parquet_file = Bytes::from(write_nested_parquet_file_with_rows(
+        TOTAL_ROWS,
+        ROW_GROUP_SIZE,
+    ));
+    let strategies = [
+        AsyncStrategy::FullPostFilter,
+        AsyncStrategy::PushdownAuto,
+        AsyncStrategy::PushdownMask,
+        AsyncStrategy::PushdownSelectors,
+    ];
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_async_nested_post_filter_focus");
+    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+    let metadata = Arc::clone(reader.metadata());
+    let schema_descr = metadata.file_metadata().schema_descr();
+    let output_projection = ProjectionMask::columns(schema_descr, ["payload"]);
+    let read_projection = ProjectionMask::columns(schema_descr, ["tag", "payload"]);
+    let pred_mask = ProjectionMask::columns(schema_descr, ["tag"]);
+    let filter_cases = [
+        NestedFilterType::AlwaysTrueTag,
+        NestedFilterType::TagNotZero,
+    ];
+
+    for filter_case in filter_cases {
+        for strategy in strategies {
+            let bench_id = BenchmarkId::new(
+                format!("whole_struct_output/{filter_case}"),
+                strategy.to_string(),
+            );
+            let rt_captured = rt.handle().clone();
+            group.bench_function(bench_id, |b| {
+                b.iter(|| {
+                    let reader = reader.clone();
+                    let pred_mask = pred_mask.clone();
+                    let output_projection = output_projection.clone();
+                    let read_projection = read_projection.clone();
+                    rt_captured.block_on(async {
+                        match strategy {
+                            AsyncStrategy::FullPostFilter => {
+                                benchmark_async_reader_post_filter_nested(
+                                    reader,
+                                    read_projection,
+                                    &["payload"],
+                                    filter_case,
+                                )
+                                .await
+                            }
+                            AsyncStrategy::PushdownAuto => {
+                                benchmark_async_reader_with_policy(
+                                    reader,
+                                    output_projection,
+                                    nested_row_filter_for(filter_case, pred_mask),
+                                    RowSelectionPolicy::default(),
+                                )
+                                .await
+                            }
+                            AsyncStrategy::PushdownSelectors => {
+                                benchmark_async_reader_with_policy(
+                                    reader,
+                                    output_projection,
+                                    nested_row_filter_for(filter_case, pred_mask),
+                                    RowSelectionPolicy::Selectors,
+                                )
+                                .await
+                            }
+                            AsyncStrategy::PushdownMask => {
+                                benchmark_async_reader_with_policy(
+                                    reader,
+                                    output_projection,
+                                    nested_row_filter_for(filter_case, pred_mask),
+                                    RowSelectionPolicy::Mask,
+                                )
+                                .await
+                            }
+                        }
+                    })
+                });
+            });
+        }
+    }
+}
+
 criterion_group!(
     benches,
     benchmark_filters_and_projections,
+    benchmark_sync_strategy_matrix,
+    benchmark_async_strategy_matrix,
+    benchmark_async_auto_policy_focus,
+    benchmark_projection_scan_focus,
     benchmark_filters_with_limit,
+    benchmark_async_nested_post_filter_focus,
 );
 criterion_main!(benches);
diff --git a/parquet/benches/row_selection_cursor.rs b/parquet/benches/row_selection_cursor.rs
index 49c9e6d68acf..b5d73eefb6b6 100644
--- a/parquet/benches/row_selection_cursor.rs
+++ b/parquet/benches/row_selection_cursor.rs
@@ -34,6 +34,7 @@ const TOTAL_ROWS: usize = 1 << 20;
 const BATCH_SIZE: usize = 1 << 10;
 const BASE_SEED: u64 = 0xA55AA55A;
 const AVG_SELECTOR_LENGTHS: &[usize] = &[4, 8, 12, 16, 20, 24, 28, 32, 36, 40];
+const SHAPE_FOCUS_SELECTED_RUN_LENGTHS: &[usize] = &[1, 2, 4, 8, 32];
 const COLUMN_WIDTHS: &[usize] = &[2, 4, 8, 16, 32];
 const UTF8VIEW_LENS: &[usize] = &[4, 8, 16, 32, 64, 128, 256];
 const BENCH_MODES: &[BenchMode] = &[BenchMode::ReadSelector, BenchMode::ReadMask];
@@ -203,6 +204,87 @@ fn criterion_benchmark(c: &mut Criterion) {
             BASE_SEED ^ ((offset as u64) << 40),
         );
     }
+
+    bench_shape_focus(c);
+}
+
+fn bench_shape_focus(c: &mut Criterion) {
+    let scenarios = [
+        ShapeFocusScenario {
+            name: "sparse10",
+            select_ratio: 0.1,
+            start_with_select: false,
+        },
+        ShapeFocusScenario {
+            name: "sparse20",
+            select_ratio: 0.2,
+            start_with_select: false,
+        },
+        ShapeFocusScenario {
+            name: "moderate40",
+            select_ratio: 0.4,
+            start_with_select: false,
+        },
+        ShapeFocusScenario {
+            name: "dense80",
+            select_ratio: 0.8,
+            start_with_select: true,
+        },
+    ];
+
+    let profiles = [
+        DataProfile {
+            name: "int32",
+            build_batch: build_int32_batch,
+        },
+        DataProfile {
+            name: "utf8view",
+            build_batch: build_utf8view_batch,
+        },
+    ];
+
+    for profile in profiles {
+        let parquet_data = build_parquet_data(TOTAL_ROWS, profile.build_batch);
+        for scenario in &scenarios {
+            for &selected_run_len in SHAPE_FOCUS_SELECTED_RUN_LENGTHS {
+                let selectors =
+                    generate_shape_focus_selectors(selected_run_len, TOTAL_ROWS, scenario);
+                if selectors.is_empty() {
+                    continue;
+                }
+
+                let stats = SelectorStats::new(&selectors);
+                let selection = RowSelection::from(selectors);
+                let suffix = format!(
+                    "shape-focus-{}-{}-run{:02}-avg{:.1}-sel{:02}",
+                    scenario.name,
+                    profile.name,
+                    selected_run_len,
+                    stats.average_selector_len,
+                    (stats.select_ratio * 100.0).round() as u32
+                );
+
+                let bench_input = BenchInput {
+                    parquet_data: parquet_data.clone(),
+                    selection,
+                };
+
+                for &mode in BENCH_MODES {
+                    c.bench_with_input(
+                        BenchmarkId::new(mode.label(), &suffix),
+                        &bench_input,
+                        |b, input| {
+                            b.iter(|| {
+                                let total =
+                                    run_read(&input.parquet_data, &input.selection, mode.policy());
+                                hint::black_box(total);
+                            });
+                        },
+                    );
+                }
+            }
+        }
+    }
 }
 
 fn bench_over_lengths(
@@ -349,6 +431,12 @@ struct Scenario {
     distribution: RunDistribution,
 }
 
+struct ShapeFocusScenario {
+    name: &'static str,
+    select_ratio: f64,
+    start_with_select: bool,
+}
+
 #[derive(Clone)]
 enum RunDistribution {
     Constant,
@@ -409,6 +497,66 @@ fn generate_selectors(
     selection.into()
 }
 
+fn generate_shape_focus_selectors(
+    selected_run_len: usize,
+    total_rows: usize,
+    scenario: &ShapeFocusScenario,
+) -> Vec<RowSelector> {
+    const CYCLE_ROWS: usize = 1_000;
+
+    assert!(selected_run_len > 0);
+    assert!(
+        (0.0..=1.0).contains(&scenario.select_ratio),
+        "select_ratio must be in [0, 1]"
+    );
+
+    let mut selectors = Vec::new();
+    let mut remaining_rows = total_rows;
+
+    while remaining_rows > 0 {
+        let cycle_rows = CYCLE_ROWS.min(remaining_rows);
+        let selected_rows = (cycle_rows as f64 * scenario.select_ratio).round() as usize;
+        if selected_rows == 0 {
+            selectors.push(RowSelector::skip(cycle_rows));
+            remaining_rows -= cycle_rows;
+            continue;
+        }
+        if selected_rows >= cycle_rows {
+            selectors.push(RowSelector::select(cycle_rows));
+            remaining_rows -= cycle_rows;
+            continue;
+        }
+
+        let selected_runs = selected_rows.div_ceil(selected_run_len);
+        let skipped_rows = cycle_rows - selected_rows;
+        if skipped_rows < selected_runs {
+            return Vec::new();
+        }
+
+        let base_skip_len = skipped_rows / selected_runs;
+        let extra_skip_runs = skipped_rows % selected_runs;
+        let mut remaining_selected_rows = selected_rows;
+
+        for run_idx in 0..selected_runs {
+            let skip_len = base_skip_len + usize::from(run_idx < extra_skip_runs);
+            let select_len = selected_run_len.min(remaining_selected_rows);
+            if scenario.start_with_select {
+                selectors.push(RowSelector::select(select_len));
+                selectors.push(RowSelector::skip(skip_len));
+            } else {
+                selectors.push(RowSelector::skip(skip_len));
+                selectors.push(RowSelector::select(select_len));
+            }
+            remaining_selected_rows -= select_len;
+        }
+
+        remaining_rows -= cycle_rows;
+    }
+
+    let selection: RowSelection = selectors.into();
+    selection.into()
+}
+
 fn sample_length(mean: f64, distribution: &RunDistribution, rng: &mut StdRng) -> usize {
     match distribution {
         RunDistribution::Constant => mean.round().max(1.0) as usize,

From f11b48e99e234354b292471d20a87ce55d5495b0 Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei.huang@jsessh.com>
Date: Fri, 12 Jun 2026 18:54:50 +0800
Subject: [PATCH 02/14] ci: install cargo-msrv with locked dependencies

---
 .github/workflows/rust.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 77fccdbebc46..964b16b4fc9d 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -118,7 +118,7 @@ jobs:
         uses: ./.github/actions/setup-builder
       - name: Install cargo-msrv (if needed)
         # cargo-msrv binary may be cached by the cargo cache step in setup-builder, and cargo install will error if it is already installed
-        run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv ; fi
+        run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv --locked ; fi
       - name: Check all packages
         run: |
           # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies

From c8a627ced95613be91182850f1314ad38701ea1b Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei.huang@jsessh.com>
Date: Fri, 12 Jun 2026 19:00:51 +0800
Subject: [PATCH 03/14] Revert "ci: install cargo-msrv with locked
 dependencies"

This reverts commit f11b48e99e234354b292471d20a87ce55d5495b0.
---
 .github/workflows/rust.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 964b16b4fc9d..77fccdbebc46 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -118,7 +118,7 @@ jobs:
         uses: ./.github/actions/setup-builder
       - name: Install cargo-msrv (if needed)
         # cargo-msrv binary may be cached by the cargo cache step in setup-builder, and cargo install will error if it is already installed
-        run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv --locked ; fi
+        run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv ; fi
       - name: Check all packages
         run: |
           # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies

From faa058f146e09f0932808bd4e400413f4a88012a Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei.huang@jsessh.com>
Date: Fri, 12 Jun 2026 19:46:35 +0800
Subject: [PATCH 04/14] ci: install cargo-msrv with locked dependencies

---
 .github/workflows/rust.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 77fccdbebc46..964b16b4fc9d 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -118,7 +118,7 @@ jobs:
         uses: ./.github/actions/setup-builder
       - name: Install cargo-msrv (if needed)
         # cargo-msrv binary may be cached by the cargo cache step in setup-builder, and cargo install will error if it is already installed
-        run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv ; fi
+        run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv --locked ; fi
       - name: Check all packages
         run: |
           # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies

From d95eddffc9f05fa2e03782767da3f3cf2fa680f9 Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei.huang@jsessh.com>
Date: Fri, 12 Jun 2026 21:29:33 +0800
Subject: [PATCH 05/14] Revert "ci: install cargo-msrv with locked
 dependencies"

This reverts commit faa058f146e09f0932808bd4e400413f4a88012a.
---
 .github/workflows/rust.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 964b16b4fc9d..77fccdbebc46 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -118,7 +118,7 @@ jobs:
         uses: ./.github/actions/setup-builder
       - name: Install cargo-msrv (if needed)
         # cargo-msrv binary may be cached by the cargo cache step in setup-builder, and cargo install will error if it is already installed
-        run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv --locked ; fi
+        run: if which cargo-msrv ; then echo "using existing cargo-msrv binary" ; else cargo install cargo-msrv ; fi
       - name: Check all packages
         run: |
           # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies

From 40727a52c64517d2831a2e4d992c4531dfc4c9ca Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei_huang@qq.com>
Date: Sat, 13 Jun 2026 00:26:24 +0800
Subject: [PATCH 06/14] Expose parquet benchmark helpers and streamline
 post-filtering

---
 parquet/benches/arrow_reader_row_filter.rs | 78 ++++++++++++++--------
 1 file changed, 50 insertions(+), 28 deletions(-)

diff --git a/parquet/benches/arrow_reader_row_filter.rs b/parquet/benches/arrow_reader_row_filter.rs
index 6ae7c816e56b..57ce949dcb2d 100644
--- a/parquet/benches/arrow_reader_row_filter.rs
+++ b/parquet/benches/arrow_reader_row_filter.rs
@@ -55,7 +55,7 @@
 use arrow::array::{
     ArrayRef, BooleanArray, Float64Array, Int64Array, StructArray, TimestampMillisecondArray,
 };
-use arrow::compute::kernels::cmp::{eq, gt, lt, neq};
+use arrow::compute::kernels::cmp::{eq, gt, lt, lt_eq, neq};
 use arrow::compute::{and, or};
 use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
 use arrow::record_batch::RecordBatch;
@@ -115,6 +115,14 @@ fn create_float64_array(size: usize) -> ArrayRef {
     Arc::new(Float64Array::from(values)) as ArrayRef
 }
 
+fn append_utf8_view_value(builder: &mut StringViewBuilder, value: &str) {
+    if builder.len() % 1_000 == 0 {
+        builder.append_value(UTF8_VIEW_MISSING_VALUE);
+    } else {
+        builder.append_value(value);
+    }
+}
+
 /// Creates a utf8View array of a given size with random strings.
 ///
 /// This is modeled after the "SearchPhrase" column in the ClickBench benchmark.
@@ -146,11 +154,11 @@ fn create_utf8_view_array(size: usize) -> ArrayRef {
         let choice = rng.random_range(0..100);
         if choice < EMPTY_DENSITY {
             for _ in 0..run_length {
-                builder.append_value("");
+                append_utf8_view_value(&mut builder, "");
             }
         } else {
             for _ in 0..run_length {
-                builder.append_value(random_string(&mut rng));
+                append_utf8_view_value(&mut builder, &random_string(&mut rng));
             }
         }
     }
@@ -165,7 +173,7 @@ fn create_ts_array(size: usize) -> ArrayRef {
 }
 
 /// Creates a RecordBatch with 100K rows and 4 columns: int64, float64, utf8View, and ts.
-fn create_record_batch(size: usize) -> RecordBatch {
+pub(crate) fn create_record_batch(size: usize) -> RecordBatch {
     let fields = vec![
         Field::new("int64", DataType::Int64, false),
         Field::new("float64", DataType::Float64, false),
@@ -327,7 +335,7 @@ impl std::fmt::Display for AsyncStrategy {
 /// FilterType encapsulates the different filter comparisons.
 /// The variants correspond to the different filter patterns.
 #[derive(Clone, Copy, Debug)]
-enum FilterType {
+pub(crate) enum FilterType {
     /// "Point Lookup": selects a single row
     /// ```text
     /// ┌───────────────┐    ┌───────────────┐
@@ -562,7 +570,7 @@ impl std::fmt::Display for FilterType {
 
 impl FilterType {
     /// Applies the specified filter on the given RecordBatch and returns a BooleanArray mask.
-    fn filter_batch(&self, batch: &RecordBatch) -> arrow::error::Result<BooleanArray> {
+    pub(crate) fn filter_batch(&self, batch: &RecordBatch) -> arrow::error::Result<BooleanArray> {
         match self {
             // Point Lookup on int64 column
             FilterType::PointLookup => {
@@ -590,7 +598,7 @@ impl FilterType {
             // Unselective Unclustered on float64 column: NOT (float64 > 99.0)
             FilterType::UnselectiveUnclustered => {
                 let array = batch.column(batch.schema().index_of("float64")?);
-                gt(array, &Float64Array::new_scalar(99.0))
+                lt_eq(array, &Float64Array::new_scalar(99.0))
             }
             // Unselective Clustered on ts column: ts < 9000
             FilterType::UnselectiveClustered => {
@@ -1534,6 +1542,32 @@ fn projection_names(projection: &[usize]) -> Vec<&'static str> {
     projection.iter().map(|idx| COLUMN_NAMES[*idx]).collect()
 }
 
+pub(crate) fn filter_projected_record_batch(
+    batch: &RecordBatch,
+    filter: &BooleanArray,
+    output_column_names: &[&str],
+) -> arrow::error::Result<RecordBatch> {
+    let output_projection = output_column_names
+        .iter()
+        .map(|name| batch.schema().index_of(name))
+        .collect::<arrow::error::Result<Vec<_>>>()?;
+    let output = batch.project(&output_projection)?;
+    arrow_select::filter::filter_record_batch(&output, filter)
+}
+
+pub(crate) fn post_filter_projected_num_rows(
+    batch: &RecordBatch,
+    filter: &BooleanArray,
+    output_column_names: &[&str],
+) -> arrow::error::Result<usize> {
+    if output_column_names.is_empty() {
+        return Ok(filter.true_count());
+    }
+
+    let output = filter_projected_record_batch(batch, filter, output_column_names)?;
+    Ok(output.num_rows())
+}
+
 fn row_filter_for(filter_type: FilterType, pred_mask: ProjectionMask) -> RowFilter {
     let filter = ArrowPredicateFn::new(pred_mask, move |batch| filter_type.filter_batch(&batch));
     RowFilter::new(vec![Box::new(filter)])
@@ -1705,13 +1739,9 @@ async fn benchmark_async_reader_post_filter(
     while let Some(b) = stream.next().await {
         let batch = b.unwrap();
         let filter = filter_type.filter_batch(&batch).unwrap();
-        let filtered = arrow_select::filter::filter_record_batch(&batch, &filter).unwrap();
-        let output_projection = output_column_names
-            .iter()
-            .map(|name| filtered.schema().index_of(name).unwrap())
-            .collect::<Vec<_>>();
-        let output = filtered.project(&output_projection).unwrap();
-        std::hint::black_box(output.num_rows());
+        let output_rows =
+            post_filter_projected_num_rows(&batch, &filter, &output_column_names).unwrap();
+        std::hint::black_box(output_rows);
     }
 }
 
@@ -1732,13 +1762,9 @@ async fn benchmark_async_reader_post_filter_nested(
     while let Some(b) = stream.next().await {
         let batch = b.unwrap();
         let filter = filter_type.filter_batch(&batch).unwrap();
-        let filtered = arrow_select::filter::filter_record_batch(&batch, &filter).unwrap();
-        let output_projection = output_column_names
-            .iter()
-            .map(|name| filtered.schema().index_of(name).unwrap())
-            .collect::<Vec<_>>();
-        let output = filtered.project(&output_projection).unwrap();
-        std::hint::black_box(output.num_rows());
+        let output_rows =
+            post_filter_projected_num_rows(&batch, &filter, output_column_names).unwrap();
+        std::hint::black_box(output_rows);
     }
 }
 
@@ -1831,13 +1857,9 @@ fn benchmark_sync_reader_post_filter(
     for b in stream {
         let batch = b.unwrap();
         let filter = filter_type.filter_batch(&batch).unwrap();
-        let filtered = arrow_select::filter::filter_record_batch(&batch, &filter).unwrap();
-        let output_projection = output_column_names
-            .iter()
-            .map(|name| filtered.schema().index_of(name).unwrap())
-            .collect::<Vec<_>>();
-        let output = filtered.project(&output_projection).unwrap();
-        std::hint::black_box(output.num_rows());
+        let output_rows =
+            post_filter_projected_num_rows(&batch, &filter, &output_column_names).unwrap();
+        std::hint::black_box(output_rows);
     }
 }
 

From a5c636bf07faad3686f84fde5194481b7c8b08ea Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei_huang@qq.com>
Date: Sat, 13 Jun 2026 00:54:46 +0800
Subject: [PATCH 07/14] Refactor parquet row selection shape-focus benchmarks

---
 parquet/benches/row_selection_cursor.rs | 102 ++++++++++++++----------
 1 file changed, 62 insertions(+), 40 deletions(-)

diff --git a/parquet/benches/row_selection_cursor.rs b/parquet/benches/row_selection_cursor.rs
index b5d73eefb6b6..8b5b13cdeea5 100644
--- a/parquet/benches/row_selection_cursor.rs
+++ b/parquet/benches/row_selection_cursor.rs
@@ -35,9 +35,36 @@ const BATCH_SIZE: usize = 1 << 10;
 const BASE_SEED: u64 = 0xA55AA55A;
 const AVG_SELECTOR_LENGTHS: &[usize] = &[4, 8, 12, 16, 20, 24, 28, 32, 36, 40];
 const SHAPE_FOCUS_SELECTED_RUN_LENGTHS: &[usize] = &[1, 2, 4, 8, 32];
+const DENSE_SHAPE_FOCUS_SELECTED_RUN_LENGTHS: &[usize] = &[4, 8, 32];
 const COLUMN_WIDTHS: &[usize] = &[2, 4, 8, 16, 32];
 const UTF8VIEW_LENS: &[usize] = &[4, 8, 16, 32, 64, 128, 256];
 const BENCH_MODES: &[BenchMode] = &[BenchMode::ReadSelector, BenchMode::ReadMask];
+const SHAPE_FOCUS_SCENARIOS: &[ShapeFocusScenario] = &[
+    ShapeFocusScenario {
+        name: "sparse10",
+        select_ratio: 0.1,
+        start_with_select: false,
+        selected_run_lengths: SHAPE_FOCUS_SELECTED_RUN_LENGTHS,
+    },
+    ShapeFocusScenario {
+        name: "sparse20",
+        select_ratio: 0.2,
+        start_with_select: false,
+        selected_run_lengths: SHAPE_FOCUS_SELECTED_RUN_LENGTHS,
+    },
+    ShapeFocusScenario {
+        name: "moderate40",
+        select_ratio: 0.4,
+        start_with_select: false,
+        selected_run_lengths: SHAPE_FOCUS_SELECTED_RUN_LENGTHS,
+    },
+    ShapeFocusScenario {
+        name: "dense80",
+        select_ratio: 0.8,
+        start_with_select: true,
+        selected_run_lengths: DENSE_SHAPE_FOCUS_SELECTED_RUN_LENGTHS,
+    },
+];
 
 struct DataProfile {
     name: &'static str,
@@ -209,29 +236,6 @@ fn criterion_benchmark(c: &mut Criterion) {
 }
 
 fn bench_shape_focus(c: &mut Criterion) {
-    let scenarios = [
-        ShapeFocusScenario {
-            name: "sparse10",
-            select_ratio: 0.1,
-            start_with_select: false,
-        },
-        ShapeFocusScenario {
-            name: "sparse20",
-            select_ratio: 0.2,
-            start_with_select: false,
-        },
-        ShapeFocusScenario {
-            name: "moderate40",
-            select_ratio: 0.4,
-            start_with_select: false,
-        },
-        ShapeFocusScenario {
-            name: "dense80",
-            select_ratio: 0.8,
-            start_with_select: true,
-        },
-    ];
-
     let profiles = [
         DataProfile {
             name: "int32",
@@ -245,25 +249,21 @@ fn bench_shape_focus(c: &mut Criterion) {
 
     for profile in profiles {
         let parquet_data = build_parquet_data(TOTAL_ROWS, profile.build_batch);
-        for scenario in &scenarios {
-            for &selected_run_len in SHAPE_FOCUS_SELECTED_RUN_LENGTHS {
+        for scenario in shape_focus_scenarios() {
+            for &selected_run_len in scenario.selected_run_lengths {
                 let selectors =
                     generate_shape_focus_selectors(selected_run_len, TOTAL_ROWS, scenario);
-                if selectors.is_empty() {
-                    continue;
-                }
-
-                let stats = SelectorStats::new(&selectors);
-                let selection = RowSelection::from(selectors);
-                let suffix = format!(
-                    "shape-focus-{}-{}-run{:02}-avg{:.1}-sel{:02}",
+                assert!(
+                    !selectors.is_empty(),
+                    "invalid shape focus case {} maxrun {}",
                     scenario.name,
-                    profile.name,
-                    selected_run_len,
-                    stats.average_selector_len,
-                    (stats.select_ratio * 100.0).round() as u32
+                    selected_run_len
                 );
 
+                let suffix =
+                    shape_focus_suffix(scenario, profile.name, selected_run_len, &selectors);
+                let selection = RowSelection::from(selectors);
+
                 let bench_input = BenchInput {
                     parquet_data: parquet_data.clone(),
                     selection,
@@ -431,10 +431,11 @@ struct Scenario {
     distribution: RunDistribution,
 }
 
-struct ShapeFocusScenario {
-    name: &'static str,
+pub(crate) struct ShapeFocusScenario {
+    pub(crate) name: &'static str,
     select_ratio: f64,
     start_with_select: bool,
+    pub(crate) selected_run_lengths: &'static [usize],
 }
 
 #[derive(Clone)]
@@ -497,7 +498,11 @@ fn generate_selectors(
     selection.into()
 }
 
-fn generate_shape_focus_selectors(
+pub(crate) fn shape_focus_scenarios() -> &'static [ShapeFocusScenario] {
+    SHAPE_FOCUS_SCENARIOS
+}
+
+pub(crate) fn generate_shape_focus_selectors(
     selected_run_len: usize,
     total_rows: usize,
     scenario: &ShapeFocusScenario,
@@ -557,6 +562,23 @@ fn generate_shape_focus_selectors(
     selection.into()
 }
 
+pub(crate) fn shape_focus_suffix(
+    scenario: &ShapeFocusScenario,
+    profile_name: &str,
+    selected_run_len: usize,
+    selectors: &[RowSelector],
+) -> String {
+    let stats = SelectorStats::new(selectors);
+    format!(
+        "shape-focus-{}-{}-maxrun{:02}-avg{:.1}-sel{:02}",
+        scenario.name,
+        profile_name,
+        selected_run_len,
+        stats.average_selector_len,
+        (stats.select_ratio * 100.0).round() as u32
+    )
+}
+
 fn sample_length(mean: f64, distribution: &RunDistribution, rng: &mut StdRng) -> usize {
     match distribution {
         RunDistribution::Constant => mean.round().max(1.0) as usize,

From fe69b217559040e1edc631ffebaa5ae0a0f45076 Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei_huang@qq.com>
Date: Mon, 15 Jun 2026 00:48:19 +0800
Subject: [PATCH 08/14] Rename parquet row-filter benchmark cases for clarity

---
 parquet/benches/arrow_reader_row_filter.rs | 801 +++++++++++++--------
 parquet/benches/row_selection_cursor.rs    |  17 +
 2 files changed, 535 insertions(+), 283 deletions(-)

diff --git a/parquet/benches/arrow_reader_row_filter.rs b/parquet/benches/arrow_reader_row_filter.rs
index 57ce949dcb2d..cb26443fa3b0 100644
--- a/parquet/benches/arrow_reader_row_filter.rs
+++ b/parquet/benches/arrow_reader_row_filter.rs
@@ -35,22 +35,27 @@
 //!
 //! [Efficient Filter Pushdown in Parquet]: https://datafusion.apache.org/blog/2025/03/21/parquet-pushdown/
 //!
-//! The benchmark creates an in-memory Parquet file with 100K rows and ten columns.
-//! The first four columns are:
-//!   - int64: random integers (range: 0..100) generated with a fixed seed.
-//!   - float64: random floating-point values (range: 0.0..100.0) generated with a fixed seed.
-//!   - utf8View: random strings with some empty values and occasional constant "const" values.
-//!   - ts: sequential timestamps in milliseconds.
-//!
-//! The following six columns (for filtering) are generated to mimic different
-//! filter selectivity and clustering patterns:
-//!   - pt: for Point Lookup – exactly one row is set to "unique_point", all others are random strings.
-//!   - sel: for Selective Unclustered – exactly 1% of rows (those with i % 100 == 0) are "selected".
-//!   - mod_clustered: for Moderately Selective Clustered – in each 10K-row block, the first 10 rows are "mod_clustered".
-//!   - mod_unclustered: for Moderately Selective Unclustered – exactly 10% of rows (those with i % 10 == 1) are "mod_unclustered".
-//!   - unsel_unclustered: for Unselective Unclustered – exactly 99% of rows (those with i % 100 != 0) are "unsel_unclustered".
-//!   - unsel_clustered: for Unselective Clustered – in each 10K-row block, rows with an offset >= 1000 are "unsel_clustered".
+//! The benchmark creates an in-memory Parquet file with 500K rows and four root
+//! columns:
+//! - `int64`: random integers with an injected point-lookup value.
+//! - `float64`: random floating-point values used for sparse and dense filters.
+//! - `utf8View`: ClickBench-like string values with sparse sentinel values.
+//! - `ts`: sequential timestamps used for clustered filters.
 //!
+//! The benchmark groups cover a few distinct reader-level questions:
+//! - `arrow_reader_row_filter`: baseline filter/projection combinations.
+//! - `arrow_reader_row_filter_{async_,}strategy_matrix`: full post-filtering
+//!   versus row-filter pushdown with `Auto`, forced `Selectors`, and forced
+//!   `Mask`.
+//! - `arrow_reader_row_filter_async_auto_policy_focus`: focused synthetic shapes
+//!   derived from ClickBench and TPC-DS regressions, including sparse and dense
+//!   filters, clustered and fragmented selections, variable-width predicates,
+//!   projected predicate columns, count/filter-only outputs, and mixed predicate
+//!   order.
+//! - `arrow_reader_projection_scan_focus`: projection-only scans that do not
+//!   construct a `RowFilter`.
+//! - `arrow_reader_row_filter_async_nested_post_filter_focus`: nested root output
+//!   with a separate predicate column.
 
 use arrow::array::{
     ArrayRef, BooleanArray, Float64Array, Int64Array, StructArray, TimestampMillisecondArray,
@@ -441,10 +446,10 @@ pub(crate) enum FilterType {
     /// [ClickBench]: https://github.com/ClickHouse/ClickBench
     /// [Q21-Q27]: https://github.com/apache/datafusion/blob/b7177234e65cbbb2dcc04c252f6acd80bb026362/benchmarks/queries/clickbench/queries.sql#L22-L28
     Utf8ViewNonEmpty,
-    /// Sparse variable-width predicate shaped like TPC-DS Q83 dynamic
-    /// `i_item_id` filters, where the predicate column is also projected.
-    Utf8ViewMissing,
-    /// Scalar-only part of ClickBench Q37:
+
+    // Deferred-output shapes. Predicate columns are separate from the output,
+    // so rejected rows can skip output-column decoding.
+    /// Scalar-prefix shape derived from DataFusion ClickBench Q37:
     ///
     /// ```sql
     /// WHERE CounterID = 62
@@ -456,56 +461,263 @@ pub(crate) enum FilterType {
     ///
     /// DataFusion `Auto` does not push down the `Title <> ''` string predicate,
     /// but it can push down the scalar prefix to defer decoding `Title`.
-    /// This synthetic predicate keeps that reader-level shape: cheap scalar
-    /// filter columns protect an expensive `Utf8View` output column.
-    ClickBenchQ37ScalarPrefix,
-    /// Shape of ClickBench extended Q6 under DataFusion row-filter pushdown:
+    /// Fragmented ~0.9% selection: approx 4,500 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ScalarPrefixUtf8Output,
+    /// Sparse fragmented scalar predicates (~7%, approx 36,000 selected rows
+    /// in 500K) with a cheap fixed-width output projection, derived from a
+    /// ClickBench Q41-like shape.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    SparseScalarFixedOutput,
+    /// Scalar range predicate derived from TPC-DS Q9 `ss_quantity BETWEEN ...`
+    /// subqueries. The selected rows are random and moderately selective, and
+    /// benchmark projections cover both count-only and numeric aggregate cases.
+    /// Fragmented ~20% selection: approx 100,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    QuantityRangePredicate,
+
+    // Multi-predicate shapes. These focus predicate ordering and predicate
+    // evaluation cost independently of projection cost.
+    /// Predicate-order shape derived from DataFusion ClickBench extended Q6:
     /// an early cheap fixed-width predicate can prune almost all rows before a
     /// later unprojected variable-width predicate is decoded.
-    ClickBenchQ6MixedPredicates,
-    /// Same scalar + variable-width predicate columns as [`Self::ClickBenchQ6MixedPredicates`],
-    /// but with the variable-width predicate evaluated first. This anchors the
-    /// static post-filter gate against predicate-order drift.
-    ClickBenchQ6VarWidthFirst,
-    /// Shape of ClickBench Q41-like fixed-width filters: sparse fragmented
-    /// scalar predicates with a cheap fixed-width output projection.
-    ClickBenchQ41SparseFixedOutput,
-    /// Shape of ClickBench Q40: multiple cheap scalar predicates, very small
-    /// output, and one projected predicate column used later by grouping.
-    ClickBenchQ40ScalarGroupBy,
-    /// Shape of TPC-DS Q41: a complex OR predicate over dictionary/string-like
-    /// and scalar columns where predicate evaluation dominates reader time.
-    TpcdsQ41ComplexOr,
-    /// Shape of TPC-DS Q20 catalog_sales after dynamic filters: multiple
-    /// fixed-width predicates where predicate columns are also projected.
-    TpcdsQ20ProjectedDynamicFilters,
+    /// Point-lookup prefix: at most 1 row reaches the variable-width predicate.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    FixedThenVarWidthPredicates,
+    /// Same scalar + variable-width predicate columns as
+    /// [`Self::FixedThenVarWidthPredicates`], but with the variable-width
+    /// predicate evaluated first. This anchors the static post-filter gate
+    /// against predicate-order drift.
+    /// At most 1 row survives the final point lookup.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    VarWidthThenFixedPredicates,
+    /// Multiple cheap scalar predicates, very small output, and projected
+    /// predicate columns used later by grouping. Derived from ClickBench Q40.
+    /// Fragmented ~0.8% selection: approx 4,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    MultiScalarProjectedKey,
+    /// Complex OR predicate over dictionary/string-like and scalar columns
+    /// where predicate evaluation dominates reader time. Derived from TPC-DS
+    /// Q41.
+    /// Mixed string/scalar OR branches select approx 1% of rows.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │      ...      │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ComplexOrMixedPredicates,
+
+    // Projected-predicate shapes. At least one predicate column is also needed
+    // in the final projection.
+    /// Multiple fixed-width dynamic filters where predicate columns are also
+    /// projected. Derived from TPC-DS Q20 catalog_sales.
+    /// Fragmented ~11% selection: approx 54,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ProjectedDynamicFilters,
     /// Shape of TPC-DS Q21 after dynamic-filter pruning: sparse fragmented
     /// fixed-width predicates where the final projection still includes the
     /// predicate columns. This protects against choosing selectors for columns
     /// that were already decoded/cached by predicate evaluation.
-    TpcdsQ21ProjectedFixedOutput,
-    /// Shape of TPC-DS Q2 fact scans: the dynamic filter applies to the date
-    /// key, the same date key is projected, and an additional fixed-width sales
-    /// value can still be deferred by predicate pushdown.
-    TpcdsQ2ProjectedPredicate5Pct,
-    TpcdsQ2ProjectedPredicate8Pct,
-    TpcdsQ2ProjectedPredicate10Pct,
-    TpcdsQ2ProjectedPredicate20Pct,
-    TpcdsQ2ProjectedPredicate30Pct,
-    TpcdsQ2ProjectedPredicate40Pct,
-    TpcdsQ2ProjectedPredicate50Pct,
-    /// Scalar range predicate shaped like TPC-DS Q9 `ss_quantity BETWEEN ...`
-    /// subqueries. The selected rows are random and moderately selective, and
-    /// benchmark projections cover both count-only and numeric aggregate cases.
-    TpcdsQ9QuantityRange,
+    /// Fragmented ~7% selection: approx 36,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    SparseProjectedPredicatesFixedOutput,
+    /// Projected-predicate shape derived from TPC-DS Q2 fact scans: the
+    /// dynamic filter applies to the date key, the same date key is projected,
+    /// and an additional fixed-width sales value can still be deferred by
+    /// predicate pushdown.
+    /// Selectivity ranges from 5% to 50%: approx 25K to 250K selected rows in
+    /// 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ProjectedPredicate5Pct,
+    ProjectedPredicate8Pct,
+    ProjectedPredicate10Pct,
+    ProjectedPredicate20Pct,
+    ProjectedPredicate30Pct,
+    ProjectedPredicate40Pct,
+    ProjectedPredicate50Pct,
     /// Exact shape for the projected-predicate moderate-selectivity gate:
     /// a clustered 20% timestamp predicate where the predicate column is
     /// projected and the deferred output is variable-width.
-    ProjectedTs8PctClustered,
-    ProjectedTs20PctClustered,
+    /// Clustered 8% or 20% selection: 40,000 or 100,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ClusteredTs8PctProjectedPredicate,
+    ClusteredTs20PctProjectedPredicate,
+    /// Sparse variable-width predicate shaped like TPC-DS Q83 dynamic
+    /// `i_item_id` filters, where the predicate column is also projected.
+    /// Sparse 0.1% selection: 500 sentinel rows in 500K, one every 1,000 rows.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    Utf8ViewMissing,
     /// Very sparse projected fixed-width scan shaped like TPC-DS fact-table
     /// filters where the predicate column is also needed in the output projection.
-    TpcdsSparseProjectedFactScan,
+    /// Sparse 0.1% selection: 500 rows in 500K, one timestamp match every
+    /// 1,000 rows.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    SparseProjectedFactScan,
 }
 
 impl std::fmt::Display for FilterType {
@@ -520,49 +732,45 @@ impl std::fmt::Display for FilterType {
             FilterType::Composite => "float64 > 99.0 AND ts >= 9000",
             FilterType::Utf8ViewNonEmpty => "utf8View <> ''",
             FilterType::Utf8ViewMissing => "utf8View == '<missing>'",
-            FilterType::ClickBenchQ37ScalarPrefix => "int64 == 62 AND ts < 9000",
-            FilterType::ClickBenchQ6MixedPredicates => "int64 == 9999 AND utf8View <> ''",
-            FilterType::ClickBenchQ6VarWidthFirst => "utf8View <> '' AND int64 == 9999",
-            FilterType::ClickBenchQ41SparseFixedOutput => "int64 < 8 AND ts < 9000",
-            FilterType::ClickBenchQ40ScalarGroupBy => {
-                "int64 == 62 AND float64 > 10.0 AND ts < 9000"
-            }
-            FilterType::TpcdsQ41ComplexOr => {
+            FilterType::ScalarPrefixUtf8Output => "int64 == 62 AND ts < 9000",
+            FilterType::FixedThenVarWidthPredicates => "int64 == 9999 AND utf8View <> ''",
+            FilterType::VarWidthThenFixedPredicates => "utf8View <> '' AND int64 == 9999",
+            FilterType::SparseScalarFixedOutput => "int64 < 8 AND ts < 9000",
+            FilterType::MultiScalarProjectedKey => "int64 == 62 AND float64 > 10.0 AND ts < 9000",
+            FilterType::ComplexOrMixedPredicates => {
                 "(utf8View <> '' AND int64 < 8) OR (ts < 100 AND float64 > 95.0)"
             }
-            FilterType::TpcdsQ20ProjectedDynamicFilters => {
+            FilterType::ProjectedDynamicFilters => {
                 "int64 < 12 AND ts < 9000 projected dynamic filters"
             }
-            FilterType::TpcdsQ21ProjectedFixedOutput => {
+            FilterType::SparseProjectedPredicatesFixedOutput => {
                 "int64 < 8 AND ts < 9000 projected predicates"
             }
-            FilterType::TpcdsQ2ProjectedPredicate10Pct => {
+            FilterType::ProjectedPredicate10Pct => {
                 "int64 < 10 projected predicate with fixed output"
             }
-            FilterType::TpcdsQ2ProjectedPredicate5Pct => {
-                "int64 < 5 projected predicate with fixed output"
-            }
-            FilterType::TpcdsQ2ProjectedPredicate8Pct => {
-                "int64 < 8 projected predicate with fixed output"
-            }
-            FilterType::TpcdsQ2ProjectedPredicate20Pct => {
+            FilterType::ProjectedPredicate5Pct => "int64 < 5 projected predicate with fixed output",
+            FilterType::ProjectedPredicate8Pct => "int64 < 8 projected predicate with fixed output",
+            FilterType::ProjectedPredicate20Pct => {
                 "int64 < 20 projected predicate with fixed output"
             }
-            FilterType::TpcdsQ2ProjectedPredicate30Pct => {
+            FilterType::ProjectedPredicate30Pct => {
                 "int64 < 30 projected predicate with fixed output"
             }
-            FilterType::TpcdsQ2ProjectedPredicate40Pct => {
+            FilterType::ProjectedPredicate40Pct => {
                 "int64 < 40 projected predicate with fixed output"
             }
-            FilterType::TpcdsQ2ProjectedPredicate50Pct => {
+            FilterType::ProjectedPredicate50Pct => {
                 "int64 < 50 projected predicate with fixed output"
             }
-            FilterType::TpcdsQ9QuantityRange => "int64 > 0 AND int64 < 21",
-            FilterType::ProjectedTs20PctClustered => {
+            FilterType::QuantityRangePredicate => "int64 > 0 AND int64 < 21",
+            FilterType::ClusteredTs20PctProjectedPredicate => {
                 "ts < 2000 projected predicate with utf8 output"
             }
-            FilterType::ProjectedTs8PctClustered => "ts < 800 projected predicate with utf8 output",
-            FilterType::TpcdsSparseProjectedFactScan => "ts % 1000 == 0",
+            FilterType::ClusteredTs8PctProjectedPredicate => {
+                "ts < 800 projected predicate with utf8 output"
+            }
+            FilterType::SparseProjectedFactScan => "ts % 1000 == 0",
         };
         write!(f, "{s}")
     }
@@ -622,31 +830,31 @@ impl FilterType {
                 let scalar = StringViewArray::new_scalar(UTF8_VIEW_MISSING_VALUE);
                 eq(array, &scalar)
             }
-            // ClickBenchQ37ScalarPrefix: a cheap fragmented scalar predicate
+            // ScalarPrefixUtf8Output: a cheap fragmented scalar predicate
             // evaluated before decoding a variable-width output column.
-            FilterType::ClickBenchQ37ScalarPrefix => {
+            FilterType::ScalarPrefixUtf8Output => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let ts = batch.column(batch.schema().index_of("ts")?);
                 let counter_match = eq(int64, &Int64Array::new_scalar(62))?;
                 let date_like_range = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
                 and(&counter_match, &date_like_range)
             }
-            FilterType::ClickBenchQ6MixedPredicates | FilterType::ClickBenchQ6VarWidthFirst => {
+            FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let utf8 = batch.column(batch.schema().index_of("utf8View")?);
                 let cheap_prefix = eq(int64, &Int64Array::new_scalar(9999))?;
                 let string_suffix = neq(utf8, &StringViewArray::new_scalar(""))?;
                 and(&cheap_prefix, &string_suffix)
             }
-            FilterType::ClickBenchQ41SparseFixedOutput
-            | FilterType::TpcdsQ21ProjectedFixedOutput => {
+            FilterType::SparseScalarFixedOutput
+            | FilterType::SparseProjectedPredicatesFixedOutput => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let ts = batch.column(batch.schema().index_of("ts")?);
                 let counter_like = lt(int64, &Int64Array::new_scalar(8))?;
                 let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
                 and(&counter_like, &date_like)
             }
-            FilterType::ClickBenchQ40ScalarGroupBy => {
+            FilterType::MultiScalarProjectedKey => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let float64 = batch.column(batch.schema().index_of("float64")?);
                 let ts = batch.column(batch.schema().index_of("ts")?);
@@ -655,7 +863,7 @@ impl FilterType {
                 let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
                 and(&and(&counter_match, &width_match)?, &date_like)
             }
-            FilterType::TpcdsQ41ComplexOr => {
+            FilterType::ComplexOrMixedPredicates => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let float64 = batch.column(batch.schema().index_of("float64")?);
                 let utf8 = batch.column(batch.schema().index_of("utf8View")?);
@@ -670,48 +878,48 @@ impl FilterType {
                 )?;
                 or(&string_branch, &scalar_branch)
             }
-            FilterType::TpcdsQ20ProjectedDynamicFilters => {
+            FilterType::ProjectedDynamicFilters => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let ts = batch.column(batch.schema().index_of("ts")?);
                 let item_like = lt(int64, &Int64Array::new_scalar(12))?;
                 let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
                 and(&item_like, &date_like)
             }
-            FilterType::TpcdsQ2ProjectedPredicate5Pct
-            | FilterType::TpcdsQ2ProjectedPredicate8Pct
-            | FilterType::TpcdsQ2ProjectedPredicate10Pct
-            | FilterType::TpcdsQ2ProjectedPredicate20Pct
-            | FilterType::TpcdsQ2ProjectedPredicate30Pct
-            | FilterType::TpcdsQ2ProjectedPredicate40Pct
-            | FilterType::TpcdsQ2ProjectedPredicate50Pct => {
+            FilterType::ProjectedPredicate5Pct
+            | FilterType::ProjectedPredicate8Pct
+            | FilterType::ProjectedPredicate10Pct
+            | FilterType::ProjectedPredicate20Pct
+            | FilterType::ProjectedPredicate30Pct
+            | FilterType::ProjectedPredicate40Pct
+            | FilterType::ProjectedPredicate50Pct => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let threshold = match self {
-                    FilterType::TpcdsQ2ProjectedPredicate5Pct => 5,
-                    FilterType::TpcdsQ2ProjectedPredicate8Pct => 8,
-                    FilterType::TpcdsQ2ProjectedPredicate10Pct => 10,
-                    FilterType::TpcdsQ2ProjectedPredicate20Pct => 20,
-                    FilterType::TpcdsQ2ProjectedPredicate30Pct => 30,
-                    FilterType::TpcdsQ2ProjectedPredicate40Pct => 40,
-                    FilterType::TpcdsQ2ProjectedPredicate50Pct => 50,
+                    FilterType::ProjectedPredicate5Pct => 5,
+                    FilterType::ProjectedPredicate8Pct => 8,
+                    FilterType::ProjectedPredicate10Pct => 10,
+                    FilterType::ProjectedPredicate20Pct => 20,
+                    FilterType::ProjectedPredicate30Pct => 30,
+                    FilterType::ProjectedPredicate40Pct => 40,
+                    FilterType::ProjectedPredicate50Pct => 50,
                     _ => unreachable!(),
                 };
                 lt(int64, &Int64Array::new_scalar(threshold))
             }
-            FilterType::TpcdsQ9QuantityRange => {
+            FilterType::QuantityRangePredicate => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let lower = gt(int64, &Int64Array::new_scalar(0))?;
                 let upper = lt(int64, &Int64Array::new_scalar(21))?;
                 and(&lower, &upper)
             }
-            FilterType::ProjectedTs8PctClustered => {
+            FilterType::ClusteredTs8PctProjectedPredicate => {
                 let ts = batch.column(batch.schema().index_of("ts")?);
                 lt(ts, &TimestampMillisecondArray::new_scalar(800))
             }
-            FilterType::ProjectedTs20PctClustered => {
+            FilterType::ClusteredTs20PctProjectedPredicate => {
                 let ts = batch.column(batch.schema().index_of("ts")?);
                 lt(ts, &TimestampMillisecondArray::new_scalar(2000))
             }
-            FilterType::TpcdsSparseProjectedFactScan => {
+            FilterType::SparseProjectedFactScan => {
                 let ts = batch
                     .column(batch.schema().index_of("ts")?)
                     .as_any()
@@ -738,25 +946,26 @@ impl FilterType {
             FilterType::UnselectiveClustered => &[3],
             FilterType::Composite => &[1, 3], // Use float64 column and ts column as representative for composite
             FilterType::Utf8ViewNonEmpty | FilterType::Utf8ViewMissing => &[2],
-            FilterType::ClickBenchQ37ScalarPrefix => &[0, 3],
-            FilterType::ClickBenchQ6MixedPredicates | FilterType::ClickBenchQ6VarWidthFirst => {
+            FilterType::ScalarPrefixUtf8Output => &[0, 3],
+            FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
                 &[0, 2]
             }
-            FilterType::ClickBenchQ40ScalarGroupBy => &[0, 1, 3],
-            FilterType::ClickBenchQ41SparseFixedOutput
-            | FilterType::TpcdsQ20ProjectedDynamicFilters
-            | FilterType::TpcdsQ21ProjectedFixedOutput => &[0, 3],
-            FilterType::TpcdsQ41ComplexOr => &[0, 1, 2, 3],
-            FilterType::TpcdsQ2ProjectedPredicate5Pct
-            | FilterType::TpcdsQ2ProjectedPredicate8Pct
-            | FilterType::TpcdsQ2ProjectedPredicate10Pct
-            | FilterType::TpcdsQ2ProjectedPredicate20Pct
-            | FilterType::TpcdsQ2ProjectedPredicate30Pct
-            | FilterType::TpcdsQ2ProjectedPredicate40Pct
-            | FilterType::TpcdsQ2ProjectedPredicate50Pct => &[0],
-            FilterType::TpcdsQ9QuantityRange => &[0],
-            FilterType::ProjectedTs8PctClustered | FilterType::ProjectedTs20PctClustered => &[3],
-            FilterType::TpcdsSparseProjectedFactScan => &[3],
+            FilterType::MultiScalarProjectedKey => &[0, 1, 3],
+            FilterType::SparseScalarFixedOutput
+            | FilterType::ProjectedDynamicFilters
+            | FilterType::SparseProjectedPredicatesFixedOutput => &[0, 3],
+            FilterType::ComplexOrMixedPredicates => &[0, 1, 2, 3],
+            FilterType::ProjectedPredicate5Pct
+            | FilterType::ProjectedPredicate8Pct
+            | FilterType::ProjectedPredicate10Pct
+            | FilterType::ProjectedPredicate20Pct
+            | FilterType::ProjectedPredicate30Pct
+            | FilterType::ProjectedPredicate40Pct
+            | FilterType::ProjectedPredicate50Pct => &[0],
+            FilterType::QuantityRangePredicate => &[0],
+            FilterType::ClusteredTs8PctProjectedPredicate
+            | FilterType::ClusteredTs20PctProjectedPredicate => &[3],
+            FilterType::SparseProjectedFactScan => &[3],
         }
     }
 }
@@ -1049,15 +1258,18 @@ fn benchmark_async_strategy_matrix(c: &mut Criterion) {
     }
 }
 
-/// A small async-only matrix that isolates the cases most relevant to the
+/// A focused async-only matrix that isolates the cases most relevant to the
 /// row-filter Auto policy. This is intentionally narrower than
 /// [`benchmark_async_strategy_matrix`]: it keeps the benchmark output focused
 /// on cases where later PRs may teach `Auto` to switch execution modes or
 /// explicitly keep predicate pushdown.
 ///
-/// The `profile_*` cases are derived from DataFusion ClickBench and TPC-DS
-/// comparisons. They keep the reader-level shapes worth tracking while
-/// excluding query regressions that did not construct a Parquet `RowFilter`.
+/// The cases use structure-oriented names. Comments on [`FilterType`] keep the
+/// ClickBench and TPC-DS provenance, but these are synthetic reader shapes, not
+/// end-to-end query benchmarks.
+///
+/// Individual [`FilterType`] variants include shaded-row diagrams for the
+/// representative selection shapes.
 fn benchmark_async_auto_policy_focus(c: &mut Criterion) {
     const SMALL_TOTAL_ROWS: usize = 20_000;
     const SMALL_ROW_GROUP_SIZE: usize = 5_000;
@@ -1068,6 +1280,7 @@ fn benchmark_async_auto_policy_focus(c: &mut Criterion) {
         SMALL_ROW_GROUP_SIZE,
     ));
     let cases = [
+        // Baseline selectivity shapes.
         AsyncFocusCase::new(
             "utf8_non_empty",
             parquet_file.clone(),
@@ -1104,194 +1317,200 @@ fn benchmark_async_auto_policy_focus(c: &mut Criterion) {
             FilterType::SelectiveUnclustered,
             ProjectionCase::ExcludeFilterColumn,
         ),
+        // Filter-only and count-only shapes. These guard the cases where there
+        // is no deferred output column to amortize the cost of row selection.
         AsyncFocusCase::new(
-            "profile_q37_scalar_utf8",
+            "point_lookup_filter_only",
             parquet_file.clone(),
-            FilterType::ClickBenchQ37ScalarPrefix,
-            ProjectionCase::Utf8Only,
+            FilterType::PointLookup,
+            ProjectionCase::FilterColumnsOnly,
         ),
-        // Historical Q6 focus case: cheap fixed-width predicate before the
-        // unprojected variable-width predicate.
         AsyncFocusCase::new(
-            "profile_q6_mixed_predicates",
+            "projected_predicate_8pct_filter_only",
             parquet_file.clone(),
-            FilterType::ClickBenchQ6MixedPredicates,
-            ProjectionCase::Float64Only,
+            FilterType::ProjectedPredicate8Pct,
+            ProjectionCase::FilterColumnsOnly,
         ),
         AsyncFocusCase::new(
-            "profile_varwidth_then_fixed_prefix",
+            "sparse_scalar_count_only",
             parquet_file.clone(),
-            FilterType::ClickBenchQ6VarWidthFirst,
-            ProjectionCase::Float64Only,
+            FilterType::SparseScalarFixedOutput,
+            ProjectionCase::CountOnly,
+        ),
+        AsyncFocusCase::new(
+            "small_fragmented_scalar_filter_only",
+            small_parquet_file.clone(),
+            FilterType::ModeratelySelectiveUnclustered,
+            ProjectionCase::FilterColumnsOnly,
         ),
         AsyncFocusCase::new(
-            "profile_q40_scalar_group_by",
+            "quantity_range_filter_columns_only",
             parquet_file.clone(),
-            FilterType::ClickBenchQ40ScalarGroupBy,
-            ProjectionCase::Float64AndTs,
+            FilterType::QuantityRangePredicate,
+            ProjectionCase::FilterColumnsOnly,
         ),
+        // Deferred-output shapes. Predicate columns are not part of the output,
+        // so pushdown can skip decoding projected columns for rejected rows.
         AsyncFocusCase::new(
-            "profile_q41_sparse_fixed_output",
+            "scalar_prefix_utf8_output",
             parquet_file.clone(),
-            FilterType::ClickBenchQ41SparseFixedOutput,
+            FilterType::ScalarPrefixUtf8Output,
+            ProjectionCase::Utf8Only,
+        ),
+        AsyncFocusCase::new(
+            "small_scalar_prefix_utf8_output",
+            small_parquet_file.clone(),
+            FilterType::ScalarPrefixUtf8Output,
+            ProjectionCase::Utf8Only,
+        ),
+        AsyncFocusCase::new(
+            "point_lookup_deferred_fixed_output",
+            parquet_file.clone(),
+            FilterType::PointLookup,
             ProjectionCase::Float64Only,
         ),
         AsyncFocusCase::new(
-            "profile_tpcds_q41_complex_or",
+            "sparse_scalar_fixed_output",
             parquet_file.clone(),
-            FilterType::TpcdsQ41ComplexOr,
+            FilterType::SparseScalarFixedOutput,
             ProjectionCase::Float64Only,
         ),
         AsyncFocusCase::new(
-            "profile_tpcds_q20_projected_dynamic_filters",
+            "quantity_range_numeric_output",
             parquet_file.clone(),
-            FilterType::TpcdsQ20ProjectedDynamicFilters,
-            ProjectionCase::FixedColumns,
+            FilterType::QuantityRangePredicate,
+            ProjectionCase::Float64Only,
         ),
+        // Multi-predicate shapes. These make predicate order and predicate
+        // evaluation cost visible separately from projection cost.
         AsyncFocusCase::new(
-            "profile_q21_projected_predicate_fixed_output",
+            "fixed_then_varwidth_predicates",
             parquet_file.clone(),
-            FilterType::TpcdsQ21ProjectedFixedOutput,
-            ProjectionCase::FixedColumns,
+            FilterType::FixedThenVarWidthPredicates,
+            ProjectionCase::Float64Only,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_5pct",
+            "varwidth_then_fixed_predicates",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate5Pct,
-            ProjectionCase::Int64AndFloat64,
+            FilterType::VarWidthThenFixedPredicates,
+            ProjectionCase::Float64Only,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_8pct_filter_only",
+            "multi_scalar_projected_key",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate8Pct,
-            ProjectionCase::FilterColumnsOnly,
+            FilterType::MultiScalarProjectedKey,
+            ProjectionCase::Float64AndTs,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_8pct_fixed_output",
+            "complex_or_mixed_predicates",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate8Pct,
-            ProjectionCase::Int64AndFloat64,
+            FilterType::ComplexOrMixedPredicates,
+            ProjectionCase::Float64Only,
         ),
+        // Projected-predicate shapes. The predicate column is also projected,
+        // so pushdown must not assume the predicate decode is purely overhead.
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_8pct_varwidth_output",
+            "projected_dynamic_filters",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate8Pct,
-            ProjectionCase::Int64AndUtf8,
+            FilterType::ProjectedDynamicFilters,
+            ProjectionCase::FixedColumns,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_10pct",
+            "sparse_projected_predicates_fixed_output",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate10Pct,
+            FilterType::SparseProjectedPredicatesFixedOutput,
+            ProjectionCase::FixedColumns,
+        ),
+        AsyncFocusCase::new(
+            "projected_predicate_5pct_fixed_output",
+            parquet_file.clone(),
+            FilterType::ProjectedPredicate5Pct,
             ProjectionCase::Int64AndFloat64,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_20pct",
+            "projected_predicate_8pct_fixed_output",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate20Pct,
+            FilterType::ProjectedPredicate8Pct,
             ProjectionCase::Int64AndFloat64,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_20pct_varwidth_output",
+            "projected_predicate_8pct_varwidth_output",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate20Pct,
+            FilterType::ProjectedPredicate8Pct,
             ProjectionCase::Int64AndUtf8,
         ),
         AsyncFocusCase::new(
-            "profile_projected_ts_8pct_fixed_output",
+            "projected_predicate_10pct_fixed_output",
             parquet_file.clone(),
-            FilterType::ProjectedTs8PctClustered,
-            ProjectionCase::Float64AndTs,
+            FilterType::ProjectedPredicate10Pct,
+            ProjectionCase::Int64AndFloat64,
         ),
         AsyncFocusCase::new(
-            "profile_projected_ts_8pct_varwidth_output",
+            "projected_predicate_20pct_fixed_output",
             parquet_file.clone(),
-            FilterType::ProjectedTs8PctClustered,
-            ProjectionCase::TsAndUtf8,
+            FilterType::ProjectedPredicate20Pct,
+            ProjectionCase::Int64AndFloat64,
         ),
         AsyncFocusCase::new(
-            "profile_projected_ts_20pct_fixed_output",
+            "projected_predicate_20pct_varwidth_output",
             parquet_file.clone(),
-            FilterType::ProjectedTs20PctClustered,
-            ProjectionCase::Float64AndTs,
+            FilterType::ProjectedPredicate20Pct,
+            ProjectionCase::Int64AndUtf8,
         ),
         AsyncFocusCase::new(
-            "profile_projected_ts_20pct_varwidth_output",
+            "projected_predicate_30pct_fixed_output",
             parquet_file.clone(),
-            FilterType::ProjectedTs20PctClustered,
-            ProjectionCase::TsAndUtf8,
+            FilterType::ProjectedPredicate30Pct,
+            ProjectionCase::Int64AndFloat64,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_30pct",
+            "projected_predicate_40pct_fixed_output",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate30Pct,
+            FilterType::ProjectedPredicate40Pct,
             ProjectionCase::Int64AndFloat64,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_40pct",
+            "projected_predicate_50pct_fixed_output",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate40Pct,
+            FilterType::ProjectedPredicate50Pct,
             ProjectionCase::Int64AndFloat64,
         ),
         AsyncFocusCase::new(
-            "profile_q2_projected_predicate_50pct",
+            "clustered_ts_8pct_fixed_output",
             parquet_file.clone(),
-            FilterType::TpcdsQ2ProjectedPredicate50Pct,
-            ProjectionCase::Int64AndFloat64,
+            FilterType::ClusteredTs8PctProjectedPredicate,
+            ProjectionCase::Float64AndTs,
         ),
         AsyncFocusCase::new(
-            "profile_q1_count_only",
+            "clustered_ts_8pct_varwidth_output",
             parquet_file.clone(),
-            FilterType::ClickBenchQ41SparseFixedOutput,
-            ProjectionCase::CountOnly,
+            FilterType::ClusteredTs8PctProjectedPredicate,
+            ProjectionCase::TsAndUtf8,
         ),
         AsyncFocusCase::new(
-            "profile_q19_no_defer",
+            "clustered_ts_20pct_fixed_output",
             parquet_file.clone(),
-            FilterType::PointLookup,
-            ProjectionCase::FilterColumnsOnly,
+            FilterType::ClusteredTs20PctProjectedPredicate,
+            ProjectionCase::Float64AndTs,
         ),
         AsyncFocusCase::new(
-            "profile_sparse_fixed_deferred_output",
+            "clustered_ts_20pct_varwidth_output",
             parquet_file.clone(),
-            FilterType::PointLookup,
-            ProjectionCase::Float64Only,
+            FilterType::ClusteredTs20PctProjectedPredicate,
+            ProjectionCase::TsAndUtf8,
         ),
         AsyncFocusCase::new(
-            "profile_tpcds_sparse_projected_fact_scan",
+            "sparse_projected_fact_scan",
             parquet_file.clone(),
-            FilterType::TpcdsSparseProjectedFactScan,
+            FilterType::SparseProjectedFactScan,
             ProjectionCase::FixedColumns,
         ),
         AsyncFocusCase::new(
-            "profile_q83_sparse_utf8_projected",
+            "sparse_utf8_projected_predicate",
             parquet_file.clone(),
             FilterType::Utf8ViewMissing,
             ProjectionCase::AllColumns,
         ),
-        AsyncFocusCase::new(
-            "profile_small_scalar_no_defer",
-            small_parquet_file.clone(),
-            FilterType::ModeratelySelectiveUnclustered,
-            ProjectionCase::FilterColumnsOnly,
-        ),
-        AsyncFocusCase::new(
-            "profile_small_q37_scalar_utf8",
-            small_parquet_file,
-            FilterType::ClickBenchQ37ScalarPrefix,
-            ProjectionCase::Utf8Only,
-        ),
-        AsyncFocusCase::new(
-            "profile_q9_quantity_count",
-            parquet_file.clone(),
-            FilterType::TpcdsQ9QuantityRange,
-            ProjectionCase::FilterColumnsOnly,
-        ),
-        AsyncFocusCase::new(
-            "profile_q9_quantity_avg",
-            parquet_file,
-            FilterType::TpcdsQ9QuantityRange,
-            ProjectionCase::Float64Only,
-        ),
     ];
     let strategies = [
         AsyncStrategy::FullPostFilter,
@@ -1318,6 +1537,21 @@ fn benchmark_async_auto_policy_focus(c: &mut Criterion) {
 /// a narrow primitive projection where row-level pushdown metrics are zero.
 /// It deliberately lives outside the adaptive-materialization matrix because there is no
 /// filter strategy to choose.
+///
+/// ```text
+/// no RowFilter             projected primitive columns
+/// ┌───────────────┐    ┌───────────────┐
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │      ...      │    │      ...      │
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// └───────────────┘    └───────────────┘
+/// ```
 fn benchmark_projection_scan_focus(c: &mut Criterion) {
     let parquet_file = Bytes::from(write_parquet_file());
     let rt = tokio::runtime::Builder::new_multi_thread()
@@ -1327,7 +1561,7 @@ fn benchmark_projection_scan_focus(c: &mut Criterion) {
 
     let mut group = c.benchmark_group("arrow_reader_projection_scan_focus");
 
-    let case_name = "profile_q83_return_scan_primitives";
+    let case_name = "primitive_projection_only";
     let projection = vec![0, 1, 3];
     let reader = InMemoryReader::try_new(&parquet_file).unwrap();
     let metadata = Arc::clone(reader.metadata());
@@ -1402,11 +1636,11 @@ fn benchmark_async_focus_case(
         schema_descr,
         filter_type.filter_projection().iter().copied(),
     );
-    let q6_int64_pred_mask = ProjectionMask::roots(schema_descr, [0]);
-    let q6_utf8_pred_mask = ProjectionMask::roots(schema_descr, [2]);
-    let q41_int64_pred_mask = ProjectionMask::roots(schema_descr, [0]);
-    let q41_ts_pred_mask = ProjectionMask::roots(schema_descr, [3]);
-    let q40_float64_pred_mask = ProjectionMask::roots(schema_descr, [1]);
+    let fixed_pred_mask = ProjectionMask::roots(schema_descr, [0]);
+    let varwidth_pred_mask = ProjectionMask::roots(schema_descr, [2]);
+    let sparse_int64_pred_mask = ProjectionMask::roots(schema_descr, [0]);
+    let sparse_ts_pred_mask = ProjectionMask::roots(schema_descr, [3]);
+    let scalar_float64_pred_mask = ProjectionMask::roots(schema_descr, [1]);
 
     for strategy in strategies.iter().copied() {
         let bench_id = BenchmarkId::new(
@@ -1419,11 +1653,11 @@ fn benchmark_async_focus_case(
             b.iter(|| {
                 let reader = reader.clone();
                 let pred_mask = pred_mask.clone();
-                let q6_int64_pred_mask = q6_int64_pred_mask.clone();
-                let q6_utf8_pred_mask = q6_utf8_pred_mask.clone();
-                let q41_int64_pred_mask = q41_int64_pred_mask.clone();
-                let q41_ts_pred_mask = q41_ts_pred_mask.clone();
-                let q40_float64_pred_mask = q40_float64_pred_mask.clone();
+                let fixed_pred_mask = fixed_pred_mask.clone();
+                let varwidth_pred_mask = varwidth_pred_mask.clone();
+                let sparse_int64_pred_mask = sparse_int64_pred_mask.clone();
+                let sparse_ts_pred_mask = sparse_ts_pred_mask.clone();
+                let scalar_float64_pred_mask = scalar_float64_pred_mask.clone();
                 let projection_mask = projection_mask.clone();
                 let read_projection_mask = read_projection_mask.clone();
                 let output_column_names = output_column_names.clone();
@@ -1443,11 +1677,11 @@ fn benchmark_async_focus_case(
                             let row_filter = row_filter_for_focus_case(
                                 filter_type,
                                 pred_mask,
-                                q6_int64_pred_mask,
-                                q6_utf8_pred_mask,
-                                q41_int64_pred_mask,
-                                q41_ts_pred_mask,
-                                q40_float64_pred_mask,
+                                fixed_pred_mask,
+                                varwidth_pred_mask,
+                                sparse_int64_pred_mask,
+                                sparse_ts_pred_mask,
+                                scalar_float64_pred_mask,
                             );
                             benchmark_async_reader_with_policy(
                                 reader,
@@ -1461,11 +1695,11 @@ fn benchmark_async_focus_case(
                             let row_filter = row_filter_for_focus_case(
                                 filter_type,
                                 pred_mask,
-                                q6_int64_pred_mask,
-                                q6_utf8_pred_mask,
-                                q41_int64_pred_mask,
-                                q41_ts_pred_mask,
-                                q40_float64_pred_mask,
+                                fixed_pred_mask,
+                                varwidth_pred_mask,
+                                sparse_int64_pred_mask,
+                                sparse_ts_pred_mask,
+                                scalar_float64_pred_mask,
                             );
                             benchmark_async_reader_with_policy(
                                 reader,
@@ -1479,11 +1713,11 @@ fn benchmark_async_focus_case(
                             let row_filter = row_filter_for_focus_case(
                                 filter_type,
                                 pred_mask,
-                                q6_int64_pred_mask,
-                                q6_utf8_pred_mask,
-                                q41_int64_pred_mask,
-                                q41_ts_pred_mask,
-                                q40_float64_pred_mask,
+                                fixed_pred_mask,
+                                varwidth_pred_mask,
+                                sparse_int64_pred_mask,
+                                sparse_ts_pred_mask,
+                                scalar_float64_pred_mask,
                             );
                             benchmark_async_reader_with_policy(
                                 reader,
@@ -1576,50 +1810,50 @@ fn row_filter_for(filter_type: FilterType, pred_mask: ProjectionMask) -> RowFilt
 fn row_filter_for_focus_case(
     filter_type: FilterType,
     pred_mask: ProjectionMask,
-    q6_int64_pred_mask: ProjectionMask,
-    q6_utf8_pred_mask: ProjectionMask,
-    q41_int64_pred_mask: ProjectionMask,
-    q41_ts_pred_mask: ProjectionMask,
-    q40_float64_pred_mask: ProjectionMask,
+    fixed_pred_mask: ProjectionMask,
+    varwidth_pred_mask: ProjectionMask,
+    sparse_int64_pred_mask: ProjectionMask,
+    sparse_ts_pred_mask: ProjectionMask,
+    scalar_float64_pred_mask: ProjectionMask,
 ) -> RowFilter {
     match filter_type {
-        FilterType::ClickBenchQ6MixedPredicates | FilterType::ClickBenchQ6VarWidthFirst => {
-            let int64_filter =
-                ArrowPredicateFn::new(q6_int64_pred_mask, move |batch: RecordBatch| {
-                    let int64 = batch.column(batch.schema().index_of("int64")?);
-                    eq(int64, &Int64Array::new_scalar(9999))
-                });
+        FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
+            let int64_filter = ArrowPredicateFn::new(fixed_pred_mask, move |batch: RecordBatch| {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                eq(int64, &Int64Array::new_scalar(9999))
+            });
             let utf8_filter =
-                ArrowPredicateFn::new(q6_utf8_pred_mask, move |batch: RecordBatch| {
+                ArrowPredicateFn::new(varwidth_pred_mask, move |batch: RecordBatch| {
                     let utf8 = batch.column(batch.schema().index_of("utf8View")?);
                     neq(utf8, &StringViewArray::new_scalar(""))
                 });
 
             match filter_type {
-                FilterType::ClickBenchQ6MixedPredicates => {
+                FilterType::FixedThenVarWidthPredicates => {
                     RowFilter::new(vec![Box::new(int64_filter), Box::new(utf8_filter)])
                 }
-                FilterType::ClickBenchQ6VarWidthFirst => {
+                FilterType::VarWidthThenFixedPredicates => {
                     RowFilter::new(vec![Box::new(utf8_filter), Box::new(int64_filter)])
                 }
                 _ => unreachable!(),
             }
         }
-        FilterType::ClickBenchQ40ScalarGroupBy => {
+        FilterType::MultiScalarProjectedKey => {
             let int64_filter =
-                ArrowPredicateFn::new(q41_int64_pred_mask, move |batch: RecordBatch| {
+                ArrowPredicateFn::new(sparse_int64_pred_mask, move |batch: RecordBatch| {
                     let int64 = batch.column(batch.schema().index_of("int64")?);
                     eq(int64, &Int64Array::new_scalar(62))
                 });
             let float64_filter =
-                ArrowPredicateFn::new(q40_float64_pred_mask, move |batch: RecordBatch| {
+                ArrowPredicateFn::new(scalar_float64_pred_mask, move |batch: RecordBatch| {
                     let float64 = batch.column(batch.schema().index_of("float64")?);
                     gt(float64, &Float64Array::new_scalar(10.0))
                 });
-            let ts_filter = ArrowPredicateFn::new(q41_ts_pred_mask, move |batch: RecordBatch| {
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                lt(ts, &TimestampMillisecondArray::new_scalar(9000))
-            });
+            let ts_filter =
+                ArrowPredicateFn::new(sparse_ts_pred_mask, move |batch: RecordBatch| {
+                    let ts = batch.column(batch.schema().index_of("ts")?);
+                    lt(ts, &TimestampMillisecondArray::new_scalar(9000))
+                });
 
             RowFilter::new(vec![
                 Box::new(int64_filter),
@@ -1627,22 +1861,23 @@ fn row_filter_for_focus_case(
                 Box::new(ts_filter),
             ])
         }
-        FilterType::ClickBenchQ41SparseFixedOutput
-        | FilterType::TpcdsQ20ProjectedDynamicFilters
-        | FilterType::TpcdsQ21ProjectedFixedOutput => {
+        FilterType::SparseScalarFixedOutput
+        | FilterType::ProjectedDynamicFilters
+        | FilterType::SparseProjectedPredicatesFixedOutput => {
             let int64_filter =
-                ArrowPredicateFn::new(q41_int64_pred_mask, move |batch: RecordBatch| {
+                ArrowPredicateFn::new(sparse_int64_pred_mask, move |batch: RecordBatch| {
                     let int64 = batch.column(batch.schema().index_of("int64")?);
                     let scalar = match filter_type {
-                        FilterType::TpcdsQ20ProjectedDynamicFilters => 12,
+                        FilterType::ProjectedDynamicFilters => 12,
                         _ => 8,
                     };
                     lt(int64, &Int64Array::new_scalar(scalar))
                 });
-            let ts_filter = ArrowPredicateFn::new(q41_ts_pred_mask, move |batch: RecordBatch| {
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                lt(ts, &TimestampMillisecondArray::new_scalar(9000))
-            });
+            let ts_filter =
+                ArrowPredicateFn::new(sparse_ts_pred_mask, move |batch: RecordBatch| {
+                    let ts = batch.column(batch.schema().index_of("ts")?);
+                    lt(ts, &TimestampMillisecondArray::new_scalar(9000))
+                });
 
             RowFilter::new(vec![Box::new(int64_filter), Box::new(ts_filter)])
         }
diff --git a/parquet/benches/row_selection_cursor.rs b/parquet/benches/row_selection_cursor.rs
index 8b5b13cdeea5..c458f5c12a52 100644
--- a/parquet/benches/row_selection_cursor.rs
+++ b/parquet/benches/row_selection_cursor.rs
@@ -15,6 +15,15 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Benchmarks the cost of applying `RowSelection` as selector queues versus
+//! boolean masks.
+//!
+//! The broad sweep varies selector length, selection density, run-length
+//! distribution, data type, projected column count, and `Utf8View` payload size.
+//! The shape-focus suite keeps the data shape narrower and varies the maximum
+//! selected-run length (`maxrun`) so the results can show where
+//! `RowSelectionPolicy::Auto` should prefer `Selectors` or `Mask`.
+
 use std::hint;
 use std::sync::Arc;
 
@@ -35,6 +44,8 @@ const BATCH_SIZE: usize = 1 << 10;
 const BASE_SEED: u64 = 0xA55AA55A;
 const AVG_SELECTOR_LENGTHS: &[usize] = &[4, 8, 12, 16, 20, 24, 28, 32, 36, 40];
 const SHAPE_FOCUS_SELECTED_RUN_LENGTHS: &[usize] = &[1, 2, 4, 8, 32];
+// At 80% selectivity, maxrun1 and maxrun2 cannot be represented without
+// zero-length skip runs, so the dense-focused cases start at maxrun4.
 const DENSE_SHAPE_FOCUS_SELECTED_RUN_LENGTHS: &[usize] = &[4, 8, 32];
 const COLUMN_WIDTHS: &[usize] = &[2, 4, 8, 16, 32];
 const UTF8VIEW_LENS: &[usize] = &[4, 8, 16, 32, 64, 128, 256];
@@ -235,6 +246,12 @@ fn criterion_benchmark(c: &mut Criterion) {
     bench_shape_focus(c);
 }
 
+/// Focused selector-shape matrix for `Selectors` versus `Mask`.
+///
+/// It fixes the input profile to `int32` and `utf8view`, then varies
+/// selectivity and the requested maximum selected-run length. The benchmark
+/// suffix reports this as `maxrunNN` because the final selected run may be
+/// shorter than the requested maximum.
 fn bench_shape_focus(c: &mut Criterion) {
     let profiles = [
         DataProfile {

From a717d7f8ee74bedb4ecf5195e93edef2d5846442 Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei_huang@qq.com>
Date: Mon, 15 Jun 2026 01:00:37 +0800
Subject: [PATCH 09/14] ci(parquet): lock pyspark cargo installs

---
 .github/workflows/parquet.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml
index 9ae7d47eddc6..a0d8495170eb 100644
--- a/.github/workflows/parquet.yml
+++ b/.github/workflows/parquet.yml
@@ -175,8 +175,8 @@ jobs:
           rustup default ${{ matrix.rust }}
       - name: Install binary for checking
         run: |
-          cargo install --path parquet --bin parquet-show-bloom-filter --features=cli
-          cargo install --path parquet --bin parquet-fromcsv --features=arrow,cli
+          cargo install --path parquet --bin parquet-show-bloom-filter --features=cli --locked
+          cargo install --path parquet --bin parquet-fromcsv --features=arrow,cli --locked
       - name: Run pytest
         run: |
           cd parquet/pytest

From ac186807904e7adbe0ca98bd7efde5af7f93d0d1 Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei_huang@qq.com>
Date: Mon, 15 Jun 2026 08:02:26 +0800
Subject: [PATCH 10/14] Revert "ci(parquet): lock pyspark cargo installs"

This reverts commit a717d7f8ee74bedb4ecf5195e93edef2d5846442.
---
 .github/workflows/parquet.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml
index a0d8495170eb..9ae7d47eddc6 100644
--- a/.github/workflows/parquet.yml
+++ b/.github/workflows/parquet.yml
@@ -175,8 +175,8 @@ jobs:
           rustup default ${{ matrix.rust }}
       - name: Install binary for checking
         run: |
-          cargo install --path parquet --bin parquet-show-bloom-filter --features=cli --locked
-          cargo install --path parquet --bin parquet-fromcsv --features=arrow,cli --locked
+          cargo install --path parquet --bin parquet-show-bloom-filter --features=cli
+          cargo install --path parquet --bin parquet-fromcsv --features=arrow,cli
       - name: Run pytest
         run: |
           cd parquet/pytest

From cdde1fe8a2ff4f0daa9b179b08de5c4116c5a474 Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei.huang@jsessh.com>
Date: Tue, 16 Jun 2026 16:34:32 +0800
Subject: [PATCH 11/14] bench(parquet): split materialization policy benchmark

---
 parquet/Cargo.toml                            |    5 +
 .../arrow_reader_materialization_policy.rs    | 2434 +++++++++++++++++
 parquet/benches/arrow_reader_row_filter.rs    |  527 +---
 3 files changed, 2457 insertions(+), 509 deletions(-)
 create mode 100644 parquet/benches/arrow_reader_materialization_policy.rs

diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml
index dd2c872ede50..e1aedf5cb16e 100644
--- a/parquet/Cargo.toml
+++ b/parquet/Cargo.toml
@@ -245,6 +245,11 @@ name = "arrow_reader_row_filter"
 required-features = ["arrow", "async"]
 harness = false
 
+[[bench]]
+name = "arrow_reader_materialization_policy"
+required-features = ["arrow", "async"]
+harness = false
+
 [[bench]]
 name = "arrow_reader_clickbench"
 required-features = ["arrow", "async", "object_store"]
diff --git a/parquet/benches/arrow_reader_materialization_policy.rs b/parquet/benches/arrow_reader_materialization_policy.rs
new file mode 100644
index 000000000000..297efbe4eaf6
--- /dev/null
+++ b/parquet/benches/arrow_reader_materialization_policy.rs
@@ -0,0 +1,2434 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Focused benchmark for Parquet reader materialization policy decisions.
+//!
+#![allow(dead_code)]
+//!
+//! # Background:
+//!
+//! As described in [Efficient Filter Pushdown in Parquet], evaluating
+//! pushdown filters is a two-step process:
+//!
+//! 1. Build a filter mask by decoding and evaluating filter functions on
+//!    the filter column(s).
+//!
+//! 2. Decode the rows that match the filter mask from the projected columns.
+//!
+//! The performance depends on factors such as the number of rows selected,
+//! the clustering of results (which affects the efficiency of the filter mask),
+//! and whether the same column is used for both filtering and projection.
+//!
+//! This benchmark isolates the reader policy choice between full post-filtering
+//! and row-filter pushdown with `Auto`, forced `Selectors`, and forced `Mask`.
+//!
+//! [Efficient Filter Pushdown in Parquet]: https://datafusion.apache.org/blog/2025/03/21/parquet-pushdown/
+//!
+//! The benchmark creates an in-memory Parquet file with 500K rows and four root
+//! columns:
+//! - `int64`: random integers with an injected point-lookup value.
+//! - `float64`: random floating-point values used for sparse and dense filters.
+//! - `utf8View`: ClickBench-like string values with sparse sentinel values.
+//! - `ts`: sequential timestamps used for clustered filters.
+//!
+//! The benchmark cases are organized by reader-level axes: selection density
+//! and clustering, predicate/output overlap, deferred output payload, predicate
+//! cost and order, count/filter-only outputs, and small-file behavior.
+//!
+//! Full TPC-DS runs can show query-level movement that does not reproduce in
+//! isolated reader probes. Keep these cases focused on stable reader-level
+//! risks: moderate projected predicates with cheap deferred output can favor
+//! post-filtering, while clustered selections, variable-width deferred output,
+//! complex OR predicates, and sparse scalar prefixes should not be swept into
+//! that shortcut without their own evidence.
+
+use arrow::array::{
+    ArrayRef, BooleanArray, Float64Array, Int64Array, StructArray, TimestampMillisecondArray,
+};
+use arrow::compute::kernels::cmp::{eq, gt, lt, lt_eq, neq};
+use arrow::compute::{and, or};
+use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+use arrow::record_batch::RecordBatch;
+use arrow_array::StringViewArray;
+use arrow_array::builder::{ArrayBuilder, StringViewBuilder};
+use bytes::Bytes;
+use criterion::{
+    BenchmarkGroup, BenchmarkId, Criterion, criterion_group, criterion_main, measurement::WallTime,
+};
+use futures::future::BoxFuture;
+use futures::{FutureExt, StreamExt};
+use parquet::arrow::arrow_reader::{
+    ArrowPredicateFn, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowFilter,
+    RowSelectionPolicy,
+};
+use parquet::arrow::async_reader::AsyncFileReader;
+use parquet::arrow::{ArrowWriter, ParquetRecordBatchStreamBuilder, ProjectionMask};
+use parquet::basic::Compression;
+use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader};
+use parquet::file::properties::WriterProperties;
+use rand::{Rng, SeedableRng, rngs::StdRng};
+use std::ops::Range;
+use std::sync::Arc;
+
+const COLUMN_NAMES: [&str; 4] = ["int64", "float64", "utf8View", "ts"];
+const UTF8_VIEW_MISSING_VALUE: &str = "__arrow_rs_missing__";
+
+/// Generates a random string. Has a 50% chance to generate a short string (3–11 characters)
+/// or a long string (13–20 characters).
+fn random_string(rng: &mut StdRng) -> String {
+    let charset = b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+    let is_long = rng.random_bool(0.5);
+    let len = if is_long {
+        rng.random_range(13..21)
+    } else {
+        rng.random_range(3..12)
+    };
+    (0..len)
+        .map(|_| charset[rng.random_range(0..charset.len())] as char)
+        .collect()
+}
+
+/// Creates an int64 array of a given size with random integers in [0, 100).
+/// Then, it overwrites a single random index with 9999 to serve as the unique value for point lookup.
+fn create_int64_array(size: usize) -> ArrayRef {
+    let mut rng = StdRng::seed_from_u64(42);
+    let mut values: Vec<i64> = (0..size).map(|_| rng.random_range(0..100)).collect();
+    let unique_index = rng.random_range(0..size);
+    values[unique_index] = 9999; // Unique value for point lookup
+    Arc::new(Int64Array::from(values)) as ArrayRef
+}
+
+/// Creates a float64 array of a given size with random floats in [0.0, 100.0).
+fn create_float64_array(size: usize) -> ArrayRef {
+    let mut rng = StdRng::seed_from_u64(43);
+    let values: Vec<f64> = (0..size).map(|_| rng.random_range(0.0..100.0)).collect();
+    Arc::new(Float64Array::from(values)) as ArrayRef
+}
+
+fn append_utf8_view_value(builder: &mut StringViewBuilder, value: &str) {
+    if builder.len() % 1_000 == 0 {
+        builder.append_value(UTF8_VIEW_MISSING_VALUE);
+    } else {
+        builder.append_value(value);
+    }
+}
+
+/// Creates a utf8View array of a given size with random strings.
+///
+/// This is modeled after the "SearchPhrase" column in the ClickBench benchmark.
+///
+/// See <https://github.com/apache/arrow-rs/issues/7460> for calculations.
+///
+/// The important ClickBench data properties are:
+/// * Selectivity is: 13172392 / 99997497 = 0.132
+/// * Number of RowSelections = 14054784
+/// * Average run length of each RowSelection: 99997497 / 14054784 = 7.114
+///
+/// The properties of this array are:
+/// * Selectivity is: 15144 / 100000 = 0.15144
+/// * Number of RowSelections = 12904
+/// * Average run length of each RowSelection: 100000 / 12904 = 7.75
+fn create_utf8_view_array(size: usize) -> ArrayRef {
+    const AVG_RUN_LENGTH: usize = 4; // average number of empty/non-empty strings in a row
+    const EMPTY_DENSITY: u32 = 85; // percent chance that each run is an empty string
+
+    let mut builder = StringViewBuilder::with_capacity(size);
+    let mut rng = StdRng::seed_from_u64(44);
+    while builder.len() < size {
+        let mut run_length = rng.random_range(1..AVG_RUN_LENGTH);
+        if builder.len() + run_length > size {
+            // cap to size rows
+            run_length = size - builder.len();
+        }
+
+        let choice = rng.random_range(0..100);
+        if choice < EMPTY_DENSITY {
+            for _ in 0..run_length {
+                append_utf8_view_value(&mut builder, "");
+            }
+        } else {
+            for _ in 0..run_length {
+                append_utf8_view_value(&mut builder, &random_string(&mut rng));
+            }
+        }
+    }
+    Arc::new(builder.finish()) as ArrayRef
+}
+
+/// Creates a ts (timestamp) array of a given size. Each value is computed as i % 10_000,
+/// which simulates repeating blocks (each block of 10,000) to model clustered patterns.
+fn create_ts_array(size: usize) -> ArrayRef {
+    let values: Vec<i64> = (0..size).map(|i| (i % 10_000) as i64).collect();
+    Arc::new(TimestampMillisecondArray::from(values)) as ArrayRef
+}
+
+/// Creates a RecordBatch with 100K rows and 4 columns: int64, float64, utf8View, and ts.
+pub(crate) fn create_record_batch(size: usize) -> RecordBatch {
+    let fields = vec![
+        Field::new("int64", DataType::Int64, false),
+        Field::new("float64", DataType::Float64, false),
+        Field::new("utf8View", DataType::Utf8View, true),
+        Field::new(
+            "ts",
+            DataType::Timestamp(TimeUnit::Millisecond, None),
+            false,
+        ),
+    ];
+    let schema = Arc::new(Schema::new(fields));
+
+    let int64_array = create_int64_array(size);
+    let float64_array = create_float64_array(size);
+    let utf8_array = create_utf8_view_array(size);
+    let ts_array = create_ts_array(size);
+
+    let arrays: Vec<ArrayRef> = vec![int64_array, float64_array, utf8_array, ts_array];
+    RecordBatch::try_new(schema, arrays).unwrap()
+}
+
+/// Total number of rows.
+const TOTAL_ROWS: usize = 500_000;
+
+/// Maximum rows per row group.
+const ROW_GROUP_SIZE: usize = 100_000;
+
+/// Writes the RecordBatch to an in memory buffer, returning the buffer
+fn write_parquet_file() -> Vec<u8> {
+    write_parquet_file_with_rows(TOTAL_ROWS, ROW_GROUP_SIZE)
+}
+
+/// Writes a RecordBatch with a configurable shape to an in memory buffer,
+/// returning the buffer.
+fn write_parquet_file_with_rows(total_rows: usize, row_group_size: usize) -> Vec<u8> {
+    let batch = create_record_batch(total_rows);
+    write_record_batch_to_parquet(&batch, row_group_size)
+}
+
+fn write_record_batch_to_parquet(batch: &RecordBatch, row_group_size: usize) -> Vec<u8> {
+    let schema = batch.schema();
+    let props = WriterProperties::builder()
+        .set_compression(Compression::SNAPPY)
+        .set_max_row_group_row_count(Some(row_group_size))
+        .build();
+    let mut buffer = vec![];
+    {
+        let mut writer = ArrowWriter::try_new(&mut buffer, schema.clone(), Some(props)).unwrap();
+        writer.write(batch).unwrap();
+        writer.close().unwrap();
+    }
+    buffer
+}
+
+fn create_nested_record_batch(size: usize) -> RecordBatch {
+    let tag = Arc::new(StringViewArray::from_iter_values(
+        (0..size).map(|idx| format!("tag_{}", idx % 7)),
+    )) as ArrayRef;
+    let payload = StructArray::from(vec![
+        (
+            Arc::new(Field::new("id", DataType::Int64, false)),
+            Arc::new(Int64Array::from_iter_values(
+                (0..size).map(|idx| idx as i64 + 1_000),
+            )) as ArrayRef,
+        ),
+        (
+            Arc::new(Field::new("label", DataType::Utf8View, false)),
+            Arc::new(StringViewArray::from_iter_values(
+                (0..size).map(|idx| format!("payload_{idx}")),
+            )) as ArrayRef,
+        ),
+    ]);
+    let payload = Arc::new(payload) as ArrayRef;
+    let value = Arc::new(Int64Array::from_iter_values(
+        (0..size).map(|idx| idx as i64 + 10_000),
+    )) as ArrayRef;
+
+    RecordBatch::try_from_iter(vec![("tag", tag), ("payload", payload), ("value", value)]).unwrap()
+}
+
+fn write_nested_parquet_file_with_rows(total_rows: usize, row_group_size: usize) -> Vec<u8> {
+    let batch = create_nested_record_batch(total_rows);
+    write_record_batch_to_parquet(&batch, row_group_size)
+}
+
+/// ProjectionCase defines the projection mode for the benchmark:
+/// either projecting all columns or excluding the column that is used for filtering.
+#[derive(Clone, Copy)]
+enum ProjectionCase {
+    AllColumns,
+    ExcludeFilterColumn,
+    FilterColumnsOnly,
+    CountOnly,
+    FixedColumns,
+    Float64AndTs,
+    Float64Only,
+    Int64AndFloat64,
+    Int64AndUtf8,
+    TsAndUtf8,
+    Utf8Only,
+}
+
+impl std::fmt::Display for ProjectionCase {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ProjectionCase::AllColumns => write!(f, "all_columns"),
+            ProjectionCase::ExcludeFilterColumn => write!(f, "exclude_filter_column"),
+            ProjectionCase::FilterColumnsOnly => write!(f, "filter_columns_only"),
+            ProjectionCase::CountOnly => write!(f, "count_only"),
+            ProjectionCase::FixedColumns => write!(f, "fixed_columns"),
+            ProjectionCase::Float64AndTs => write!(f, "float64_and_ts"),
+            ProjectionCase::Float64Only => write!(f, "float64_only"),
+            ProjectionCase::Int64AndFloat64 => write!(f, "int64_and_float64"),
+            ProjectionCase::Int64AndUtf8 => write!(f, "int64_and_utf8"),
+            ProjectionCase::TsAndUtf8 => write!(f, "ts_and_utf8"),
+            ProjectionCase::Utf8Only => write!(f, "utf8_only"),
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+enum SyncStrategy {
+    FullPostFilter,
+    PushdownAuto,
+    PushdownSelectors,
+    PushdownMask,
+}
+
+impl std::fmt::Display for SyncStrategy {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SyncStrategy::FullPostFilter => write!(f, "full_post_filter"),
+            SyncStrategy::PushdownAuto => write!(f, "pushdown_auto"),
+            SyncStrategy::PushdownSelectors => write!(f, "pushdown_selectors"),
+            SyncStrategy::PushdownMask => write!(f, "pushdown_mask"),
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+enum AsyncStrategy {
+    FullPostFilter,
+    PushdownAuto,
+    PushdownSelectors,
+    PushdownMask,
+}
+
+impl std::fmt::Display for AsyncStrategy {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            AsyncStrategy::FullPostFilter => write!(f, "full_post_filter"),
+            AsyncStrategy::PushdownAuto => write!(f, "pushdown_auto"),
+            AsyncStrategy::PushdownSelectors => write!(f, "pushdown_selectors"),
+            AsyncStrategy::PushdownMask => write!(f, "pushdown_mask"),
+        }
+    }
+}
+
+/// FilterType encapsulates the different filter comparisons.
+/// The variants correspond to the different filter patterns.
+#[derive(Clone, Copy, Debug)]
+pub(crate) enum FilterType {
+    /// "Point Lookup": selects a single row
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │     ...       │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    /// (1 RowSelection of 1 row)
+    PointLookup,
+    /// selective (1%) unclustered filter
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │      ...      │    │               │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │               │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    /// (1000 RowSelection of 10 rows each)
+    SelectiveUnclustered,
+    /// moderately selective (10%) clustered filter
+    /// ```text
+    ///  ┌───────────────┐    ┌───────────────┐
+    ///  │               │    │               │
+    ///  │               │    │               │
+    ///  │               │    │     ...       │
+    ///  │               │    │               │
+    ///  │     ...       │    │               │
+    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    ///  └───────────────┘    └───────────────┘
+    /// ```
+    /// (10 RowSelections of 10,000 rows each)
+    ModeratelySelectiveClustered,
+    /// moderately selective (10%) clustered filter
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │      ...      │    │               │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │      ...      │    │               │
+    /// │               │    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    /// (10 RowSelections of 10,000 rows each)
+    ModeratelySelectiveUnclustered,
+    /// unselective (99%) unclustered filter
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    /// (99,000 RowSelections of 10 rows each)
+    UnselectiveUnclustered,
+    /// unselective (90%) clustered filter
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │               │    │     ...       │
+    /// │               │    │               │
+    /// │     ...       │    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    /// (99 RowSelection of 10,000 rows each)
+    UnselectiveClustered,
+    /// [`Self::SelectivelUnclusered`] `AND`
+    /// [`Self::ModeratelySelectiveClustered`]
+    Composite,
+    /// `utf8View <> ''` modeling [ClickBench] [Q21-Q27]
+    ///
+    /// [ClickBench]: https://github.com/ClickHouse/ClickBench
+    /// [Q21-Q27]: https://github.com/apache/datafusion/blob/b7177234e65cbbb2dcc04c252f6acd80bb026362/benchmarks/queries/clickbench/queries.sql#L22-L28
+    Utf8ViewNonEmpty,
+
+    // Deferred-output shapes. Predicate columns are separate from the output,
+    // so rejected rows can skip output-column decoding.
+    /// Scalar-prefix shape derived from DataFusion ClickBench Q37:
+    ///
+    /// ```sql
+    /// WHERE CounterID = 62
+    ///   AND EventDate BETWEEN ...
+    ///   AND DontCountHits = 0
+    ///   AND IsRefresh = 0
+    ///   AND Title <> ''
+    /// ```
+    ///
+    /// DataFusion `Auto` does not push down the `Title <> ''` string predicate,
+    /// but it can push down the scalar prefix to defer decoding `Title`.
+    /// Fragmented ~0.9% selection: approx 4,500 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ScalarPrefixUtf8Output,
+    /// Sparse fragmented scalar predicates (~7%, approx 36,000 selected rows
+    /// in 500K) with a cheap fixed-width output projection, derived from a
+    /// ClickBench Q41-like shape.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    SparseScalarFixedOutput,
+    /// Scalar range predicate derived from TPC-DS Q9 `ss_quantity BETWEEN ...`
+    /// subqueries. The selected rows are random and moderately selective, and
+    /// benchmark projections cover both count-only and numeric aggregate cases.
+    /// Fragmented ~20% selection: approx 100,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    QuantityRangePredicate,
+
+    // Multi-predicate shapes. These focus predicate ordering and predicate
+    // evaluation cost independently of projection cost.
+    /// Predicate-order shape derived from DataFusion ClickBench extended Q6:
+    /// an early cheap fixed-width predicate can prune almost all rows before a
+    /// later unprojected variable-width predicate is decoded.
+    /// Point-lookup prefix: at most 1 row reaches the variable-width predicate.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    FixedThenVarWidthPredicates,
+    /// Same scalar + variable-width predicate columns as
+    /// [`Self::FixedThenVarWidthPredicates`], but with the variable-width
+    /// predicate evaluated first. This anchors the static post-filter gate
+    /// against predicate-order drift.
+    /// At most 1 row survives the final point lookup.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    VarWidthThenFixedPredicates,
+    /// Multiple cheap scalar predicates, very small output, and projected
+    /// predicate columns used later by grouping. Derived from ClickBench Q40.
+    /// Fragmented ~0.8% selection: approx 4,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    MultiScalarProjectedKey,
+    /// Complex OR predicate over dictionary/string-like and scalar columns
+    /// where predicate evaluation dominates reader time. Derived from TPC-DS
+    /// Q41.
+    /// Mixed string/scalar OR branches select approx 1% of rows.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │      ...      │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ComplexOrMixedPredicates,
+
+    // Projected-predicate shapes. At least one predicate column is also needed
+    // in the final projection.
+    /// Multiple fixed-width dynamic filters where predicate columns are also
+    /// projected. Derived from TPC-DS Q20 catalog_sales.
+    /// Fragmented ~11% selection: approx 54,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ProjectedDynamicFilters,
+    /// Shape of TPC-DS Q21 after dynamic-filter pruning: sparse fragmented
+    /// fixed-width predicates where the final projection still includes the
+    /// predicate columns. This protects against choosing selectors for columns
+    /// that were already decoded/cached by predicate evaluation.
+    /// Fragmented ~7% selection: approx 36,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    SparseProjectedPredicatesFixedOutput,
+    /// Projected-predicate shape derived from TPC-DS Q2 fact scans: the
+    /// dynamic filter applies to the date key, the same date key is projected,
+    /// and an additional fixed-width sales value can still be deferred by
+    /// predicate pushdown.
+    /// Selectivity ranges from 1% to 50%: approx 5K to 250K selected rows in
+    /// 500K.
+    /// The 1% variants also cover a TPC-DS Q41-like item scan where predicate
+    /// and output overlap, selection is highly fragmented, and the deferred
+    /// output payload is small enough that post-filtering can be faster than
+    /// row-filter pushdown.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ProjectedPredicate1Pct,
+    ProjectedPredicate5Pct,
+    ProjectedPredicate8Pct,
+    ProjectedPredicate10Pct,
+    ProjectedPredicate20Pct,
+    ProjectedPredicate30Pct,
+    ProjectedPredicate40Pct,
+    ProjectedPredicate50Pct,
+    /// Exact shape for the projected-predicate moderate-selectivity gate:
+    /// a clustered 20% timestamp predicate where the predicate column is
+    /// projected and the deferred output is variable-width.
+    /// Clustered 8% or 20% selection: 40,000 or 100,000 selected rows in 500K.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ClusteredTs8PctProjectedPredicate,
+    ClusteredTs20PctProjectedPredicate,
+    /// Sparse variable-width predicate shaped like TPC-DS Q83 dynamic
+    /// `i_item_id` filters, where the predicate column is also projected.
+    /// Sparse 0.1% selection: 500 sentinel rows in 500K, one every 1,000 rows.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    Utf8ViewMissing,
+    /// Very sparse projected fixed-width scan shaped like TPC-DS fact-table
+    /// filters where the predicate column is also needed in the output projection.
+    /// Sparse 0.1% selection: 500 rows in 500K, one timestamp match every
+    /// 1,000 rows.
+    ///
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    SparseProjectedFactScan,
+}
+
+impl std::fmt::Display for FilterType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let s = match self {
+            FilterType::PointLookup => "int64 == 9999",
+            FilterType::SelectiveUnclustered => "float64 > 99.0",
+            FilterType::ModeratelySelectiveClustered => "ts >= 9000",
+            FilterType::ModeratelySelectiveUnclustered => "int64 > 90",
+            FilterType::UnselectiveUnclustered => "float64 <= 99.0",
+            FilterType::UnselectiveClustered => "ts < 9000",
+            FilterType::Composite => "float64 > 99.0 AND ts >= 9000",
+            FilterType::Utf8ViewNonEmpty => "utf8View <> ''",
+            FilterType::Utf8ViewMissing => "utf8View == '<missing>'",
+            FilterType::ScalarPrefixUtf8Output => "int64 == 62 AND ts < 9000",
+            FilterType::FixedThenVarWidthPredicates => "int64 == 9999 AND utf8View <> ''",
+            FilterType::VarWidthThenFixedPredicates => "utf8View <> '' AND int64 == 9999",
+            FilterType::SparseScalarFixedOutput => "int64 < 8 AND ts < 9000",
+            FilterType::MultiScalarProjectedKey => "int64 == 62 AND float64 > 10.0 AND ts < 9000",
+            FilterType::ComplexOrMixedPredicates => {
+                "(utf8View <> '' AND int64 < 8) OR (ts < 100 AND float64 > 95.0)"
+            }
+            FilterType::ProjectedDynamicFilters => {
+                "int64 < 12 AND ts < 9000 projected dynamic filters"
+            }
+            FilterType::SparseProjectedPredicatesFixedOutput => {
+                "int64 < 8 AND ts < 9000 projected predicates"
+            }
+            FilterType::ProjectedPredicate1Pct => "int64 < 1 projected predicate",
+            FilterType::ProjectedPredicate10Pct => {
+                "int64 < 10 projected predicate with fixed output"
+            }
+            FilterType::ProjectedPredicate5Pct => "int64 < 5 projected predicate with fixed output",
+            FilterType::ProjectedPredicate8Pct => "int64 < 8 projected predicate with fixed output",
+            FilterType::ProjectedPredicate20Pct => {
+                "int64 < 20 projected predicate with fixed output"
+            }
+            FilterType::ProjectedPredicate30Pct => {
+                "int64 < 30 projected predicate with fixed output"
+            }
+            FilterType::ProjectedPredicate40Pct => {
+                "int64 < 40 projected predicate with fixed output"
+            }
+            FilterType::ProjectedPredicate50Pct => {
+                "int64 < 50 projected predicate with fixed output"
+            }
+            FilterType::QuantityRangePredicate => "int64 > 0 AND int64 < 21",
+            FilterType::ClusteredTs20PctProjectedPredicate => {
+                "ts < 2000 projected predicate with utf8 output"
+            }
+            FilterType::ClusteredTs8PctProjectedPredicate => {
+                "ts < 800 projected predicate with utf8 output"
+            }
+            FilterType::SparseProjectedFactScan => "ts % 1000 == 0",
+        };
+        write!(f, "{s}")
+    }
+}
+
+impl FilterType {
+    /// Applies the specified filter on the given RecordBatch and returns a BooleanArray mask.
+    pub(crate) fn filter_batch(&self, batch: &RecordBatch) -> arrow::error::Result<BooleanArray> {
+        match self {
+            // Point Lookup on int64 column
+            FilterType::PointLookup => {
+                let array = batch.column(batch.schema().index_of("int64")?);
+                let scalar = Int64Array::new_scalar(9999);
+                eq(array, &scalar)
+            }
+            // Selective Unclustered on float64 column: float64 > 99.0
+            FilterType::SelectiveUnclustered => {
+                let array = batch.column(batch.schema().index_of("float64")?);
+                let scalar = Float64Array::new_scalar(99.0);
+                gt(array, &scalar)
+            }
+            // Moderately Selective Clustered on ts column: ts >= 9000 (implemented as > 8999)
+            FilterType::ModeratelySelectiveClustered => {
+                let array = batch.column(batch.schema().index_of("ts")?);
+                gt(array, &TimestampMillisecondArray::new_scalar(8999))
+            }
+            // Moderately Selective Unclustered on int64 column: int64 > 90
+            FilterType::ModeratelySelectiveUnclustered => {
+                let array = batch.column(batch.schema().index_of("int64")?);
+                let scalar = Int64Array::new_scalar(90);
+                gt(array, &scalar)
+            }
+            // Unselective Unclustered on float64 column: NOT (float64 > 99.0)
+            FilterType::UnselectiveUnclustered => {
+                let array = batch.column(batch.schema().index_of("float64")?);
+                lt_eq(array, &Float64Array::new_scalar(99.0))
+            }
+            // Unselective Clustered on ts column: ts < 9000
+            FilterType::UnselectiveClustered => {
+                let array = batch.column(batch.schema().index_of("ts")?);
+                lt(array, &TimestampMillisecondArray::new_scalar(9000))
+            }
+            // Composite filter: logical AND of (float64 > 99.0) and (ts >= 9000)
+            FilterType::Composite => {
+                let mask1 = FilterType::SelectiveUnclustered.filter_batch(batch)?;
+                let mask2 = FilterType::ModeratelySelectiveClustered.filter_batch(batch)?;
+                and(&mask1, &mask2)
+            }
+            // Utf8ViewNonEmpty: selects rows where the utf8View column is not an empty string.
+            FilterType::Utf8ViewNonEmpty => {
+                let array = batch.column(batch.schema().index_of("utf8View")?);
+                let scalar = StringViewArray::new_scalar("");
+                neq(array, &scalar)
+            }
+            FilterType::Utf8ViewMissing => {
+                let array = batch.column(batch.schema().index_of("utf8View")?);
+                let scalar = StringViewArray::new_scalar(UTF8_VIEW_MISSING_VALUE);
+                eq(array, &scalar)
+            }
+            // ScalarPrefixUtf8Output: a cheap fragmented scalar predicate
+            // evaluated before decoding a variable-width output column.
+            FilterType::ScalarPrefixUtf8Output => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let counter_match = eq(int64, &Int64Array::new_scalar(62))?;
+                let date_like_range = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
+                and(&counter_match, &date_like_range)
+            }
+            FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let utf8 = batch.column(batch.schema().index_of("utf8View")?);
+                let cheap_prefix = eq(int64, &Int64Array::new_scalar(9999))?;
+                let string_suffix = neq(utf8, &StringViewArray::new_scalar(""))?;
+                and(&cheap_prefix, &string_suffix)
+            }
+            FilterType::SparseScalarFixedOutput
+            | FilterType::SparseProjectedPredicatesFixedOutput => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let counter_like = lt(int64, &Int64Array::new_scalar(8))?;
+                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
+                and(&counter_like, &date_like)
+            }
+            FilterType::MultiScalarProjectedKey => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let float64 = batch.column(batch.schema().index_of("float64")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let counter_match = eq(int64, &Int64Array::new_scalar(62))?;
+                let width_match = gt(float64, &Float64Array::new_scalar(10.0))?;
+                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
+                and(&and(&counter_match, &width_match)?, &date_like)
+            }
+            FilterType::ComplexOrMixedPredicates => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let float64 = batch.column(batch.schema().index_of("float64")?);
+                let utf8 = batch.column(batch.schema().index_of("utf8View")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let string_branch = and(
+                    &neq(utf8, &StringViewArray::new_scalar(""))?,
+                    &lt(int64, &Int64Array::new_scalar(8))?,
+                )?;
+                let scalar_branch = and(
+                    &lt(ts, &TimestampMillisecondArray::new_scalar(100))?,
+                    &gt(float64, &Float64Array::new_scalar(95.0))?,
+                )?;
+                or(&string_branch, &scalar_branch)
+            }
+            FilterType::ProjectedDynamicFilters => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                let item_like = lt(int64, &Int64Array::new_scalar(12))?;
+                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
+                and(&item_like, &date_like)
+            }
+            FilterType::ProjectedPredicate1Pct
+            | FilterType::ProjectedPredicate5Pct
+            | FilterType::ProjectedPredicate8Pct
+            | FilterType::ProjectedPredicate10Pct
+            | FilterType::ProjectedPredicate20Pct
+            | FilterType::ProjectedPredicate30Pct
+            | FilterType::ProjectedPredicate40Pct
+            | FilterType::ProjectedPredicate50Pct => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let threshold = match self {
+                    FilterType::ProjectedPredicate1Pct => 1,
+                    FilterType::ProjectedPredicate5Pct => 5,
+                    FilterType::ProjectedPredicate8Pct => 8,
+                    FilterType::ProjectedPredicate10Pct => 10,
+                    FilterType::ProjectedPredicate20Pct => 20,
+                    FilterType::ProjectedPredicate30Pct => 30,
+                    FilterType::ProjectedPredicate40Pct => 40,
+                    FilterType::ProjectedPredicate50Pct => 50,
+                    _ => unreachable!(),
+                };
+                lt(int64, &Int64Array::new_scalar(threshold))
+            }
+            FilterType::QuantityRangePredicate => {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                let lower = gt(int64, &Int64Array::new_scalar(0))?;
+                let upper = lt(int64, &Int64Array::new_scalar(21))?;
+                and(&lower, &upper)
+            }
+            FilterType::ClusteredTs8PctProjectedPredicate => {
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                lt(ts, &TimestampMillisecondArray::new_scalar(800))
+            }
+            FilterType::ClusteredTs20PctProjectedPredicate => {
+                let ts = batch.column(batch.schema().index_of("ts")?);
+                lt(ts, &TimestampMillisecondArray::new_scalar(2000))
+            }
+            FilterType::SparseProjectedFactScan => {
+                let ts = batch
+                    .column(batch.schema().index_of("ts")?)
+                    .as_any()
+                    .downcast_ref::<TimestampMillisecondArray>()
+                    .unwrap();
+                Ok(BooleanArray::from(
+                    ts.values()
+                        .iter()
+                        .map(|value| value % 1000 == 0)
+                        .collect::<Vec<_>>(),
+                ))
+            }
+        }
+    }
+
+    /// Return the indexes in the batch's schema that are used for filtering.
+    fn filter_projection(&self) -> &'static [usize] {
+        match self {
+            FilterType::PointLookup => &[0],
+            FilterType::SelectiveUnclustered => &[1],
+            FilterType::ModeratelySelectiveClustered => &[3],
+            FilterType::ModeratelySelectiveUnclustered => &[0],
+            FilterType::UnselectiveUnclustered => &[1],
+            FilterType::UnselectiveClustered => &[3],
+            FilterType::Composite => &[1, 3], // Use float64 column and ts column as representative for composite
+            FilterType::Utf8ViewNonEmpty | FilterType::Utf8ViewMissing => &[2],
+            FilterType::ScalarPrefixUtf8Output => &[0, 3],
+            FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
+                &[0, 2]
+            }
+            FilterType::MultiScalarProjectedKey => &[0, 1, 3],
+            FilterType::SparseScalarFixedOutput
+            | FilterType::ProjectedDynamicFilters
+            | FilterType::SparseProjectedPredicatesFixedOutput => &[0, 3],
+            FilterType::ComplexOrMixedPredicates => &[0, 1, 2, 3],
+            FilterType::ProjectedPredicate1Pct
+            | FilterType::ProjectedPredicate5Pct
+            | FilterType::ProjectedPredicate8Pct
+            | FilterType::ProjectedPredicate10Pct
+            | FilterType::ProjectedPredicate20Pct
+            | FilterType::ProjectedPredicate30Pct
+            | FilterType::ProjectedPredicate40Pct
+            | FilterType::ProjectedPredicate50Pct => &[0],
+            FilterType::QuantityRangePredicate => &[0],
+            FilterType::ClusteredTs8PctProjectedPredicate
+            | FilterType::ClusteredTs20PctProjectedPredicate => &[3],
+            FilterType::SparseProjectedFactScan => &[3],
+        }
+    }
+}
+
+/// Benchmark filters and projections by reading the Parquet file.
+/// This benchmark iterates over all individual filter types and two projection cases.
+/// It measures the time to read and filter the Parquet file according to each scenario.
+fn benchmark_filters_and_projections(c: &mut Criterion) {
+    // make the parquet file in memory that can be shared
+    let parquet_file = Bytes::from(write_parquet_file());
+    let filter_types = vec![
+        FilterType::PointLookup,
+        FilterType::SelectiveUnclustered,
+        FilterType::ModeratelySelectiveClustered,
+        FilterType::ModeratelySelectiveUnclustered,
+        FilterType::UnselectiveUnclustered,
+        FilterType::UnselectiveClustered,
+        FilterType::Utf8ViewNonEmpty,
+        FilterType::Composite,
+    ];
+    let projection_cases = vec![
+        ProjectionCase::AllColumns,
+        ProjectionCase::ExcludeFilterColumn,
+    ];
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter");
+
+    for filter_type in filter_types {
+        for proj_case in &projection_cases {
+            let filter_col = filter_type.filter_projection().to_vec();
+            let output_projection = output_projection_for(filter_type, proj_case);
+
+            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+            let metadata = Arc::clone(reader.metadata());
+
+            let schema_descr = metadata.file_metadata().schema_descr();
+            let projection_mask = ProjectionMask::roots(schema_descr, output_projection.clone());
+            let pred_mask = ProjectionMask::roots(schema_descr, filter_col.clone());
+
+            let benchmark_name = format!("{filter_type}/{proj_case}",);
+
+            // run the benchmark for the async reader
+            let bench_id = BenchmarkId::new(benchmark_name.clone(), "async");
+            let rt_captured = rt.handle().clone();
+            group.bench_function(bench_id, |b| {
+                b.iter(|| {
+                    let reader = reader.clone();
+                    let pred_mask = pred_mask.clone();
+                    let projection_mask = projection_mask.clone();
+                    // row filters are not clone, so must make it each iter
+                    let filter = ArrowPredicateFn::new(pred_mask, move |batch: RecordBatch| {
+                        Ok(filter_type.filter_batch(&batch).unwrap())
+                    });
+                    let row_filter = RowFilter::new(vec![Box::new(filter)]);
+
+                    rt_captured.block_on(async {
+                        benchmark_async_reader(reader, projection_mask, row_filter).await;
+                    })
+                });
+            });
+
+            // run the benchmark for the sync reader
+            let bench_id = BenchmarkId::new(benchmark_name, "sync");
+            group.bench_function(bench_id, |b| {
+                b.iter(|| {
+                    let reader = reader.clone();
+                    let pred_mask = pred_mask.clone();
+                    let projection_mask = projection_mask.clone();
+                    // row filters are not clone, so must make it each iter
+                    let filter = ArrowPredicateFn::new(pred_mask, move |batch: RecordBatch| {
+                        Ok(filter_type.filter_batch(&batch).unwrap())
+                    });
+                    let row_filter = RowFilter::new(vec![Box::new(filter)]);
+
+                    benchmark_sync_reader(reader, projection_mask, row_filter)
+                });
+            });
+        }
+    }
+}
+
+/// Compare full scan plus post-filtering against row-level pushdown strategies.
+///
+/// This group is intentionally sync-only and smaller than
+/// [`benchmark_filters_and_projections`]. It tracks the cases most likely to
+/// inform a future default `Auto` policy: selective random filters, clustered
+/// filters, ClickBench-like string filters, and the forced selector strategy
+/// that originally motivated apache/arrow-rs#8565.
+fn benchmark_sync_strategy_matrix(c: &mut Criterion) {
+    let parquet_file = Bytes::from(write_parquet_file());
+    let filter_types = [
+        FilterType::SelectiveUnclustered,
+        FilterType::ModeratelySelectiveClustered,
+        FilterType::ModeratelySelectiveUnclustered,
+        FilterType::Utf8ViewNonEmpty,
+    ];
+    let strategies = [
+        SyncStrategy::FullPostFilter,
+        SyncStrategy::PushdownAuto,
+        SyncStrategy::PushdownSelectors,
+        SyncStrategy::PushdownMask,
+    ];
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_strategy_matrix");
+
+    for filter_type in filter_types {
+        for projection_case in [
+            ProjectionCase::AllColumns,
+            ProjectionCase::ExcludeFilterColumn,
+        ] {
+            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+            let metadata = Arc::clone(reader.metadata());
+            let schema_descr = metadata.file_metadata().schema_descr();
+            let output_projection = output_projection_for(filter_type, &projection_case);
+            let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
+            let output_column_names = projection_names(&output_projection);
+            let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
+            let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
+            let pred_mask = ProjectionMask::roots(
+                schema_descr,
+                filter_type.filter_projection().iter().copied(),
+            );
+
+            for strategy in strategies {
+                let bench_id = BenchmarkId::new(
+                    format!("{filter_type}/{projection_case}"),
+                    strategy.to_string(),
+                );
+
+                group.bench_function(bench_id, |b| {
+                    b.iter(|| {
+                        let reader = reader.clone();
+                        let pred_mask = pred_mask.clone();
+                        let projection_mask = projection_mask.clone();
+                        let read_projection_mask = read_projection_mask.clone();
+                        let output_column_names = output_column_names.clone();
+
+                        match strategy {
+                            SyncStrategy::FullPostFilter => benchmark_sync_reader_post_filter(
+                                reader,
+                                read_projection_mask,
+                                output_column_names,
+                                filter_type,
+                            ),
+                            SyncStrategy::PushdownAuto => {
+                                let row_filter = row_filter_for(filter_type, pred_mask);
+                                benchmark_sync_reader_with_policy(
+                                    reader,
+                                    projection_mask,
+                                    row_filter,
+                                    RowSelectionPolicy::default(),
+                                )
+                            }
+                            SyncStrategy::PushdownSelectors => {
+                                let row_filter = row_filter_for(filter_type, pred_mask);
+                                benchmark_sync_reader_with_policy(
+                                    reader,
+                                    projection_mask,
+                                    row_filter,
+                                    RowSelectionPolicy::Selectors,
+                                )
+                            }
+                            SyncStrategy::PushdownMask => {
+                                let row_filter = row_filter_for(filter_type, pred_mask);
+                                benchmark_sync_reader_with_policy(
+                                    reader,
+                                    projection_mask,
+                                    row_filter,
+                                    RowSelectionPolicy::Mask,
+                                )
+                            }
+                        }
+                    });
+                });
+            }
+        }
+    }
+}
+
+/// Compare async full scan plus post-filtering against async row-level pushdown
+/// strategies. This is the matrix that exercises the current reader `Auto`
+/// policy through the async stream backed by the push decoder row-group pipeline.
+fn benchmark_async_strategy_matrix(c: &mut Criterion) {
+    let parquet_file = Bytes::from(write_parquet_file());
+    let filter_types = [
+        FilterType::SelectiveUnclustered,
+        FilterType::ModeratelySelectiveClustered,
+        FilterType::ModeratelySelectiveUnclustered,
+        FilterType::Utf8ViewNonEmpty,
+    ];
+    let strategies = [
+        AsyncStrategy::FullPostFilter,
+        AsyncStrategy::PushdownAuto,
+        AsyncStrategy::PushdownSelectors,
+        AsyncStrategy::PushdownMask,
+    ];
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_async_strategy_matrix");
+
+    for filter_type in filter_types {
+        for projection_case in [
+            ProjectionCase::AllColumns,
+            ProjectionCase::ExcludeFilterColumn,
+        ] {
+            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+            let metadata = Arc::clone(reader.metadata());
+            let schema_descr = metadata.file_metadata().schema_descr();
+            let output_projection = output_projection_for(filter_type, &projection_case);
+            let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
+            let output_column_names = projection_names(&output_projection);
+            let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
+            let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
+            let pred_mask = ProjectionMask::roots(
+                schema_descr,
+                filter_type.filter_projection().iter().copied(),
+            );
+
+            for strategy in strategies {
+                let bench_id = BenchmarkId::new(
+                    format!("{filter_type}/{projection_case}"),
+                    strategy.to_string(),
+                );
+                let rt_captured = rt.handle().clone();
+
+                group.bench_function(bench_id, |b| {
+                    b.iter(|| {
+                        let reader = reader.clone();
+                        let pred_mask = pred_mask.clone();
+                        let projection_mask = projection_mask.clone();
+                        let read_projection_mask = read_projection_mask.clone();
+                        let output_column_names = output_column_names.clone();
+
+                        rt_captured.block_on(async {
+                            match strategy {
+                                AsyncStrategy::FullPostFilter => {
+                                    benchmark_async_reader_post_filter(
+                                        reader,
+                                        read_projection_mask,
+                                        output_column_names,
+                                        filter_type,
+                                    )
+                                    .await
+                                }
+                                AsyncStrategy::PushdownAuto => {
+                                    let row_filter = row_filter_for(filter_type, pred_mask);
+                                    benchmark_async_reader_with_policy(
+                                        reader,
+                                        projection_mask,
+                                        row_filter,
+                                        RowSelectionPolicy::default(),
+                                    )
+                                    .await
+                                }
+                                AsyncStrategy::PushdownSelectors => {
+                                    let row_filter = row_filter_for(filter_type, pred_mask);
+                                    benchmark_async_reader_with_policy(
+                                        reader,
+                                        projection_mask,
+                                        row_filter,
+                                        RowSelectionPolicy::Selectors,
+                                    )
+                                    .await
+                                }
+                                AsyncStrategy::PushdownMask => {
+                                    let row_filter = row_filter_for(filter_type, pred_mask);
+                                    benchmark_async_reader_with_policy(
+                                        reader,
+                                        projection_mask,
+                                        row_filter,
+                                        RowSelectionPolicy::Mask,
+                                    )
+                                    .await
+                                }
+                            }
+                        })
+                    });
+                });
+            }
+        }
+    }
+}
+
+/// A focused async-only matrix that isolates the cases most relevant to the
+/// row-filter Auto policy. This is intentionally narrower than
+/// [`benchmark_async_strategy_matrix`]: it keeps the benchmark output focused
+/// on cases where later PRs may teach `Auto` to switch execution modes or
+/// explicitly keep predicate pushdown.
+///
+/// The cases use structure-oriented names. Comments on [`FilterType`] keep the
+/// ClickBench and TPC-DS provenance, but these are synthetic reader shapes, not
+/// end-to-end query benchmarks.
+///
+/// Coverage is organized by reader-level dimensions instead of individual
+/// queries:
+/// - selection shape: point lookup, sparse fragmented, moderate fragmented,
+///   dense fragmented, and clustered ranges.
+/// - output relationship: filter-only, count-only, deferred fixed-width,
+///   deferred variable-width, and projected predicate columns.
+/// - predicate shape: single scalar, scalar conjunctions, scalar plus
+///   variable-width predicates, mixed OR predicates, and dynamic-filter-like
+///   projected predicates.
+/// - policy boundary: strategy rows compare full post-filtering with `Auto`,
+///   forced selectors, and forced masks for every shape.
+///
+/// Individual [`FilterType`] variants include shaded-row diagrams for the
+/// representative selection shapes.
+fn benchmark_async_auto_policy_focus(c: &mut Criterion) {
+    const SMALL_TOTAL_ROWS: usize = 20_000;
+    const SMALL_ROW_GROUP_SIZE: usize = 5_000;
+
+    let parquet_file = Bytes::from(write_parquet_file());
+    let small_parquet_file = Bytes::from(write_parquet_file_with_rows(
+        SMALL_TOTAL_ROWS,
+        SMALL_ROW_GROUP_SIZE,
+    ));
+    let mut cases = Vec::new();
+    push_baseline_selectivity_cases(&mut cases, &parquet_file);
+    push_filter_only_cases(&mut cases, &parquet_file, &small_parquet_file);
+    push_deferred_output_cases(&mut cases, &parquet_file, &small_parquet_file);
+    push_predicate_cost_cases(&mut cases, &parquet_file);
+    push_projected_predicate_cases(&mut cases, &parquet_file);
+
+    let strategies = [
+        AsyncStrategy::FullPostFilter,
+        AsyncStrategy::PushdownAuto,
+        AsyncStrategy::PushdownMask,
+        AsyncStrategy::PushdownSelectors,
+    ];
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_async_auto_policy_focus");
+
+    for case in cases {
+        benchmark_async_focus_case(&mut group, &rt, case, &strategies);
+    }
+}
+
+fn push_focus_cases(
+    cases: &mut Vec<AsyncFocusCase>,
+    parquet_file: &Bytes,
+    specs: &[(&'static str, FilterType, ProjectionCase)],
+) {
+    cases.extend(
+        specs
+            .iter()
+            .copied()
+            .map(|(case_name, filter_type, projection_case)| {
+                AsyncFocusCase::new(
+                    case_name,
+                    parquet_file.clone(),
+                    filter_type,
+                    projection_case,
+                )
+            }),
+    );
+}
+
+fn push_baseline_selectivity_cases(cases: &mut Vec<AsyncFocusCase>, parquet_file: &Bytes) {
+    push_focus_cases(
+        cases,
+        parquet_file,
+        &[
+            (
+                "utf8_non_empty",
+                FilterType::Utf8ViewNonEmpty,
+                ProjectionCase::ExcludeFilterColumn,
+            ),
+            (
+                "utf8_non_empty",
+                FilterType::Utf8ViewNonEmpty,
+                ProjectionCase::AllColumns,
+            ),
+            (
+                "high_selectivity_float64",
+                FilterType::UnselectiveUnclustered,
+                ProjectionCase::ExcludeFilterColumn,
+            ),
+            (
+                "high_selectivity_ts_clustered",
+                FilterType::UnselectiveClustered,
+                ProjectionCase::ExcludeFilterColumn,
+            ),
+            (
+                "fragmented_int64_10pct",
+                FilterType::ModeratelySelectiveUnclustered,
+                ProjectionCase::ExcludeFilterColumn,
+            ),
+            (
+                "selective_float64_1pct",
+                FilterType::SelectiveUnclustered,
+                ProjectionCase::ExcludeFilterColumn,
+            ),
+        ],
+    );
+}
+
+fn push_filter_only_cases(
+    cases: &mut Vec<AsyncFocusCase>,
+    parquet_file: &Bytes,
+    small_parquet_file: &Bytes,
+) {
+    push_focus_cases(
+        cases,
+        parquet_file,
+        &[
+            (
+                "point_lookup_filter_only",
+                FilterType::PointLookup,
+                ProjectionCase::FilterColumnsOnly,
+            ),
+            (
+                "projected_predicate_8pct_filter_only",
+                FilterType::ProjectedPredicate8Pct,
+                ProjectionCase::FilterColumnsOnly,
+            ),
+            (
+                "sparse_scalar_count_only",
+                FilterType::SparseScalarFixedOutput,
+                ProjectionCase::CountOnly,
+            ),
+            (
+                "quantity_range_filter_columns_only",
+                FilterType::QuantityRangePredicate,
+                ProjectionCase::FilterColumnsOnly,
+            ),
+        ],
+    );
+    push_focus_cases(
+        cases,
+        small_parquet_file,
+        &[(
+            "small_fragmented_scalar_filter_only",
+            FilterType::ModeratelySelectiveUnclustered,
+            ProjectionCase::FilterColumnsOnly,
+        )],
+    );
+}
+
+fn push_deferred_output_cases(
+    cases: &mut Vec<AsyncFocusCase>,
+    parquet_file: &Bytes,
+    small_parquet_file: &Bytes,
+) {
+    push_focus_cases(
+        cases,
+        parquet_file,
+        &[
+            (
+                "scalar_prefix_utf8_output",
+                FilterType::ScalarPrefixUtf8Output,
+                ProjectionCase::Utf8Only,
+            ),
+            (
+                "point_lookup_deferred_fixed_output",
+                FilterType::PointLookup,
+                ProjectionCase::Float64Only,
+            ),
+            (
+                "sparse_scalar_fixed_output",
+                FilterType::SparseScalarFixedOutput,
+                ProjectionCase::Float64Only,
+            ),
+            (
+                "quantity_range_numeric_output",
+                FilterType::QuantityRangePredicate,
+                ProjectionCase::Float64Only,
+            ),
+        ],
+    );
+    push_focus_cases(
+        cases,
+        small_parquet_file,
+        &[(
+            "small_scalar_prefix_utf8_output",
+            FilterType::ScalarPrefixUtf8Output,
+            ProjectionCase::Utf8Only,
+        )],
+    );
+}
+
+fn push_predicate_cost_cases(cases: &mut Vec<AsyncFocusCase>, parquet_file: &Bytes) {
+    push_focus_cases(
+        cases,
+        parquet_file,
+        &[
+            (
+                "fixed_then_varwidth_predicates",
+                FilterType::FixedThenVarWidthPredicates,
+                ProjectionCase::Float64Only,
+            ),
+            (
+                "varwidth_then_fixed_predicates",
+                FilterType::VarWidthThenFixedPredicates,
+                ProjectionCase::Float64Only,
+            ),
+            (
+                "multi_scalar_projected_key",
+                FilterType::MultiScalarProjectedKey,
+                ProjectionCase::Float64AndTs,
+            ),
+            (
+                "complex_or_mixed_predicates",
+                FilterType::ComplexOrMixedPredicates,
+                ProjectionCase::Float64Only,
+            ),
+        ],
+    );
+}
+
+fn push_projected_predicate_cases(cases: &mut Vec<AsyncFocusCase>, parquet_file: &Bytes) {
+    // Projected-predicate shapes. The predicate column is also projected, so
+    // pushdown must not assume the predicate decode is purely overhead.
+    push_focus_cases(
+        cases,
+        parquet_file,
+        &[
+            (
+                "projected_dynamic_filters",
+                FilterType::ProjectedDynamicFilters,
+                ProjectionCase::FixedColumns,
+            ),
+            (
+                "sparse_projected_predicates_fixed_output",
+                FilterType::SparseProjectedPredicatesFixedOutput,
+                ProjectionCase::FixedColumns,
+            ),
+        ],
+    );
+
+    push_projected_predicate_sweep(cases, parquet_file);
+    push_clustered_projected_predicate_cases(cases, parquet_file);
+    push_focus_cases(
+        cases,
+        parquet_file,
+        &[
+            (
+                "sparse_projected_fact_scan",
+                FilterType::SparseProjectedFactScan,
+                ProjectionCase::FixedColumns,
+            ),
+            (
+                "sparse_utf8_projected_predicate",
+                FilterType::Utf8ViewMissing,
+                ProjectionCase::AllColumns,
+            ),
+        ],
+    );
+}
+
+fn push_projected_predicate_sweep(cases: &mut Vec<AsyncFocusCase>, parquet_file: &Bytes) {
+    // The fixed-output sweep anchors the post-filter shortcut across
+    // fragmented selectivity. Variable-width guardrails make the deferred-output
+    // cost boundary explicit without expanding the full Cartesian product.
+    push_focus_cases(
+        cases,
+        parquet_file,
+        &[
+            (
+                "projected_predicate_1pct_fixed_output",
+                FilterType::ProjectedPredicate1Pct,
+                ProjectionCase::Int64AndFloat64,
+            ),
+            (
+                "projected_predicate_5pct_fixed_output",
+                FilterType::ProjectedPredicate5Pct,
+                ProjectionCase::Int64AndFloat64,
+            ),
+            (
+                "projected_predicate_8pct_fixed_output",
+                FilterType::ProjectedPredicate8Pct,
+                ProjectionCase::Int64AndFloat64,
+            ),
+            (
+                "projected_predicate_10pct_fixed_output",
+                FilterType::ProjectedPredicate10Pct,
+                ProjectionCase::Int64AndFloat64,
+            ),
+            (
+                "projected_predicate_20pct_fixed_output",
+                FilterType::ProjectedPredicate20Pct,
+                ProjectionCase::Int64AndFloat64,
+            ),
+            (
+                "projected_predicate_30pct_fixed_output",
+                FilterType::ProjectedPredicate30Pct,
+                ProjectionCase::Int64AndFloat64,
+            ),
+            (
+                "projected_predicate_40pct_fixed_output",
+                FilterType::ProjectedPredicate40Pct,
+                ProjectionCase::Int64AndFloat64,
+            ),
+            (
+                "projected_predicate_50pct_fixed_output",
+                FilterType::ProjectedPredicate50Pct,
+                ProjectionCase::Int64AndFloat64,
+            ),
+            (
+                "projected_predicate_1pct_varwidth_output",
+                FilterType::ProjectedPredicate1Pct,
+                ProjectionCase::Int64AndUtf8,
+            ),
+            (
+                "projected_predicate_8pct_varwidth_output",
+                FilterType::ProjectedPredicate8Pct,
+                ProjectionCase::Int64AndUtf8,
+            ),
+            (
+                "projected_predicate_20pct_varwidth_output",
+                FilterType::ProjectedPredicate20Pct,
+                ProjectionCase::Int64AndUtf8,
+            ),
+            (
+                "projected_predicate_40pct_varwidth_output",
+                FilterType::ProjectedPredicate40Pct,
+                ProjectionCase::Int64AndUtf8,
+            ),
+        ],
+    );
+}
+
+fn push_clustered_projected_predicate_cases(cases: &mut Vec<AsyncFocusCase>, parquet_file: &Bytes) {
+    push_focus_cases(
+        cases,
+        parquet_file,
+        &[
+            (
+                "clustered_ts_8pct_fixed_output",
+                FilterType::ClusteredTs8PctProjectedPredicate,
+                ProjectionCase::Float64AndTs,
+            ),
+            (
+                "clustered_ts_8pct_varwidth_output",
+                FilterType::ClusteredTs8PctProjectedPredicate,
+                ProjectionCase::TsAndUtf8,
+            ),
+            (
+                "clustered_ts_20pct_fixed_output",
+                FilterType::ClusteredTs20PctProjectedPredicate,
+                ProjectionCase::Float64AndTs,
+            ),
+            (
+                "clustered_ts_20pct_varwidth_output",
+                FilterType::ClusteredTs20PctProjectedPredicate,
+                ProjectionCase::TsAndUtf8,
+            ),
+        ],
+    );
+}
+
+/// Isolate projected scans that do not construct a [`RowFilter`].
+///
+/// This tracks the reader-level shape seen in TPC-DS Q83 return-table scans:
+/// a narrow primitive projection where row-level pushdown metrics are zero.
+/// It deliberately lives outside the adaptive-materialization matrix because there is no
+/// filter strategy to choose.
+///
+/// ```text
+/// no RowFilter             projected primitive columns
+/// ┌───────────────┐    ┌───────────────┐
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │      ...      │    │      ...      │
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+/// └───────────────┘    └───────────────┘
+/// ```
+fn benchmark_projection_scan_focus(c: &mut Criterion) {
+    let parquet_file = Bytes::from(write_parquet_file());
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_projection_scan_focus");
+
+    let case_name = "primitive_projection_only";
+    let projection = vec![0, 1, 3];
+    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+    let metadata = Arc::clone(reader.metadata());
+    let schema_descr = metadata.file_metadata().schema_descr();
+    let projection_mask = ProjectionMask::roots(schema_descr, projection);
+
+    let bench_id = BenchmarkId::new(case_name, "async");
+    let rt_captured = rt.handle().clone();
+    group.bench_function(bench_id, |b| {
+        b.iter(|| {
+            let reader = reader.clone();
+            let projection_mask = projection_mask.clone();
+            rt_captured.block_on(benchmark_async_reader_projected(reader, projection_mask));
+        });
+    });
+
+    let bench_id = BenchmarkId::new(case_name, "sync");
+    group.bench_function(bench_id, |b| {
+        b.iter(|| {
+            let reader = reader.clone();
+            let projection_mask = projection_mask.clone();
+            benchmark_sync_reader_projected(reader, projection_mask);
+        });
+    });
+}
+
+struct AsyncFocusCase {
+    case_name: &'static str,
+    parquet_file: Bytes,
+    filter_type: FilterType,
+    projection_case: ProjectionCase,
+}
+
+impl AsyncFocusCase {
+    fn new(
+        case_name: &'static str,
+        parquet_file: Bytes,
+        filter_type: FilterType,
+        projection_case: ProjectionCase,
+    ) -> Self {
+        Self {
+            case_name,
+            parquet_file,
+            filter_type,
+            projection_case,
+        }
+    }
+}
+
+fn benchmark_async_focus_case(
+    group: &mut BenchmarkGroup<'_, WallTime>,
+    rt: &tokio::runtime::Runtime,
+    case: AsyncFocusCase,
+    strategies: &[AsyncStrategy],
+) {
+    let AsyncFocusCase {
+        case_name,
+        parquet_file,
+        filter_type,
+        projection_case,
+    } = case;
+
+    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+    let metadata = Arc::clone(reader.metadata());
+    let schema_descr = metadata.file_metadata().schema_descr();
+    let output_projection = output_projection_for(filter_type, &projection_case);
+    let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
+    let output_column_names = projection_names(&output_projection);
+    let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
+    let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
+    let pred_mask = ProjectionMask::roots(
+        schema_descr,
+        filter_type.filter_projection().iter().copied(),
+    );
+    let fixed_pred_mask = ProjectionMask::roots(schema_descr, [0]);
+    let varwidth_pred_mask = ProjectionMask::roots(schema_descr, [2]);
+    let sparse_int64_pred_mask = ProjectionMask::roots(schema_descr, [0]);
+    let sparse_ts_pred_mask = ProjectionMask::roots(schema_descr, [3]);
+    let scalar_float64_pred_mask = ProjectionMask::roots(schema_descr, [1]);
+
+    for strategy in strategies.iter().copied() {
+        let bench_id = BenchmarkId::new(
+            format!("{case_name}/{projection_case}"),
+            strategy.to_string(),
+        );
+        let rt_captured = rt.handle().clone();
+
+        group.bench_function(bench_id, |b| {
+            b.iter(|| {
+                let reader = reader.clone();
+                let pred_mask = pred_mask.clone();
+                let fixed_pred_mask = fixed_pred_mask.clone();
+                let varwidth_pred_mask = varwidth_pred_mask.clone();
+                let sparse_int64_pred_mask = sparse_int64_pred_mask.clone();
+                let sparse_ts_pred_mask = sparse_ts_pred_mask.clone();
+                let scalar_float64_pred_mask = scalar_float64_pred_mask.clone();
+                let projection_mask = projection_mask.clone();
+                let read_projection_mask = read_projection_mask.clone();
+                let output_column_names = output_column_names.clone();
+
+                rt_captured.block_on(async {
+                    match strategy {
+                        AsyncStrategy::FullPostFilter => {
+                            benchmark_async_reader_post_filter(
+                                reader,
+                                read_projection_mask,
+                                output_column_names,
+                                filter_type,
+                            )
+                            .await
+                        }
+                        AsyncStrategy::PushdownAuto => {
+                            let row_filter = row_filter_for_focus_case(
+                                filter_type,
+                                pred_mask,
+                                fixed_pred_mask,
+                                varwidth_pred_mask,
+                                sparse_int64_pred_mask,
+                                sparse_ts_pred_mask,
+                                scalar_float64_pred_mask,
+                            );
+                            benchmark_async_reader_with_policy(
+                                reader,
+                                projection_mask,
+                                row_filter,
+                                RowSelectionPolicy::default(),
+                            )
+                            .await
+                        }
+                        AsyncStrategy::PushdownSelectors => {
+                            let row_filter = row_filter_for_focus_case(
+                                filter_type,
+                                pred_mask,
+                                fixed_pred_mask,
+                                varwidth_pred_mask,
+                                sparse_int64_pred_mask,
+                                sparse_ts_pred_mask,
+                                scalar_float64_pred_mask,
+                            );
+                            benchmark_async_reader_with_policy(
+                                reader,
+                                projection_mask,
+                                row_filter,
+                                RowSelectionPolicy::Selectors,
+                            )
+                            .await
+                        }
+                        AsyncStrategy::PushdownMask => {
+                            let row_filter = row_filter_for_focus_case(
+                                filter_type,
+                                pred_mask,
+                                fixed_pred_mask,
+                                varwidth_pred_mask,
+                                sparse_int64_pred_mask,
+                                sparse_ts_pred_mask,
+                                scalar_float64_pred_mask,
+                            );
+                            benchmark_async_reader_with_policy(
+                                reader,
+                                projection_mask,
+                                row_filter,
+                                RowSelectionPolicy::Mask,
+                            )
+                            .await
+                        }
+                    }
+                })
+            });
+        });
+    }
+}
+
+fn output_projection_for(filter_type: FilterType, projection_case: &ProjectionCase) -> Vec<usize> {
+    let filter_columns = filter_type.filter_projection();
+    match projection_case {
+        ProjectionCase::AllColumns | ProjectionCase::ExcludeFilterColumn => COLUMN_NAMES
+            .iter()
+            .enumerate()
+            .map(|(idx, _)| idx)
+            .filter(move |idx| {
+                matches!(projection_case, ProjectionCase::AllColumns)
+                    || !filter_columns.contains(idx)
+            })
+            .collect(),
+        ProjectionCase::FilterColumnsOnly => filter_columns.to_vec(),
+        ProjectionCase::CountOnly => vec![],
+        ProjectionCase::FixedColumns => vec![0, 1, 3],
+        ProjectionCase::Float64AndTs => vec![1, 3],
+        ProjectionCase::Float64Only => vec![1],
+        ProjectionCase::Int64AndFloat64 => vec![0, 1],
+        ProjectionCase::Int64AndUtf8 => vec![0, 2],
+        ProjectionCase::TsAndUtf8 => vec![2, 3],
+        ProjectionCase::Utf8Only => vec![2],
+    }
+}
+
+fn full_post_filter_read_projection(
+    filter_type: FilterType,
+    output_projection: &[usize],
+) -> Vec<usize> {
+    let mut read_projection = output_projection.to_vec();
+    for filter_idx in filter_type.filter_projection() {
+        if !read_projection.contains(filter_idx) {
+            read_projection.push(*filter_idx);
+        }
+    }
+    read_projection.sort_unstable();
+    read_projection
+}
+
+fn projection_names(projection: &[usize]) -> Vec<&'static str> {
+    projection.iter().map(|idx| COLUMN_NAMES[*idx]).collect()
+}
+
+pub(crate) fn filter_projected_record_batch(
+    batch: &RecordBatch,
+    filter: &BooleanArray,
+    output_column_names: &[&str],
+) -> arrow::error::Result<RecordBatch> {
+    let output_projection = output_column_names
+        .iter()
+        .map(|name| batch.schema().index_of(name))
+        .collect::<arrow::error::Result<Vec<_>>>()?;
+    let output = batch.project(&output_projection)?;
+    arrow_select::filter::filter_record_batch(&output, filter)
+}
+
+pub(crate) fn post_filter_projected_num_rows(
+    batch: &RecordBatch,
+    filter: &BooleanArray,
+    output_column_names: &[&str],
+) -> arrow::error::Result<usize> {
+    if output_column_names.is_empty() {
+        return Ok(filter.true_count());
+    }
+
+    let output = filter_projected_record_batch(batch, filter, output_column_names)?;
+    Ok(output.num_rows())
+}
+
+fn row_filter_for(filter_type: FilterType, pred_mask: ProjectionMask) -> RowFilter {
+    let filter = ArrowPredicateFn::new(pred_mask, move |batch| filter_type.filter_batch(&batch));
+    RowFilter::new(vec![Box::new(filter)])
+}
+
+fn row_filter_for_focus_case(
+    filter_type: FilterType,
+    pred_mask: ProjectionMask,
+    fixed_pred_mask: ProjectionMask,
+    varwidth_pred_mask: ProjectionMask,
+    sparse_int64_pred_mask: ProjectionMask,
+    sparse_ts_pred_mask: ProjectionMask,
+    scalar_float64_pred_mask: ProjectionMask,
+) -> RowFilter {
+    match filter_type {
+        FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
+            let int64_filter = ArrowPredicateFn::new(fixed_pred_mask, move |batch: RecordBatch| {
+                let int64 = batch.column(batch.schema().index_of("int64")?);
+                eq(int64, &Int64Array::new_scalar(9999))
+            });
+            let utf8_filter =
+                ArrowPredicateFn::new(varwidth_pred_mask, move |batch: RecordBatch| {
+                    let utf8 = batch.column(batch.schema().index_of("utf8View")?);
+                    neq(utf8, &StringViewArray::new_scalar(""))
+                });
+
+            match filter_type {
+                FilterType::FixedThenVarWidthPredicates => {
+                    RowFilter::new(vec![Box::new(int64_filter), Box::new(utf8_filter)])
+                }
+                FilterType::VarWidthThenFixedPredicates => {
+                    RowFilter::new(vec![Box::new(utf8_filter), Box::new(int64_filter)])
+                }
+                _ => unreachable!(),
+            }
+        }
+        FilterType::MultiScalarProjectedKey => {
+            let int64_filter =
+                ArrowPredicateFn::new(sparse_int64_pred_mask, move |batch: RecordBatch| {
+                    let int64 = batch.column(batch.schema().index_of("int64")?);
+                    eq(int64, &Int64Array::new_scalar(62))
+                });
+            let float64_filter =
+                ArrowPredicateFn::new(scalar_float64_pred_mask, move |batch: RecordBatch| {
+                    let float64 = batch.column(batch.schema().index_of("float64")?);
+                    gt(float64, &Float64Array::new_scalar(10.0))
+                });
+            let ts_filter =
+                ArrowPredicateFn::new(sparse_ts_pred_mask, move |batch: RecordBatch| {
+                    let ts = batch.column(batch.schema().index_of("ts")?);
+                    lt(ts, &TimestampMillisecondArray::new_scalar(9000))
+                });
+
+            RowFilter::new(vec![
+                Box::new(int64_filter),
+                Box::new(float64_filter),
+                Box::new(ts_filter),
+            ])
+        }
+        FilterType::SparseScalarFixedOutput
+        | FilterType::ProjectedDynamicFilters
+        | FilterType::SparseProjectedPredicatesFixedOutput => {
+            let int64_filter =
+                ArrowPredicateFn::new(sparse_int64_pred_mask, move |batch: RecordBatch| {
+                    let int64 = batch.column(batch.schema().index_of("int64")?);
+                    let scalar = match filter_type {
+                        FilterType::ProjectedDynamicFilters => 12,
+                        _ => 8,
+                    };
+                    lt(int64, &Int64Array::new_scalar(scalar))
+                });
+            let ts_filter =
+                ArrowPredicateFn::new(sparse_ts_pred_mask, move |batch: RecordBatch| {
+                    let ts = batch.column(batch.schema().index_of("ts")?);
+                    lt(ts, &TimestampMillisecondArray::new_scalar(9000))
+                });
+
+            RowFilter::new(vec![Box::new(int64_filter), Box::new(ts_filter)])
+        }
+        _ => row_filter_for(filter_type, pred_mask),
+    }
+}
+
+#[derive(Clone, Copy)]
+enum NestedFilterType {
+    AlwaysTrueTag,
+    TagNotZero,
+}
+
+impl std::fmt::Display for NestedFilterType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::AlwaysTrueTag => write!(f, "always_true_tag"),
+            Self::TagNotZero => write!(f, "tag_not_zero"),
+        }
+    }
+}
+
+impl NestedFilterType {
+    fn filter_batch(self, batch: &RecordBatch) -> arrow::error::Result<BooleanArray> {
+        match self {
+            Self::AlwaysTrueTag => Ok(BooleanArray::from(vec![true; batch.num_rows()])),
+            Self::TagNotZero => {
+                let tag = batch.column(batch.schema().index_of("tag")?);
+                let scalar = StringViewArray::new_scalar("tag_0");
+                neq(tag, &scalar)
+            }
+        }
+    }
+}
+
+fn nested_row_filter_for(filter_type: NestedFilterType, pred_mask: ProjectionMask) -> RowFilter {
+    let filter = ArrowPredicateFn::new(pred_mask, move |batch| filter_type.filter_batch(&batch));
+    RowFilter::new(vec![Box::new(filter)])
+}
+
+/// Use async API
+async fn benchmark_async_reader(
+    reader: InMemoryReader,
+    projection_mask: ProjectionMask,
+    row_filter: RowFilter,
+) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .with_row_filter(row_filter)
+        .build()
+        .unwrap();
+    while let Some(b) = stream.next().await {
+        b.unwrap(); // consume the batches, no buffering
+    }
+}
+
+async fn benchmark_async_reader_with_policy(
+    reader: InMemoryReader,
+    projection_mask: ProjectionMask,
+    row_filter: RowFilter,
+    row_selection_policy: RowSelectionPolicy,
+) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .with_row_filter(row_filter)
+        .with_row_selection_policy(row_selection_policy)
+        .build()
+        .unwrap();
+    while let Some(b) = stream.next().await {
+        b.unwrap(); // consume the batches, no buffering
+    }
+}
+
+async fn benchmark_async_reader_post_filter(
+    reader: InMemoryReader,
+    read_projection: ProjectionMask,
+    output_column_names: Vec<&'static str>,
+    filter_type: FilterType,
+) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(read_projection)
+        .build()
+        .unwrap();
+
+    while let Some(b) = stream.next().await {
+        let batch = b.unwrap();
+        let filter = filter_type.filter_batch(&batch).unwrap();
+        let output_rows =
+            post_filter_projected_num_rows(&batch, &filter, &output_column_names).unwrap();
+        std::hint::black_box(output_rows);
+    }
+}
+
+async fn benchmark_async_reader_post_filter_nested(
+    reader: InMemoryReader,
+    read_projection: ProjectionMask,
+    output_column_names: &[&str],
+    filter_type: NestedFilterType,
+) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(read_projection)
+        .build()
+        .unwrap();
+
+    while let Some(b) = stream.next().await {
+        let batch = b.unwrap();
+        let filter = filter_type.filter_batch(&batch).unwrap();
+        let output_rows =
+            post_filter_projected_num_rows(&batch, &filter, output_column_names).unwrap();
+        std::hint::black_box(output_rows);
+    }
+}
+
+async fn benchmark_async_reader_projected(reader: InMemoryReader, projection_mask: ProjectionMask) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .build()
+        .unwrap();
+    while let Some(b) = stream.next().await {
+        let batch = b.unwrap();
+        std::hint::black_box(batch.num_rows());
+    }
+}
+
+/// Like [`benchmark_async_reader`] but also threads `with_limit(limit)` into
+/// the stream builder. Used by the `LIMIT` benchmark below.
+async fn benchmark_async_reader_with_limit(
+    reader: InMemoryReader,
+    projection_mask: ProjectionMask,
+    row_filter: RowFilter,
+    limit: usize,
+) {
+    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
+        .await
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .with_row_filter(row_filter)
+        .with_limit(limit)
+        .build()
+        .unwrap();
+    while let Some(b) = stream.next().await {
+        b.unwrap(); // consume the batches, no buffering
+    }
+}
+
+/// Use sync API
+fn benchmark_sync_reader(
+    reader: InMemoryReader,
+    projection_mask: ProjectionMask,
+    row_filter: RowFilter,
+) {
+    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .with_row_filter(row_filter)
+        .build()
+        .unwrap();
+    for b in stream {
+        b.unwrap(); // consume the batches, no buffering
+    }
+}
+
+fn benchmark_sync_reader_with_policy(
+    reader: InMemoryReader,
+    projection_mask: ProjectionMask,
+    row_filter: RowFilter,
+    row_selection_policy: RowSelectionPolicy,
+) {
+    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .with_row_filter(row_filter)
+        .with_row_selection_policy(row_selection_policy)
+        .build()
+        .unwrap();
+    for b in stream {
+        b.unwrap(); // consume the batches, no buffering
+    }
+}
+
+fn benchmark_sync_reader_post_filter(
+    reader: InMemoryReader,
+    read_projection: ProjectionMask,
+    output_column_names: Vec<&'static str>,
+    filter_type: FilterType,
+) {
+    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(read_projection)
+        .build()
+        .unwrap();
+
+    for b in stream {
+        let batch = b.unwrap();
+        let filter = filter_type.filter_batch(&batch).unwrap();
+        let output_rows =
+            post_filter_projected_num_rows(&batch, &filter, &output_column_names).unwrap();
+        std::hint::black_box(output_rows);
+    }
+}
+
+fn benchmark_sync_reader_projected(reader: InMemoryReader, projection_mask: ProjectionMask) {
+    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
+        .unwrap()
+        .with_batch_size(8192)
+        .with_projection(projection_mask)
+        .build()
+        .unwrap();
+
+    for b in stream {
+        let batch = b.unwrap();
+        std::hint::black_box(batch.num_rows());
+    }
+}
+
+/// Adapter to read asynchronously from in memory bytes and always loads the
+/// metadata with page indexes.
+#[derive(Debug, Clone)]
+struct InMemoryReader {
+    inner: Bytes,
+    metadata: Arc<ParquetMetaData>,
+}
+
+impl InMemoryReader {
+    fn try_new(inner: &Bytes) -> parquet::errors::Result<Self> {
+        let mut metadata_reader =
+            ParquetMetaDataReader::new().with_page_index_policy(PageIndexPolicy::Required);
+        metadata_reader.try_parse(inner)?;
+        let metadata = metadata_reader.finish().map(Arc::new)?;
+
+        Ok(Self {
+            // clone of bytes is cheap -- increments a refcount
+            inner: inner.clone(),
+            metadata,
+        })
+    }
+
+    fn metadata(&self) -> &Arc<ParquetMetaData> {
+        &self.metadata
+    }
+
+    fn into_inner(self) -> Bytes {
+        self.inner
+    }
+}
+
+impl AsyncFileReader for InMemoryReader {
+    fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> {
+        let data = self.inner.slice(range.start as usize..range.end as usize);
+        async move { Ok(data) }.boxed()
+    }
+
+    fn get_metadata<'a>(
+        &'a mut self,
+        _options: Option<&'a ArrowReaderOptions>,
+    ) -> BoxFuture<'a, parquet::errors::Result<Arc<ParquetMetaData>>> {
+        let metadata = Arc::clone(&self.metadata);
+        async move { Ok(metadata) }.boxed()
+    }
+}
+
+/// Benchmark filters with `LIMIT` short-circuit (`with_limit(N)`)
+///
+/// `PointLookup` is excluded because the filter has only 1 match in the
+/// whole file; `LIMIT 10` is not binding.
+fn benchmark_filters_with_limit(c: &mut Criterion) {
+    const LIMIT: usize = 10;
+
+    let parquet_file = Bytes::from(write_parquet_file());
+    let filter_types = vec![
+        FilterType::SelectiveUnclustered,
+        FilterType::ModeratelySelectiveClustered,
+        FilterType::ModeratelySelectiveUnclustered,
+        FilterType::UnselectiveUnclustered,
+        FilterType::UnselectiveClustered,
+        FilterType::Utf8ViewNonEmpty,
+        FilterType::Composite,
+    ];
+    let projection_cases = vec![
+        ProjectionCase::AllColumns,
+        ProjectionCase::ExcludeFilterColumn,
+    ];
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_limit");
+
+    for filter_type in filter_types {
+        for proj_case in &projection_cases {
+            let filter_col = filter_type.filter_projection().to_vec();
+            let output_projection = output_projection_for(filter_type, proj_case);
+
+            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+            let metadata = Arc::clone(reader.metadata());
+            let schema_descr = metadata.file_metadata().schema_descr();
+            let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
+            let pred_mask = ProjectionMask::roots(schema_descr, filter_col);
+
+            let benchmark_name = format!("{filter_type}/{proj_case}/limit{LIMIT}");
+
+            // async variant
+            let bench_id = BenchmarkId::new(benchmark_name.clone(), "async");
+            let rt_handle = rt.handle().clone();
+            let pred_mask_async = pred_mask.clone();
+            let projection_mask_async = projection_mask.clone();
+            let reader_async = reader.clone();
+            group.bench_function(bench_id, |b| {
+                b.iter(|| {
+                    let reader = reader_async.clone();
+                    let pred_mask = pred_mask_async.clone();
+                    let projection_mask = projection_mask_async.clone();
+                    // RowFilter and ArrowPredicateFn are not Clone — fresh each iter.
+                    let predicate = ArrowPredicateFn::new(pred_mask, move |batch: RecordBatch| {
+                        Ok(filter_type.filter_batch(&batch).unwrap())
+                    });
+                    let row_filter = RowFilter::new(vec![Box::new(predicate)]);
+                    rt_handle.block_on(benchmark_async_reader_with_limit(
+                        reader,
+                        projection_mask,
+                        row_filter,
+                        LIMIT,
+                    ));
+                });
+            });
+        }
+    }
+}
+
+/// Focused nested-output case for comparing manual post-filtering against
+/// row-filter pushdown policies.
+///
+/// The predicate column is an unprojected variable-width scalar column, and the
+/// output is a whole nested `Struct` root. This isolates the reader case enabled
+/// by root-aware post-filter projection without requiring recursive nested-child
+/// projection.
+fn benchmark_async_nested_post_filter_focus(c: &mut Criterion) {
+    let parquet_file = Bytes::from(write_nested_parquet_file_with_rows(
+        TOTAL_ROWS,
+        ROW_GROUP_SIZE,
+    ));
+    let strategies = [
+        AsyncStrategy::FullPostFilter,
+        AsyncStrategy::PushdownAuto,
+        AsyncStrategy::PushdownMask,
+        AsyncStrategy::PushdownSelectors,
+    ];
+
+    let rt = tokio::runtime::Builder::new_multi_thread()
+        .enable_all()
+        .build()
+        .unwrap();
+
+    let mut group = c.benchmark_group("arrow_reader_row_filter_async_nested_post_filter_focus");
+    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
+    let metadata = Arc::clone(reader.metadata());
+    let schema_descr = metadata.file_metadata().schema_descr();
+    let output_projection = ProjectionMask::columns(schema_descr, ["payload"]);
+    let read_projection = ProjectionMask::columns(schema_descr, ["tag", "payload"]);
+    let pred_mask = ProjectionMask::columns(schema_descr, ["tag"]);
+    let filter_cases = [
+        NestedFilterType::AlwaysTrueTag,
+        NestedFilterType::TagNotZero,
+    ];
+
+    for filter_case in filter_cases {
+        for strategy in strategies {
+            let bench_id = BenchmarkId::new(
+                format!("whole_struct_output/{filter_case}"),
+                strategy.to_string(),
+            );
+            let rt_captured = rt.handle().clone();
+            group.bench_function(bench_id, |b| {
+                b.iter(|| {
+                    let reader = reader.clone();
+                    let pred_mask = pred_mask.clone();
+                    let output_projection = output_projection.clone();
+                    let read_projection = read_projection.clone();
+                    rt_captured.block_on(async {
+                        match strategy {
+                            AsyncStrategy::FullPostFilter => {
+                                benchmark_async_reader_post_filter_nested(
+                                    reader,
+                                    read_projection,
+                                    &["payload"],
+                                    filter_case,
+                                )
+                                .await
+                            }
+                            AsyncStrategy::PushdownAuto => {
+                                benchmark_async_reader_with_policy(
+                                    reader,
+                                    output_projection,
+                                    nested_row_filter_for(filter_case, pred_mask),
+                                    RowSelectionPolicy::default(),
+                                )
+                                .await
+                            }
+                            AsyncStrategy::PushdownSelectors => {
+                                benchmark_async_reader_with_policy(
+                                    reader,
+                                    output_projection,
+                                    nested_row_filter_for(filter_case, pred_mask),
+                                    RowSelectionPolicy::Selectors,
+                                )
+                                .await
+                            }
+                            AsyncStrategy::PushdownMask => {
+                                benchmark_async_reader_with_policy(
+                                    reader,
+                                    output_projection,
+                                    nested_row_filter_for(filter_case, pred_mask),
+                                    RowSelectionPolicy::Mask,
+                                )
+                                .await
+                            }
+                        }
+                    })
+                });
+            });
+        }
+    }
+}
+
+criterion_group!(benches, benchmark_async_auto_policy_focus,);
+criterion_main!(benches);
diff --git a/parquet/benches/arrow_reader_row_filter.rs b/parquet/benches/arrow_reader_row_filter.rs
index cb26443fa3b0..e1e634e43b14 100644
--- a/parquet/benches/arrow_reader_row_filter.rs
+++ b/parquet/benches/arrow_reader_row_filter.rs
@@ -47,11 +47,9 @@
 //! - `arrow_reader_row_filter_{async_,}strategy_matrix`: full post-filtering
 //!   versus row-filter pushdown with `Auto`, forced `Selectors`, and forced
 //!   `Mask`.
-//! - `arrow_reader_row_filter_async_auto_policy_focus`: focused synthetic shapes
-//!   derived from ClickBench and TPC-DS regressions, including sparse and dense
-//!   filters, clustered and fragmented selections, variable-width predicates,
-//!   projected predicate columns, count/filter-only outputs, and mixed predicate
-//!   order.
+//! - `arrow_reader_materialization_policy`: focused synthetic shapes for the
+//!   `Auto` materialization policy, split into a separate bench target to keep
+//!   baseline row-filter benchmarks small.
 //! - `arrow_reader_projection_scan_focus`: projection-only scans that do not
 //!   construct a `RowFilter`.
 //! - `arrow_reader_row_filter_async_nested_post_filter_focus`: nested root output
@@ -67,9 +65,7 @@ use arrow::record_batch::RecordBatch;
 use arrow_array::StringViewArray;
 use arrow_array::builder::{ArrayBuilder, StringViewBuilder};
 use bytes::Bytes;
-use criterion::{
-    BenchmarkGroup, BenchmarkId, Criterion, criterion_group, criterion_main, measurement::WallTime,
-};
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use futures::future::BoxFuture;
 use futures::{FutureExt, StreamExt};
 use parquet::arrow::arrow_reader::{
@@ -266,6 +262,7 @@ fn write_nested_parquet_file_with_rows(total_rows: usize, row_group_size: usize)
 
 /// ProjectionCase defines the projection mode for the benchmark:
 /// either projecting all columns or excluding the column that is used for filtering.
+#[allow(dead_code)]
 #[derive(Clone, Copy)]
 enum ProjectionCase {
     AllColumns,
@@ -339,6 +336,7 @@ impl std::fmt::Display for AsyncStrategy {
 
 /// FilterType encapsulates the different filter comparisons.
 /// The variants correspond to the different filter patterns.
+#[allow(dead_code)]
 #[derive(Clone, Copy, Debug)]
 pub(crate) enum FilterType {
     /// "Point Lookup": selects a single row
@@ -638,8 +636,12 @@ pub(crate) enum FilterType {
     /// dynamic filter applies to the date key, the same date key is projected,
     /// and an additional fixed-width sales value can still be deferred by
     /// predicate pushdown.
-    /// Selectivity ranges from 5% to 50%: approx 25K to 250K selected rows in
+    /// Selectivity ranges from 1% to 50%: approx 5K to 250K selected rows in
     /// 500K.
+    /// The 1% variants also cover a TPC-DS Q41-like item scan where predicate
+    /// and output overlap, selection is highly fragmented, and the deferred
+    /// output payload is small enough that post-filtering can be faster than
+    /// row-filter pushdown.
     ///
     /// ```text
     /// ┌───────────────┐    ┌───────────────┐
@@ -654,6 +656,7 @@ pub(crate) enum FilterType {
     /// │      ...      │    │      ...      │
     /// └───────────────┘    └───────────────┘
     /// ```
+    ProjectedPredicate1Pct,
     ProjectedPredicate5Pct,
     ProjectedPredicate8Pct,
     ProjectedPredicate10Pct,
@@ -746,6 +749,7 @@ impl std::fmt::Display for FilterType {
             FilterType::SparseProjectedPredicatesFixedOutput => {
                 "int64 < 8 AND ts < 9000 projected predicates"
             }
+            FilterType::ProjectedPredicate1Pct => "int64 < 1 projected predicate",
             FilterType::ProjectedPredicate10Pct => {
                 "int64 < 10 projected predicate with fixed output"
             }
@@ -885,7 +889,8 @@ impl FilterType {
                 let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
                 and(&item_like, &date_like)
             }
-            FilterType::ProjectedPredicate5Pct
+            FilterType::ProjectedPredicate1Pct
+            | FilterType::ProjectedPredicate5Pct
             | FilterType::ProjectedPredicate8Pct
             | FilterType::ProjectedPredicate10Pct
             | FilterType::ProjectedPredicate20Pct
@@ -894,6 +899,7 @@ impl FilterType {
             | FilterType::ProjectedPredicate50Pct => {
                 let int64 = batch.column(batch.schema().index_of("int64")?);
                 let threshold = match self {
+                    FilterType::ProjectedPredicate1Pct => 1,
                     FilterType::ProjectedPredicate5Pct => 5,
                     FilterType::ProjectedPredicate8Pct => 8,
                     FilterType::ProjectedPredicate10Pct => 10,
@@ -955,7 +961,8 @@ impl FilterType {
             | FilterType::ProjectedDynamicFilters
             | FilterType::SparseProjectedPredicatesFixedOutput => &[0, 3],
             FilterType::ComplexOrMixedPredicates => &[0, 1, 2, 3],
-            FilterType::ProjectedPredicate5Pct
+            FilterType::ProjectedPredicate1Pct
+            | FilterType::ProjectedPredicate5Pct
             | FilterType::ProjectedPredicate8Pct
             | FilterType::ProjectedPredicate10Pct
             | FilterType::ProjectedPredicate20Pct
@@ -1258,279 +1265,6 @@ fn benchmark_async_strategy_matrix(c: &mut Criterion) {
     }
 }
 
-/// A focused async-only matrix that isolates the cases most relevant to the
-/// row-filter Auto policy. This is intentionally narrower than
-/// [`benchmark_async_strategy_matrix`]: it keeps the benchmark output focused
-/// on cases where later PRs may teach `Auto` to switch execution modes or
-/// explicitly keep predicate pushdown.
-///
-/// The cases use structure-oriented names. Comments on [`FilterType`] keep the
-/// ClickBench and TPC-DS provenance, but these are synthetic reader shapes, not
-/// end-to-end query benchmarks.
-///
-/// Individual [`FilterType`] variants include shaded-row diagrams for the
-/// representative selection shapes.
-fn benchmark_async_auto_policy_focus(c: &mut Criterion) {
-    const SMALL_TOTAL_ROWS: usize = 20_000;
-    const SMALL_ROW_GROUP_SIZE: usize = 5_000;
-
-    let parquet_file = Bytes::from(write_parquet_file());
-    let small_parquet_file = Bytes::from(write_parquet_file_with_rows(
-        SMALL_TOTAL_ROWS,
-        SMALL_ROW_GROUP_SIZE,
-    ));
-    let cases = [
-        // Baseline selectivity shapes.
-        AsyncFocusCase::new(
-            "utf8_non_empty",
-            parquet_file.clone(),
-            FilterType::Utf8ViewNonEmpty,
-            ProjectionCase::ExcludeFilterColumn,
-        ),
-        AsyncFocusCase::new(
-            "utf8_non_empty",
-            parquet_file.clone(),
-            FilterType::Utf8ViewNonEmpty,
-            ProjectionCase::AllColumns,
-        ),
-        AsyncFocusCase::new(
-            "high_selectivity_float64",
-            parquet_file.clone(),
-            FilterType::UnselectiveUnclustered,
-            ProjectionCase::ExcludeFilterColumn,
-        ),
-        AsyncFocusCase::new(
-            "high_selectivity_ts_clustered",
-            parquet_file.clone(),
-            FilterType::UnselectiveClustered,
-            ProjectionCase::ExcludeFilterColumn,
-        ),
-        AsyncFocusCase::new(
-            "fragmented_int64_10pct",
-            parquet_file.clone(),
-            FilterType::ModeratelySelectiveUnclustered,
-            ProjectionCase::ExcludeFilterColumn,
-        ),
-        AsyncFocusCase::new(
-            "selective_float64_1pct",
-            parquet_file.clone(),
-            FilterType::SelectiveUnclustered,
-            ProjectionCase::ExcludeFilterColumn,
-        ),
-        // Filter-only and count-only shapes. These guard the cases where there
-        // is no deferred output column to amortize the cost of row selection.
-        AsyncFocusCase::new(
-            "point_lookup_filter_only",
-            parquet_file.clone(),
-            FilterType::PointLookup,
-            ProjectionCase::FilterColumnsOnly,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_8pct_filter_only",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate8Pct,
-            ProjectionCase::FilterColumnsOnly,
-        ),
-        AsyncFocusCase::new(
-            "sparse_scalar_count_only",
-            parquet_file.clone(),
-            FilterType::SparseScalarFixedOutput,
-            ProjectionCase::CountOnly,
-        ),
-        AsyncFocusCase::new(
-            "small_fragmented_scalar_filter_only",
-            small_parquet_file.clone(),
-            FilterType::ModeratelySelectiveUnclustered,
-            ProjectionCase::FilterColumnsOnly,
-        ),
-        AsyncFocusCase::new(
-            "quantity_range_filter_columns_only",
-            parquet_file.clone(),
-            FilterType::QuantityRangePredicate,
-            ProjectionCase::FilterColumnsOnly,
-        ),
-        // Deferred-output shapes. Predicate columns are not part of the output,
-        // so pushdown can skip decoding projected columns for rejected rows.
-        AsyncFocusCase::new(
-            "scalar_prefix_utf8_output",
-            parquet_file.clone(),
-            FilterType::ScalarPrefixUtf8Output,
-            ProjectionCase::Utf8Only,
-        ),
-        AsyncFocusCase::new(
-            "small_scalar_prefix_utf8_output",
-            small_parquet_file.clone(),
-            FilterType::ScalarPrefixUtf8Output,
-            ProjectionCase::Utf8Only,
-        ),
-        AsyncFocusCase::new(
-            "point_lookup_deferred_fixed_output",
-            parquet_file.clone(),
-            FilterType::PointLookup,
-            ProjectionCase::Float64Only,
-        ),
-        AsyncFocusCase::new(
-            "sparse_scalar_fixed_output",
-            parquet_file.clone(),
-            FilterType::SparseScalarFixedOutput,
-            ProjectionCase::Float64Only,
-        ),
-        AsyncFocusCase::new(
-            "quantity_range_numeric_output",
-            parquet_file.clone(),
-            FilterType::QuantityRangePredicate,
-            ProjectionCase::Float64Only,
-        ),
-        // Multi-predicate shapes. These make predicate order and predicate
-        // evaluation cost visible separately from projection cost.
-        AsyncFocusCase::new(
-            "fixed_then_varwidth_predicates",
-            parquet_file.clone(),
-            FilterType::FixedThenVarWidthPredicates,
-            ProjectionCase::Float64Only,
-        ),
-        AsyncFocusCase::new(
-            "varwidth_then_fixed_predicates",
-            parquet_file.clone(),
-            FilterType::VarWidthThenFixedPredicates,
-            ProjectionCase::Float64Only,
-        ),
-        AsyncFocusCase::new(
-            "multi_scalar_projected_key",
-            parquet_file.clone(),
-            FilterType::MultiScalarProjectedKey,
-            ProjectionCase::Float64AndTs,
-        ),
-        AsyncFocusCase::new(
-            "complex_or_mixed_predicates",
-            parquet_file.clone(),
-            FilterType::ComplexOrMixedPredicates,
-            ProjectionCase::Float64Only,
-        ),
-        // Projected-predicate shapes. The predicate column is also projected,
-        // so pushdown must not assume the predicate decode is purely overhead.
-        AsyncFocusCase::new(
-            "projected_dynamic_filters",
-            parquet_file.clone(),
-            FilterType::ProjectedDynamicFilters,
-            ProjectionCase::FixedColumns,
-        ),
-        AsyncFocusCase::new(
-            "sparse_projected_predicates_fixed_output",
-            parquet_file.clone(),
-            FilterType::SparseProjectedPredicatesFixedOutput,
-            ProjectionCase::FixedColumns,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_5pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate5Pct,
-            ProjectionCase::Int64AndFloat64,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_8pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate8Pct,
-            ProjectionCase::Int64AndFloat64,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_8pct_varwidth_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate8Pct,
-            ProjectionCase::Int64AndUtf8,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_10pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate10Pct,
-            ProjectionCase::Int64AndFloat64,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_20pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate20Pct,
-            ProjectionCase::Int64AndFloat64,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_20pct_varwidth_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate20Pct,
-            ProjectionCase::Int64AndUtf8,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_30pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate30Pct,
-            ProjectionCase::Int64AndFloat64,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_40pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate40Pct,
-            ProjectionCase::Int64AndFloat64,
-        ),
-        AsyncFocusCase::new(
-            "projected_predicate_50pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ProjectedPredicate50Pct,
-            ProjectionCase::Int64AndFloat64,
-        ),
-        AsyncFocusCase::new(
-            "clustered_ts_8pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ClusteredTs8PctProjectedPredicate,
-            ProjectionCase::Float64AndTs,
-        ),
-        AsyncFocusCase::new(
-            "clustered_ts_8pct_varwidth_output",
-            parquet_file.clone(),
-            FilterType::ClusteredTs8PctProjectedPredicate,
-            ProjectionCase::TsAndUtf8,
-        ),
-        AsyncFocusCase::new(
-            "clustered_ts_20pct_fixed_output",
-            parquet_file.clone(),
-            FilterType::ClusteredTs20PctProjectedPredicate,
-            ProjectionCase::Float64AndTs,
-        ),
-        AsyncFocusCase::new(
-            "clustered_ts_20pct_varwidth_output",
-            parquet_file.clone(),
-            FilterType::ClusteredTs20PctProjectedPredicate,
-            ProjectionCase::TsAndUtf8,
-        ),
-        AsyncFocusCase::new(
-            "sparse_projected_fact_scan",
-            parquet_file.clone(),
-            FilterType::SparseProjectedFactScan,
-            ProjectionCase::FixedColumns,
-        ),
-        AsyncFocusCase::new(
-            "sparse_utf8_projected_predicate",
-            parquet_file.clone(),
-            FilterType::Utf8ViewMissing,
-            ProjectionCase::AllColumns,
-        ),
-    ];
-    let strategies = [
-        AsyncStrategy::FullPostFilter,
-        AsyncStrategy::PushdownAuto,
-        AsyncStrategy::PushdownMask,
-        AsyncStrategy::PushdownSelectors,
-    ];
-
-    let rt = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()
-        .unwrap();
-
-    let mut group = c.benchmark_group("arrow_reader_row_filter_async_auto_policy_focus");
-
-    for case in cases {
-        benchmark_async_focus_case(&mut group, &rt, case, &strategies);
-    }
-}
-
 /// Isolate projected scans that do not construct a [`RowFilter`].
 ///
 /// This tracks the reader-level shape seen in TPC-DS Q83 return-table scans:
@@ -1588,152 +1322,6 @@ fn benchmark_projection_scan_focus(c: &mut Criterion) {
     });
 }
 
-struct AsyncFocusCase {
-    case_name: &'static str,
-    parquet_file: Bytes,
-    filter_type: FilterType,
-    projection_case: ProjectionCase,
-}
-
-impl AsyncFocusCase {
-    fn new(
-        case_name: &'static str,
-        parquet_file: Bytes,
-        filter_type: FilterType,
-        projection_case: ProjectionCase,
-    ) -> Self {
-        Self {
-            case_name,
-            parquet_file,
-            filter_type,
-            projection_case,
-        }
-    }
-}
-
-fn benchmark_async_focus_case(
-    group: &mut BenchmarkGroup<'_, WallTime>,
-    rt: &tokio::runtime::Runtime,
-    case: AsyncFocusCase,
-    strategies: &[AsyncStrategy],
-) {
-    let AsyncFocusCase {
-        case_name,
-        parquet_file,
-        filter_type,
-        projection_case,
-    } = case;
-
-    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
-    let metadata = Arc::clone(reader.metadata());
-    let schema_descr = metadata.file_metadata().schema_descr();
-    let output_projection = output_projection_for(filter_type, &projection_case);
-    let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
-    let output_column_names = projection_names(&output_projection);
-    let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
-    let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
-    let pred_mask = ProjectionMask::roots(
-        schema_descr,
-        filter_type.filter_projection().iter().copied(),
-    );
-    let fixed_pred_mask = ProjectionMask::roots(schema_descr, [0]);
-    let varwidth_pred_mask = ProjectionMask::roots(schema_descr, [2]);
-    let sparse_int64_pred_mask = ProjectionMask::roots(schema_descr, [0]);
-    let sparse_ts_pred_mask = ProjectionMask::roots(schema_descr, [3]);
-    let scalar_float64_pred_mask = ProjectionMask::roots(schema_descr, [1]);
-
-    for strategy in strategies.iter().copied() {
-        let bench_id = BenchmarkId::new(
-            format!("{case_name}/{projection_case}"),
-            strategy.to_string(),
-        );
-        let rt_captured = rt.handle().clone();
-
-        group.bench_function(bench_id, |b| {
-            b.iter(|| {
-                let reader = reader.clone();
-                let pred_mask = pred_mask.clone();
-                let fixed_pred_mask = fixed_pred_mask.clone();
-                let varwidth_pred_mask = varwidth_pred_mask.clone();
-                let sparse_int64_pred_mask = sparse_int64_pred_mask.clone();
-                let sparse_ts_pred_mask = sparse_ts_pred_mask.clone();
-                let scalar_float64_pred_mask = scalar_float64_pred_mask.clone();
-                let projection_mask = projection_mask.clone();
-                let read_projection_mask = read_projection_mask.clone();
-                let output_column_names = output_column_names.clone();
-
-                rt_captured.block_on(async {
-                    match strategy {
-                        AsyncStrategy::FullPostFilter => {
-                            benchmark_async_reader_post_filter(
-                                reader,
-                                read_projection_mask,
-                                output_column_names,
-                                filter_type,
-                            )
-                            .await
-                        }
-                        AsyncStrategy::PushdownAuto => {
-                            let row_filter = row_filter_for_focus_case(
-                                filter_type,
-                                pred_mask,
-                                fixed_pred_mask,
-                                varwidth_pred_mask,
-                                sparse_int64_pred_mask,
-                                sparse_ts_pred_mask,
-                                scalar_float64_pred_mask,
-                            );
-                            benchmark_async_reader_with_policy(
-                                reader,
-                                projection_mask,
-                                row_filter,
-                                RowSelectionPolicy::default(),
-                            )
-                            .await
-                        }
-                        AsyncStrategy::PushdownSelectors => {
-                            let row_filter = row_filter_for_focus_case(
-                                filter_type,
-                                pred_mask,
-                                fixed_pred_mask,
-                                varwidth_pred_mask,
-                                sparse_int64_pred_mask,
-                                sparse_ts_pred_mask,
-                                scalar_float64_pred_mask,
-                            );
-                            benchmark_async_reader_with_policy(
-                                reader,
-                                projection_mask,
-                                row_filter,
-                                RowSelectionPolicy::Selectors,
-                            )
-                            .await
-                        }
-                        AsyncStrategy::PushdownMask => {
-                            let row_filter = row_filter_for_focus_case(
-                                filter_type,
-                                pred_mask,
-                                fixed_pred_mask,
-                                varwidth_pred_mask,
-                                sparse_int64_pred_mask,
-                                sparse_ts_pred_mask,
-                                scalar_float64_pred_mask,
-                            );
-                            benchmark_async_reader_with_policy(
-                                reader,
-                                projection_mask,
-                                row_filter,
-                                RowSelectionPolicy::Mask,
-                            )
-                            .await
-                        }
-                    }
-                })
-            });
-        });
-    }
-}
-
 fn output_projection_for(filter_type: FilterType, projection_case: &ProjectionCase) -> Vec<usize> {
     let filter_columns = filter_type.filter_projection();
     match projection_case {
@@ -1807,84 +1395,6 @@ fn row_filter_for(filter_type: FilterType, pred_mask: ProjectionMask) -> RowFilt
     RowFilter::new(vec![Box::new(filter)])
 }
 
-fn row_filter_for_focus_case(
-    filter_type: FilterType,
-    pred_mask: ProjectionMask,
-    fixed_pred_mask: ProjectionMask,
-    varwidth_pred_mask: ProjectionMask,
-    sparse_int64_pred_mask: ProjectionMask,
-    sparse_ts_pred_mask: ProjectionMask,
-    scalar_float64_pred_mask: ProjectionMask,
-) -> RowFilter {
-    match filter_type {
-        FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
-            let int64_filter = ArrowPredicateFn::new(fixed_pred_mask, move |batch: RecordBatch| {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                eq(int64, &Int64Array::new_scalar(9999))
-            });
-            let utf8_filter =
-                ArrowPredicateFn::new(varwidth_pred_mask, move |batch: RecordBatch| {
-                    let utf8 = batch.column(batch.schema().index_of("utf8View")?);
-                    neq(utf8, &StringViewArray::new_scalar(""))
-                });
-
-            match filter_type {
-                FilterType::FixedThenVarWidthPredicates => {
-                    RowFilter::new(vec![Box::new(int64_filter), Box::new(utf8_filter)])
-                }
-                FilterType::VarWidthThenFixedPredicates => {
-                    RowFilter::new(vec![Box::new(utf8_filter), Box::new(int64_filter)])
-                }
-                _ => unreachable!(),
-            }
-        }
-        FilterType::MultiScalarProjectedKey => {
-            let int64_filter =
-                ArrowPredicateFn::new(sparse_int64_pred_mask, move |batch: RecordBatch| {
-                    let int64 = batch.column(batch.schema().index_of("int64")?);
-                    eq(int64, &Int64Array::new_scalar(62))
-                });
-            let float64_filter =
-                ArrowPredicateFn::new(scalar_float64_pred_mask, move |batch: RecordBatch| {
-                    let float64 = batch.column(batch.schema().index_of("float64")?);
-                    gt(float64, &Float64Array::new_scalar(10.0))
-                });
-            let ts_filter =
-                ArrowPredicateFn::new(sparse_ts_pred_mask, move |batch: RecordBatch| {
-                    let ts = batch.column(batch.schema().index_of("ts")?);
-                    lt(ts, &TimestampMillisecondArray::new_scalar(9000))
-                });
-
-            RowFilter::new(vec![
-                Box::new(int64_filter),
-                Box::new(float64_filter),
-                Box::new(ts_filter),
-            ])
-        }
-        FilterType::SparseScalarFixedOutput
-        | FilterType::ProjectedDynamicFilters
-        | FilterType::SparseProjectedPredicatesFixedOutput => {
-            let int64_filter =
-                ArrowPredicateFn::new(sparse_int64_pred_mask, move |batch: RecordBatch| {
-                    let int64 = batch.column(batch.schema().index_of("int64")?);
-                    let scalar = match filter_type {
-                        FilterType::ProjectedDynamicFilters => 12,
-                        _ => 8,
-                    };
-                    lt(int64, &Int64Array::new_scalar(scalar))
-                });
-            let ts_filter =
-                ArrowPredicateFn::new(sparse_ts_pred_mask, move |batch: RecordBatch| {
-                    let ts = batch.column(batch.schema().index_of("ts")?);
-                    lt(ts, &TimestampMillisecondArray::new_scalar(9000))
-                });
-
-            RowFilter::new(vec![Box::new(int64_filter), Box::new(ts_filter)])
-        }
-        _ => row_filter_for(filter_type, pred_mask),
-    }
-}
-
 #[derive(Clone, Copy)]
 enum NestedFilterType {
     AlwaysTrueTag,
@@ -2328,7 +1838,6 @@ criterion_group!(
     benchmark_filters_and_projections,
     benchmark_sync_strategy_matrix,
     benchmark_async_strategy_matrix,
-    benchmark_async_auto_policy_focus,
     benchmark_projection_scan_focus,
     benchmark_filters_with_limit,
     benchmark_async_nested_post_filter_focus,

From 3b65fc48582e961dc9e7db7a641afdeb083c5efb Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei.huang@jsessh.com>
Date: Tue, 16 Jun 2026 21:50:06 +0800
Subject: [PATCH 12/14] bench(parquet): trim split row filter benches

---
 .../arrow_reader_materialization_policy.rs    | 793 +-----------------
 parquet/benches/arrow_reader_row_filter.rs    | 578 +------------
 2 files changed, 5 insertions(+), 1366 deletions(-)

diff --git a/parquet/benches/arrow_reader_materialization_policy.rs b/parquet/benches/arrow_reader_materialization_policy.rs
index 297efbe4eaf6..e58b2ec936b6 100644
--- a/parquet/benches/arrow_reader_materialization_policy.rs
+++ b/parquet/benches/arrow_reader_materialization_policy.rs
@@ -17,8 +17,6 @@
 
 //! Focused benchmark for Parquet reader materialization policy decisions.
 //!
-#![allow(dead_code)]
-//!
 //! # Background:
 //!
 //! As described in [Efficient Filter Pushdown in Parquet], evaluating
@@ -56,9 +54,7 @@
 //! complex OR predicates, and sparse scalar prefixes should not be swept into
 //! that shortcut without their own evidence.
 
-use arrow::array::{
-    ArrayRef, BooleanArray, Float64Array, Int64Array, StructArray, TimestampMillisecondArray,
-};
+use arrow::array::{ArrayRef, BooleanArray, Float64Array, Int64Array, TimestampMillisecondArray};
 use arrow::compute::kernels::cmp::{eq, gt, lt, lt_eq, neq};
 use arrow::compute::{and, or};
 use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
@@ -72,8 +68,7 @@ use criterion::{
 use futures::future::BoxFuture;
 use futures::{FutureExt, StreamExt};
 use parquet::arrow::arrow_reader::{
-    ArrowPredicateFn, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowFilter,
-    RowSelectionPolicy,
+    ArrowPredicateFn, ArrowReaderOptions, RowFilter, RowSelectionPolicy,
 };
 use parquet::arrow::async_reader::AsyncFileReader;
 use parquet::arrow::{ArrowWriter, ParquetRecordBatchStreamBuilder, ProjectionMask};
@@ -232,37 +227,6 @@ fn write_record_batch_to_parquet(batch: &RecordBatch, row_group_size: usize) ->
     buffer
 }
 
-fn create_nested_record_batch(size: usize) -> RecordBatch {
-    let tag = Arc::new(StringViewArray::from_iter_values(
-        (0..size).map(|idx| format!("tag_{}", idx % 7)),
-    )) as ArrayRef;
-    let payload = StructArray::from(vec![
-        (
-            Arc::new(Field::new("id", DataType::Int64, false)),
-            Arc::new(Int64Array::from_iter_values(
-                (0..size).map(|idx| idx as i64 + 1_000),
-            )) as ArrayRef,
-        ),
-        (
-            Arc::new(Field::new("label", DataType::Utf8View, false)),
-            Arc::new(StringViewArray::from_iter_values(
-                (0..size).map(|idx| format!("payload_{idx}")),
-            )) as ArrayRef,
-        ),
-    ]);
-    let payload = Arc::new(payload) as ArrayRef;
-    let value = Arc::new(Int64Array::from_iter_values(
-        (0..size).map(|idx| idx as i64 + 10_000),
-    )) as ArrayRef;
-
-    RecordBatch::try_from_iter(vec![("tag", tag), ("payload", payload), ("value", value)]).unwrap()
-}
-
-fn write_nested_parquet_file_with_rows(total_rows: usize, row_group_size: usize) -> Vec<u8> {
-    let batch = create_nested_record_batch(total_rows);
-    write_record_batch_to_parquet(&batch, row_group_size)
-}
-
 /// ProjectionCase defines the projection mode for the benchmark:
 /// either projecting all columns or excluding the column that is used for filtering.
 #[derive(Clone, Copy)]
@@ -298,25 +262,6 @@ impl std::fmt::Display for ProjectionCase {
     }
 }
 
-#[derive(Clone, Copy)]
-enum SyncStrategy {
-    FullPostFilter,
-    PushdownAuto,
-    PushdownSelectors,
-    PushdownMask,
-}
-
-impl std::fmt::Display for SyncStrategy {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            SyncStrategy::FullPostFilter => write!(f, "full_post_filter"),
-            SyncStrategy::PushdownAuto => write!(f, "pushdown_auto"),
-            SyncStrategy::PushdownSelectors => write!(f, "pushdown_selectors"),
-            SyncStrategy::PushdownMask => write!(f, "pushdown_mask"),
-        }
-    }
-}
-
 #[derive(Clone, Copy)]
 enum AsyncStrategy {
     FullPostFilter,
@@ -374,22 +319,6 @@ pub(crate) enum FilterType {
     SelectiveUnclustered,
     /// moderately selective (10%) clustered filter
     /// ```text
-    ///  ┌───────────────┐    ┌───────────────┐
-    ///  │               │    │               │
-    ///  │               │    │               │
-    ///  │               │    │     ...       │
-    ///  │               │    │               │
-    ///  │     ...       │    │               │
-    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    ///  └───────────────┘    └───────────────┘
-    /// ```
-    /// (10 RowSelections of 10,000 rows each)
-    ModeratelySelectiveClustered,
-    /// moderately selective (10%) clustered filter
-    /// ```text
     /// ┌───────────────┐    ┌───────────────┐
     /// │      ...      │    │               │
     /// │               │    │               │
@@ -437,9 +366,6 @@ pub(crate) enum FilterType {
     /// ```
     /// (99 RowSelection of 10,000 rows each)
     UnselectiveClustered,
-    /// [`Self::SelectivelUnclusered`] `AND`
-    /// [`Self::ModeratelySelectiveClustered`]
-    Composite,
     /// `utf8View <> ''` modeling [ClickBench] [Q21-Q27]
     ///
     /// [ClickBench]: https://github.com/ClickHouse/ClickBench
@@ -729,11 +655,9 @@ impl std::fmt::Display for FilterType {
         let s = match self {
             FilterType::PointLookup => "int64 == 9999",
             FilterType::SelectiveUnclustered => "float64 > 99.0",
-            FilterType::ModeratelySelectiveClustered => "ts >= 9000",
             FilterType::ModeratelySelectiveUnclustered => "int64 > 90",
             FilterType::UnselectiveUnclustered => "float64 <= 99.0",
             FilterType::UnselectiveClustered => "ts < 9000",
-            FilterType::Composite => "float64 > 99.0 AND ts >= 9000",
             FilterType::Utf8ViewNonEmpty => "utf8View <> ''",
             FilterType::Utf8ViewMissing => "utf8View == '<missing>'",
             FilterType::ScalarPrefixUtf8Output => "int64 == 62 AND ts < 9000",
@@ -797,11 +721,6 @@ impl FilterType {
                 let scalar = Float64Array::new_scalar(99.0);
                 gt(array, &scalar)
             }
-            // Moderately Selective Clustered on ts column: ts >= 9000 (implemented as > 8999)
-            FilterType::ModeratelySelectiveClustered => {
-                let array = batch.column(batch.schema().index_of("ts")?);
-                gt(array, &TimestampMillisecondArray::new_scalar(8999))
-            }
             // Moderately Selective Unclustered on int64 column: int64 > 90
             FilterType::ModeratelySelectiveUnclustered => {
                 let array = batch.column(batch.schema().index_of("int64")?);
@@ -818,12 +737,6 @@ impl FilterType {
                 let array = batch.column(batch.schema().index_of("ts")?);
                 lt(array, &TimestampMillisecondArray::new_scalar(9000))
             }
-            // Composite filter: logical AND of (float64 > 99.0) and (ts >= 9000)
-            FilterType::Composite => {
-                let mask1 = FilterType::SelectiveUnclustered.filter_batch(batch)?;
-                let mask2 = FilterType::ModeratelySelectiveClustered.filter_batch(batch)?;
-                and(&mask1, &mask2)
-            }
             // Utf8ViewNonEmpty: selects rows where the utf8View column is not an empty string.
             FilterType::Utf8ViewNonEmpty => {
                 let array = batch.column(batch.schema().index_of("utf8View")?);
@@ -947,11 +860,9 @@ impl FilterType {
         match self {
             FilterType::PointLookup => &[0],
             FilterType::SelectiveUnclustered => &[1],
-            FilterType::ModeratelySelectiveClustered => &[3],
             FilterType::ModeratelySelectiveUnclustered => &[0],
             FilterType::UnselectiveUnclustered => &[1],
             FilterType::UnselectiveClustered => &[3],
-            FilterType::Composite => &[1, 3], // Use float64 column and ts column as representative for composite
             FilterType::Utf8ViewNonEmpty | FilterType::Utf8ViewMissing => &[2],
             FilterType::ScalarPrefixUtf8Output => &[0, 3],
             FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
@@ -978,297 +889,9 @@ impl FilterType {
     }
 }
 
-/// Benchmark filters and projections by reading the Parquet file.
-/// This benchmark iterates over all individual filter types and two projection cases.
-/// It measures the time to read and filter the Parquet file according to each scenario.
-fn benchmark_filters_and_projections(c: &mut Criterion) {
-    // make the parquet file in memory that can be shared
-    let parquet_file = Bytes::from(write_parquet_file());
-    let filter_types = vec![
-        FilterType::PointLookup,
-        FilterType::SelectiveUnclustered,
-        FilterType::ModeratelySelectiveClustered,
-        FilterType::ModeratelySelectiveUnclustered,
-        FilterType::UnselectiveUnclustered,
-        FilterType::UnselectiveClustered,
-        FilterType::Utf8ViewNonEmpty,
-        FilterType::Composite,
-    ];
-    let projection_cases = vec![
-        ProjectionCase::AllColumns,
-        ProjectionCase::ExcludeFilterColumn,
-    ];
-
-    let rt = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()
-        .unwrap();
-
-    let mut group = c.benchmark_group("arrow_reader_row_filter");
-
-    for filter_type in filter_types {
-        for proj_case in &projection_cases {
-            let filter_col = filter_type.filter_projection().to_vec();
-            let output_projection = output_projection_for(filter_type, proj_case);
-
-            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
-            let metadata = Arc::clone(reader.metadata());
-
-            let schema_descr = metadata.file_metadata().schema_descr();
-            let projection_mask = ProjectionMask::roots(schema_descr, output_projection.clone());
-            let pred_mask = ProjectionMask::roots(schema_descr, filter_col.clone());
-
-            let benchmark_name = format!("{filter_type}/{proj_case}",);
-
-            // run the benchmark for the async reader
-            let bench_id = BenchmarkId::new(benchmark_name.clone(), "async");
-            let rt_captured = rt.handle().clone();
-            group.bench_function(bench_id, |b| {
-                b.iter(|| {
-                    let reader = reader.clone();
-                    let pred_mask = pred_mask.clone();
-                    let projection_mask = projection_mask.clone();
-                    // row filters are not clone, so must make it each iter
-                    let filter = ArrowPredicateFn::new(pred_mask, move |batch: RecordBatch| {
-                        Ok(filter_type.filter_batch(&batch).unwrap())
-                    });
-                    let row_filter = RowFilter::new(vec![Box::new(filter)]);
-
-                    rt_captured.block_on(async {
-                        benchmark_async_reader(reader, projection_mask, row_filter).await;
-                    })
-                });
-            });
-
-            // run the benchmark for the sync reader
-            let bench_id = BenchmarkId::new(benchmark_name, "sync");
-            group.bench_function(bench_id, |b| {
-                b.iter(|| {
-                    let reader = reader.clone();
-                    let pred_mask = pred_mask.clone();
-                    let projection_mask = projection_mask.clone();
-                    // row filters are not clone, so must make it each iter
-                    let filter = ArrowPredicateFn::new(pred_mask, move |batch: RecordBatch| {
-                        Ok(filter_type.filter_batch(&batch).unwrap())
-                    });
-                    let row_filter = RowFilter::new(vec![Box::new(filter)]);
-
-                    benchmark_sync_reader(reader, projection_mask, row_filter)
-                });
-            });
-        }
-    }
-}
-
-/// Compare full scan plus post-filtering against row-level pushdown strategies.
-///
-/// This group is intentionally sync-only and smaller than
-/// [`benchmark_filters_and_projections`]. It tracks the cases most likely to
-/// inform a future default `Auto` policy: selective random filters, clustered
-/// filters, ClickBench-like string filters, and the forced selector strategy
-/// that originally motivated apache/arrow-rs#8565.
-fn benchmark_sync_strategy_matrix(c: &mut Criterion) {
-    let parquet_file = Bytes::from(write_parquet_file());
-    let filter_types = [
-        FilterType::SelectiveUnclustered,
-        FilterType::ModeratelySelectiveClustered,
-        FilterType::ModeratelySelectiveUnclustered,
-        FilterType::Utf8ViewNonEmpty,
-    ];
-    let strategies = [
-        SyncStrategy::FullPostFilter,
-        SyncStrategy::PushdownAuto,
-        SyncStrategy::PushdownSelectors,
-        SyncStrategy::PushdownMask,
-    ];
-
-    let mut group = c.benchmark_group("arrow_reader_row_filter_strategy_matrix");
-
-    for filter_type in filter_types {
-        for projection_case in [
-            ProjectionCase::AllColumns,
-            ProjectionCase::ExcludeFilterColumn,
-        ] {
-            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
-            let metadata = Arc::clone(reader.metadata());
-            let schema_descr = metadata.file_metadata().schema_descr();
-            let output_projection = output_projection_for(filter_type, &projection_case);
-            let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
-            let output_column_names = projection_names(&output_projection);
-            let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
-            let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
-            let pred_mask = ProjectionMask::roots(
-                schema_descr,
-                filter_type.filter_projection().iter().copied(),
-            );
-
-            for strategy in strategies {
-                let bench_id = BenchmarkId::new(
-                    format!("{filter_type}/{projection_case}"),
-                    strategy.to_string(),
-                );
-
-                group.bench_function(bench_id, |b| {
-                    b.iter(|| {
-                        let reader = reader.clone();
-                        let pred_mask = pred_mask.clone();
-                        let projection_mask = projection_mask.clone();
-                        let read_projection_mask = read_projection_mask.clone();
-                        let output_column_names = output_column_names.clone();
-
-                        match strategy {
-                            SyncStrategy::FullPostFilter => benchmark_sync_reader_post_filter(
-                                reader,
-                                read_projection_mask,
-                                output_column_names,
-                                filter_type,
-                            ),
-                            SyncStrategy::PushdownAuto => {
-                                let row_filter = row_filter_for(filter_type, pred_mask);
-                                benchmark_sync_reader_with_policy(
-                                    reader,
-                                    projection_mask,
-                                    row_filter,
-                                    RowSelectionPolicy::default(),
-                                )
-                            }
-                            SyncStrategy::PushdownSelectors => {
-                                let row_filter = row_filter_for(filter_type, pred_mask);
-                                benchmark_sync_reader_with_policy(
-                                    reader,
-                                    projection_mask,
-                                    row_filter,
-                                    RowSelectionPolicy::Selectors,
-                                )
-                            }
-                            SyncStrategy::PushdownMask => {
-                                let row_filter = row_filter_for(filter_type, pred_mask);
-                                benchmark_sync_reader_with_policy(
-                                    reader,
-                                    projection_mask,
-                                    row_filter,
-                                    RowSelectionPolicy::Mask,
-                                )
-                            }
-                        }
-                    });
-                });
-            }
-        }
-    }
-}
-
-/// Compare async full scan plus post-filtering against async row-level pushdown
-/// strategies. This is the matrix that exercises the current reader `Auto`
-/// policy through the async stream backed by the push decoder row-group pipeline.
-fn benchmark_async_strategy_matrix(c: &mut Criterion) {
-    let parquet_file = Bytes::from(write_parquet_file());
-    let filter_types = [
-        FilterType::SelectiveUnclustered,
-        FilterType::ModeratelySelectiveClustered,
-        FilterType::ModeratelySelectiveUnclustered,
-        FilterType::Utf8ViewNonEmpty,
-    ];
-    let strategies = [
-        AsyncStrategy::FullPostFilter,
-        AsyncStrategy::PushdownAuto,
-        AsyncStrategy::PushdownSelectors,
-        AsyncStrategy::PushdownMask,
-    ];
-
-    let rt = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()
-        .unwrap();
-
-    let mut group = c.benchmark_group("arrow_reader_row_filter_async_strategy_matrix");
-
-    for filter_type in filter_types {
-        for projection_case in [
-            ProjectionCase::AllColumns,
-            ProjectionCase::ExcludeFilterColumn,
-        ] {
-            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
-            let metadata = Arc::clone(reader.metadata());
-            let schema_descr = metadata.file_metadata().schema_descr();
-            let output_projection = output_projection_for(filter_type, &projection_case);
-            let read_projection = full_post_filter_read_projection(filter_type, &output_projection);
-            let output_column_names = projection_names(&output_projection);
-            let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
-            let read_projection_mask = ProjectionMask::roots(schema_descr, read_projection);
-            let pred_mask = ProjectionMask::roots(
-                schema_descr,
-                filter_type.filter_projection().iter().copied(),
-            );
-
-            for strategy in strategies {
-                let bench_id = BenchmarkId::new(
-                    format!("{filter_type}/{projection_case}"),
-                    strategy.to_string(),
-                );
-                let rt_captured = rt.handle().clone();
-
-                group.bench_function(bench_id, |b| {
-                    b.iter(|| {
-                        let reader = reader.clone();
-                        let pred_mask = pred_mask.clone();
-                        let projection_mask = projection_mask.clone();
-                        let read_projection_mask = read_projection_mask.clone();
-                        let output_column_names = output_column_names.clone();
-
-                        rt_captured.block_on(async {
-                            match strategy {
-                                AsyncStrategy::FullPostFilter => {
-                                    benchmark_async_reader_post_filter(
-                                        reader,
-                                        read_projection_mask,
-                                        output_column_names,
-                                        filter_type,
-                                    )
-                                    .await
-                                }
-                                AsyncStrategy::PushdownAuto => {
-                                    let row_filter = row_filter_for(filter_type, pred_mask);
-                                    benchmark_async_reader_with_policy(
-                                        reader,
-                                        projection_mask,
-                                        row_filter,
-                                        RowSelectionPolicy::default(),
-                                    )
-                                    .await
-                                }
-                                AsyncStrategy::PushdownSelectors => {
-                                    let row_filter = row_filter_for(filter_type, pred_mask);
-                                    benchmark_async_reader_with_policy(
-                                        reader,
-                                        projection_mask,
-                                        row_filter,
-                                        RowSelectionPolicy::Selectors,
-                                    )
-                                    .await
-                                }
-                                AsyncStrategy::PushdownMask => {
-                                    let row_filter = row_filter_for(filter_type, pred_mask);
-                                    benchmark_async_reader_with_policy(
-                                        reader,
-                                        projection_mask,
-                                        row_filter,
-                                        RowSelectionPolicy::Mask,
-                                    )
-                                    .await
-                                }
-                            }
-                        })
-                    });
-                });
-            }
-        }
-    }
-}
-
 /// A focused async-only matrix that isolates the cases most relevant to the
 /// row-filter Auto policy. This is intentionally narrower than
-/// [`benchmark_async_strategy_matrix`]: it keeps the benchmark output focused
+/// the smaller row-filter strategy matrix: it keeps the benchmark output focused
 /// on cases where later PRs may teach `Auto` to switch execution modes or
 /// explicitly keep predicate pushdown.
 ///
@@ -1638,63 +1261,6 @@ fn push_clustered_projected_predicate_cases(cases: &mut Vec<AsyncFocusCase>, par
     );
 }
 
-/// Isolate projected scans that do not construct a [`RowFilter`].
-///
-/// This tracks the reader-level shape seen in TPC-DS Q83 return-table scans:
-/// a narrow primitive projection where row-level pushdown metrics are zero.
-/// It deliberately lives outside the adaptive-materialization matrix because there is no
-/// filter strategy to choose.
-///
-/// ```text
-/// no RowFilter             projected primitive columns
-/// ┌───────────────┐    ┌───────────────┐
-/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-/// │      ...      │    │      ...      │
-/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-/// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-/// └───────────────┘    └───────────────┘
-/// ```
-fn benchmark_projection_scan_focus(c: &mut Criterion) {
-    let parquet_file = Bytes::from(write_parquet_file());
-    let rt = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()
-        .unwrap();
-
-    let mut group = c.benchmark_group("arrow_reader_projection_scan_focus");
-
-    let case_name = "primitive_projection_only";
-    let projection = vec![0, 1, 3];
-    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
-    let metadata = Arc::clone(reader.metadata());
-    let schema_descr = metadata.file_metadata().schema_descr();
-    let projection_mask = ProjectionMask::roots(schema_descr, projection);
-
-    let bench_id = BenchmarkId::new(case_name, "async");
-    let rt_captured = rt.handle().clone();
-    group.bench_function(bench_id, |b| {
-        b.iter(|| {
-            let reader = reader.clone();
-            let projection_mask = projection_mask.clone();
-            rt_captured.block_on(benchmark_async_reader_projected(reader, projection_mask));
-        });
-    });
-
-    let bench_id = BenchmarkId::new(case_name, "sync");
-    group.bench_function(bench_id, |b| {
-        b.iter(|| {
-            let reader = reader.clone();
-            let projection_mask = projection_mask.clone();
-            benchmark_sync_reader_projected(reader, projection_mask);
-        });
-    });
-}
-
 struct AsyncFocusCase {
     case_name: &'static str,
     parquet_file: Bytes,
@@ -1992,58 +1558,6 @@ fn row_filter_for_focus_case(
     }
 }
 
-#[derive(Clone, Copy)]
-enum NestedFilterType {
-    AlwaysTrueTag,
-    TagNotZero,
-}
-
-impl std::fmt::Display for NestedFilterType {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::AlwaysTrueTag => write!(f, "always_true_tag"),
-            Self::TagNotZero => write!(f, "tag_not_zero"),
-        }
-    }
-}
-
-impl NestedFilterType {
-    fn filter_batch(self, batch: &RecordBatch) -> arrow::error::Result<BooleanArray> {
-        match self {
-            Self::AlwaysTrueTag => Ok(BooleanArray::from(vec![true; batch.num_rows()])),
-            Self::TagNotZero => {
-                let tag = batch.column(batch.schema().index_of("tag")?);
-                let scalar = StringViewArray::new_scalar("tag_0");
-                neq(tag, &scalar)
-            }
-        }
-    }
-}
-
-fn nested_row_filter_for(filter_type: NestedFilterType, pred_mask: ProjectionMask) -> RowFilter {
-    let filter = ArrowPredicateFn::new(pred_mask, move |batch| filter_type.filter_batch(&batch));
-    RowFilter::new(vec![Box::new(filter)])
-}
-
-/// Use async API
-async fn benchmark_async_reader(
-    reader: InMemoryReader,
-    projection_mask: ProjectionMask,
-    row_filter: RowFilter,
-) {
-    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
-        .await
-        .unwrap()
-        .with_batch_size(8192)
-        .with_projection(projection_mask)
-        .with_row_filter(row_filter)
-        .build()
-        .unwrap();
-    while let Some(b) = stream.next().await {
-        b.unwrap(); // consume the batches, no buffering
-    }
-}
-
 async fn benchmark_async_reader_with_policy(
     reader: InMemoryReader,
     projection_mask: ProjectionMask,
@@ -2087,138 +1601,6 @@ async fn benchmark_async_reader_post_filter(
     }
 }
 
-async fn benchmark_async_reader_post_filter_nested(
-    reader: InMemoryReader,
-    read_projection: ProjectionMask,
-    output_column_names: &[&str],
-    filter_type: NestedFilterType,
-) {
-    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
-        .await
-        .unwrap()
-        .with_batch_size(8192)
-        .with_projection(read_projection)
-        .build()
-        .unwrap();
-
-    while let Some(b) = stream.next().await {
-        let batch = b.unwrap();
-        let filter = filter_type.filter_batch(&batch).unwrap();
-        let output_rows =
-            post_filter_projected_num_rows(&batch, &filter, output_column_names).unwrap();
-        std::hint::black_box(output_rows);
-    }
-}
-
-async fn benchmark_async_reader_projected(reader: InMemoryReader, projection_mask: ProjectionMask) {
-    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
-        .await
-        .unwrap()
-        .with_batch_size(8192)
-        .with_projection(projection_mask)
-        .build()
-        .unwrap();
-    while let Some(b) = stream.next().await {
-        let batch = b.unwrap();
-        std::hint::black_box(batch.num_rows());
-    }
-}
-
-/// Like [`benchmark_async_reader`] but also threads `with_limit(limit)` into
-/// the stream builder. Used by the `LIMIT` benchmark below.
-async fn benchmark_async_reader_with_limit(
-    reader: InMemoryReader,
-    projection_mask: ProjectionMask,
-    row_filter: RowFilter,
-    limit: usize,
-) {
-    let mut stream = ParquetRecordBatchStreamBuilder::new(reader)
-        .await
-        .unwrap()
-        .with_batch_size(8192)
-        .with_projection(projection_mask)
-        .with_row_filter(row_filter)
-        .with_limit(limit)
-        .build()
-        .unwrap();
-    while let Some(b) = stream.next().await {
-        b.unwrap(); // consume the batches, no buffering
-    }
-}
-
-/// Use sync API
-fn benchmark_sync_reader(
-    reader: InMemoryReader,
-    projection_mask: ProjectionMask,
-    row_filter: RowFilter,
-) {
-    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
-        .unwrap()
-        .with_batch_size(8192)
-        .with_projection(projection_mask)
-        .with_row_filter(row_filter)
-        .build()
-        .unwrap();
-    for b in stream {
-        b.unwrap(); // consume the batches, no buffering
-    }
-}
-
-fn benchmark_sync_reader_with_policy(
-    reader: InMemoryReader,
-    projection_mask: ProjectionMask,
-    row_filter: RowFilter,
-    row_selection_policy: RowSelectionPolicy,
-) {
-    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
-        .unwrap()
-        .with_batch_size(8192)
-        .with_projection(projection_mask)
-        .with_row_filter(row_filter)
-        .with_row_selection_policy(row_selection_policy)
-        .build()
-        .unwrap();
-    for b in stream {
-        b.unwrap(); // consume the batches, no buffering
-    }
-}
-
-fn benchmark_sync_reader_post_filter(
-    reader: InMemoryReader,
-    read_projection: ProjectionMask,
-    output_column_names: Vec<&'static str>,
-    filter_type: FilterType,
-) {
-    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
-        .unwrap()
-        .with_batch_size(8192)
-        .with_projection(read_projection)
-        .build()
-        .unwrap();
-
-    for b in stream {
-        let batch = b.unwrap();
-        let filter = filter_type.filter_batch(&batch).unwrap();
-        let output_rows =
-            post_filter_projected_num_rows(&batch, &filter, &output_column_names).unwrap();
-        std::hint::black_box(output_rows);
-    }
-}
-
-fn benchmark_sync_reader_projected(reader: InMemoryReader, projection_mask: ProjectionMask) {
-    let stream = ParquetRecordBatchReaderBuilder::try_new(reader.into_inner())
-        .unwrap()
-        .with_batch_size(8192)
-        .with_projection(projection_mask)
-        .build()
-        .unwrap();
-
-    for b in stream {
-        let batch = b.unwrap();
-        std::hint::black_box(batch.num_rows());
-    }
-}
-
 /// Adapter to read asynchronously from in memory bytes and always loads the
 /// metadata with page indexes.
 #[derive(Debug, Clone)]
@@ -2244,10 +1626,6 @@ impl InMemoryReader {
     fn metadata(&self) -> &Arc<ParquetMetaData> {
         &self.metadata
     }
-
-    fn into_inner(self) -> Bytes {
-        self.inner
-    }
 }
 
 impl AsyncFileReader for InMemoryReader {
@@ -2265,170 +1643,5 @@ impl AsyncFileReader for InMemoryReader {
     }
 }
 
-/// Benchmark filters with `LIMIT` short-circuit (`with_limit(N)`)
-///
-/// `PointLookup` is excluded because the filter has only 1 match in the
-/// whole file; `LIMIT 10` is not binding.
-fn benchmark_filters_with_limit(c: &mut Criterion) {
-    const LIMIT: usize = 10;
-
-    let parquet_file = Bytes::from(write_parquet_file());
-    let filter_types = vec![
-        FilterType::SelectiveUnclustered,
-        FilterType::ModeratelySelectiveClustered,
-        FilterType::ModeratelySelectiveUnclustered,
-        FilterType::UnselectiveUnclustered,
-        FilterType::UnselectiveClustered,
-        FilterType::Utf8ViewNonEmpty,
-        FilterType::Composite,
-    ];
-    let projection_cases = vec![
-        ProjectionCase::AllColumns,
-        ProjectionCase::ExcludeFilterColumn,
-    ];
-
-    let rt = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()
-        .unwrap();
-
-    let mut group = c.benchmark_group("arrow_reader_row_filter_limit");
-
-    for filter_type in filter_types {
-        for proj_case in &projection_cases {
-            let filter_col = filter_type.filter_projection().to_vec();
-            let output_projection = output_projection_for(filter_type, proj_case);
-
-            let reader = InMemoryReader::try_new(&parquet_file).unwrap();
-            let metadata = Arc::clone(reader.metadata());
-            let schema_descr = metadata.file_metadata().schema_descr();
-            let projection_mask = ProjectionMask::roots(schema_descr, output_projection);
-            let pred_mask = ProjectionMask::roots(schema_descr, filter_col);
-
-            let benchmark_name = format!("{filter_type}/{proj_case}/limit{LIMIT}");
-
-            // async variant
-            let bench_id = BenchmarkId::new(benchmark_name.clone(), "async");
-            let rt_handle = rt.handle().clone();
-            let pred_mask_async = pred_mask.clone();
-            let projection_mask_async = projection_mask.clone();
-            let reader_async = reader.clone();
-            group.bench_function(bench_id, |b| {
-                b.iter(|| {
-                    let reader = reader_async.clone();
-                    let pred_mask = pred_mask_async.clone();
-                    let projection_mask = projection_mask_async.clone();
-                    // RowFilter and ArrowPredicateFn are not Clone — fresh each iter.
-                    let predicate = ArrowPredicateFn::new(pred_mask, move |batch: RecordBatch| {
-                        Ok(filter_type.filter_batch(&batch).unwrap())
-                    });
-                    let row_filter = RowFilter::new(vec![Box::new(predicate)]);
-                    rt_handle.block_on(benchmark_async_reader_with_limit(
-                        reader,
-                        projection_mask,
-                        row_filter,
-                        LIMIT,
-                    ));
-                });
-            });
-        }
-    }
-}
-
-/// Focused nested-output case for comparing manual post-filtering against
-/// row-filter pushdown policies.
-///
-/// The predicate column is an unprojected variable-width scalar column, and the
-/// output is a whole nested `Struct` root. This isolates the reader case enabled
-/// by root-aware post-filter projection without requiring recursive nested-child
-/// projection.
-fn benchmark_async_nested_post_filter_focus(c: &mut Criterion) {
-    let parquet_file = Bytes::from(write_nested_parquet_file_with_rows(
-        TOTAL_ROWS,
-        ROW_GROUP_SIZE,
-    ));
-    let strategies = [
-        AsyncStrategy::FullPostFilter,
-        AsyncStrategy::PushdownAuto,
-        AsyncStrategy::PushdownMask,
-        AsyncStrategy::PushdownSelectors,
-    ];
-
-    let rt = tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()
-        .unwrap();
-
-    let mut group = c.benchmark_group("arrow_reader_row_filter_async_nested_post_filter_focus");
-    let reader = InMemoryReader::try_new(&parquet_file).unwrap();
-    let metadata = Arc::clone(reader.metadata());
-    let schema_descr = metadata.file_metadata().schema_descr();
-    let output_projection = ProjectionMask::columns(schema_descr, ["payload"]);
-    let read_projection = ProjectionMask::columns(schema_descr, ["tag", "payload"]);
-    let pred_mask = ProjectionMask::columns(schema_descr, ["tag"]);
-    let filter_cases = [
-        NestedFilterType::AlwaysTrueTag,
-        NestedFilterType::TagNotZero,
-    ];
-
-    for filter_case in filter_cases {
-        for strategy in strategies {
-            let bench_id = BenchmarkId::new(
-                format!("whole_struct_output/{filter_case}"),
-                strategy.to_string(),
-            );
-            let rt_captured = rt.handle().clone();
-            group.bench_function(bench_id, |b| {
-                b.iter(|| {
-                    let reader = reader.clone();
-                    let pred_mask = pred_mask.clone();
-                    let output_projection = output_projection.clone();
-                    let read_projection = read_projection.clone();
-                    rt_captured.block_on(async {
-                        match strategy {
-                            AsyncStrategy::FullPostFilter => {
-                                benchmark_async_reader_post_filter_nested(
-                                    reader,
-                                    read_projection,
-                                    &["payload"],
-                                    filter_case,
-                                )
-                                .await
-                            }
-                            AsyncStrategy::PushdownAuto => {
-                                benchmark_async_reader_with_policy(
-                                    reader,
-                                    output_projection,
-                                    nested_row_filter_for(filter_case, pred_mask),
-                                    RowSelectionPolicy::default(),
-                                )
-                                .await
-                            }
-                            AsyncStrategy::PushdownSelectors => {
-                                benchmark_async_reader_with_policy(
-                                    reader,
-                                    output_projection,
-                                    nested_row_filter_for(filter_case, pred_mask),
-                                    RowSelectionPolicy::Selectors,
-                                )
-                                .await
-                            }
-                            AsyncStrategy::PushdownMask => {
-                                benchmark_async_reader_with_policy(
-                                    reader,
-                                    output_projection,
-                                    nested_row_filter_for(filter_case, pred_mask),
-                                    RowSelectionPolicy::Mask,
-                                )
-                                .await
-                            }
-                        }
-                    })
-                });
-            });
-        }
-    }
-}
-
 criterion_group!(benches, benchmark_async_auto_policy_focus,);
 criterion_main!(benches);
diff --git a/parquet/benches/arrow_reader_row_filter.rs b/parquet/benches/arrow_reader_row_filter.rs
index e1e634e43b14..9b00a3876fde 100644
--- a/parquet/benches/arrow_reader_row_filter.rs
+++ b/parquet/benches/arrow_reader_row_filter.rs
@@ -58,8 +58,8 @@
 use arrow::array::{
     ArrayRef, BooleanArray, Float64Array, Int64Array, StructArray, TimestampMillisecondArray,
 };
+use arrow::compute::and;
 use arrow::compute::kernels::cmp::{eq, gt, lt, lt_eq, neq};
-use arrow::compute::{and, or};
 use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
 use arrow::record_batch::RecordBatch;
 use arrow_array::StringViewArray;
@@ -262,20 +262,10 @@ fn write_nested_parquet_file_with_rows(total_rows: usize, row_group_size: usize)
 
 /// ProjectionCase defines the projection mode for the benchmark:
 /// either projecting all columns or excluding the column that is used for filtering.
-#[allow(dead_code)]
 #[derive(Clone, Copy)]
 enum ProjectionCase {
     AllColumns,
     ExcludeFilterColumn,
-    FilterColumnsOnly,
-    CountOnly,
-    FixedColumns,
-    Float64AndTs,
-    Float64Only,
-    Int64AndFloat64,
-    Int64AndUtf8,
-    TsAndUtf8,
-    Utf8Only,
 }
 
 impl std::fmt::Display for ProjectionCase {
@@ -283,15 +273,6 @@ impl std::fmt::Display for ProjectionCase {
         match self {
             ProjectionCase::AllColumns => write!(f, "all_columns"),
             ProjectionCase::ExcludeFilterColumn => write!(f, "exclude_filter_column"),
-            ProjectionCase::FilterColumnsOnly => write!(f, "filter_columns_only"),
-            ProjectionCase::CountOnly => write!(f, "count_only"),
-            ProjectionCase::FixedColumns => write!(f, "fixed_columns"),
-            ProjectionCase::Float64AndTs => write!(f, "float64_and_ts"),
-            ProjectionCase::Float64Only => write!(f, "float64_only"),
-            ProjectionCase::Int64AndFloat64 => write!(f, "int64_and_float64"),
-            ProjectionCase::Int64AndUtf8 => write!(f, "int64_and_utf8"),
-            ProjectionCase::TsAndUtf8 => write!(f, "ts_and_utf8"),
-            ProjectionCase::Utf8Only => write!(f, "utf8_only"),
         }
     }
 }
@@ -336,391 +317,16 @@ impl std::fmt::Display for AsyncStrategy {
 
 /// FilterType encapsulates the different filter comparisons.
 /// The variants correspond to the different filter patterns.
-#[allow(dead_code)]
 #[derive(Clone, Copy, Debug)]
 pub(crate) enum FilterType {
-    /// "Point Lookup": selects a single row
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │               │    │      ...      │
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │     ...       │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │      ...      │
-    /// │               │    │               │
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    /// (1 RowSelection of 1 row)
     PointLookup,
-    /// selective (1%) unclustered filter
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │      ...      │    │               │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// │               │    │      ...      │
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │      ...      │    │               │
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    /// (1000 RowSelection of 10 rows each)
     SelectiveUnclustered,
-    /// moderately selective (10%) clustered filter
-    /// ```text
-    ///  ┌───────────────┐    ┌───────────────┐
-    ///  │               │    │               │
-    ///  │               │    │               │
-    ///  │               │    │     ...       │
-    ///  │               │    │               │
-    ///  │     ...       │    │               │
-    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    ///  │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    ///  └───────────────┘    └───────────────┘
-    /// ```
-    /// (10 RowSelections of 10,000 rows each)
     ModeratelySelectiveClustered,
-    /// moderately selective (10%) clustered filter
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │      ...      │    │               │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// │               │    │               │
-    /// │               │    │      ...      │
-    /// │      ...      │    │               │
-    /// │               │    │               │
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    /// (10 RowSelections of 10,000 rows each)
     ModeratelySelectiveUnclustered,
-    /// unselective (99%) unclustered filter
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    /// (99,000 RowSelections of 10 rows each)
     UnselectiveUnclustered,
-    /// unselective (90%) clustered filter
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │               │    │     ...       │
-    /// │               │    │               │
-    /// │     ...       │    │               │
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    /// (99 RowSelection of 10,000 rows each)
     UnselectiveClustered,
-    /// [`Self::SelectivelUnclusered`] `AND`
-    /// [`Self::ModeratelySelectiveClustered`]
     Composite,
-    /// `utf8View <> ''` modeling [ClickBench] [Q21-Q27]
-    ///
-    /// [ClickBench]: https://github.com/ClickHouse/ClickBench
-    /// [Q21-Q27]: https://github.com/apache/datafusion/blob/b7177234e65cbbb2dcc04c252f6acd80bb026362/benchmarks/queries/clickbench/queries.sql#L22-L28
     Utf8ViewNonEmpty,
-
-    // Deferred-output shapes. Predicate columns are separate from the output,
-    // so rejected rows can skip output-column decoding.
-    /// Scalar-prefix shape derived from DataFusion ClickBench Q37:
-    ///
-    /// ```sql
-    /// WHERE CounterID = 62
-    ///   AND EventDate BETWEEN ...
-    ///   AND DontCountHits = 0
-    ///   AND IsRefresh = 0
-    ///   AND Title <> ''
-    /// ```
-    ///
-    /// DataFusion `Auto` does not push down the `Title <> ''` string predicate,
-    /// but it can push down the scalar prefix to defer decoding `Title`.
-    /// Fragmented ~0.9% selection: approx 4,500 selected rows in 500K.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    ScalarPrefixUtf8Output,
-    /// Sparse fragmented scalar predicates (~7%, approx 36,000 selected rows
-    /// in 500K) with a cheap fixed-width output projection, derived from a
-    /// ClickBench Q41-like shape.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    SparseScalarFixedOutput,
-    /// Scalar range predicate derived from TPC-DS Q9 `ss_quantity BETWEEN ...`
-    /// subqueries. The selected rows are random and moderately selective, and
-    /// benchmark projections cover both count-only and numeric aggregate cases.
-    /// Fragmented ~20% selection: approx 100,000 selected rows in 500K.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    QuantityRangePredicate,
-
-    // Multi-predicate shapes. These focus predicate ordering and predicate
-    // evaluation cost independently of projection cost.
-    /// Predicate-order shape derived from DataFusion ClickBench extended Q6:
-    /// an early cheap fixed-width predicate can prune almost all rows before a
-    /// later unprojected variable-width predicate is decoded.
-    /// Point-lookup prefix: at most 1 row reaches the variable-width predicate.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │      ...      │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    FixedThenVarWidthPredicates,
-    /// Same scalar + variable-width predicate columns as
-    /// [`Self::FixedThenVarWidthPredicates`], but with the variable-width
-    /// predicate evaluated first. This anchors the static post-filter gate
-    /// against predicate-order drift.
-    /// At most 1 row survives the final point lookup.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │      ...      │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    VarWidthThenFixedPredicates,
-    /// Multiple cheap scalar predicates, very small output, and projected
-    /// predicate columns used later by grouping. Derived from ClickBench Q40.
-    /// Fragmented ~0.8% selection: approx 4,000 selected rows in 500K.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    MultiScalarProjectedKey,
-    /// Complex OR predicate over dictionary/string-like and scalar columns
-    /// where predicate evaluation dominates reader time. Derived from TPC-DS
-    /// Q41.
-    /// Mixed string/scalar OR branches select approx 1% of rows.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │      ...      │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │      ...      │    │               │
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    ComplexOrMixedPredicates,
-
-    // Projected-predicate shapes. At least one predicate column is also needed
-    // in the final projection.
-    /// Multiple fixed-width dynamic filters where predicate columns are also
-    /// projected. Derived from TPC-DS Q20 catalog_sales.
-    /// Fragmented ~11% selection: approx 54,000 selected rows in 500K.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    ProjectedDynamicFilters,
-    /// Shape of TPC-DS Q21 after dynamic-filter pruning: sparse fragmented
-    /// fixed-width predicates where the final projection still includes the
-    /// predicate columns. This protects against choosing selectors for columns
-    /// that were already decoded/cached by predicate evaluation.
-    /// Fragmented ~7% selection: approx 36,000 selected rows in 500K.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    SparseProjectedPredicatesFixedOutput,
-    /// Projected-predicate shape derived from TPC-DS Q2 fact scans: the
-    /// dynamic filter applies to the date key, the same date key is projected,
-    /// and an additional fixed-width sales value can still be deferred by
-    /// predicate pushdown.
-    /// Selectivity ranges from 1% to 50%: approx 5K to 250K selected rows in
-    /// 500K.
-    /// The 1% variants also cover a TPC-DS Q41-like item scan where predicate
-    /// and output overlap, selection is highly fragmented, and the deferred
-    /// output payload is small enough that post-filtering can be faster than
-    /// row-filter pushdown.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │      ...      │    │      ...      │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    ProjectedPredicate1Pct,
-    ProjectedPredicate5Pct,
-    ProjectedPredicate8Pct,
-    ProjectedPredicate10Pct,
-    ProjectedPredicate20Pct,
-    ProjectedPredicate30Pct,
-    ProjectedPredicate40Pct,
-    ProjectedPredicate50Pct,
-    /// Exact shape for the projected-predicate moderate-selectivity gate:
-    /// a clustered 20% timestamp predicate where the predicate column is
-    /// projected and the deferred output is variable-width.
-    /// Clustered 8% or 20% selection: 40,000 or 100,000 selected rows in 500K.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │               │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │      ...      │    │      ...      │
-    /// │               │    │               │
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    ClusteredTs8PctProjectedPredicate,
-    ClusteredTs20PctProjectedPredicate,
-    /// Sparse variable-width predicate shaped like TPC-DS Q83 dynamic
-    /// `i_item_id` filters, where the predicate column is also projected.
-    /// Sparse 0.1% selection: 500 sentinel rows in 500K, one every 1,000 rows.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    Utf8ViewMissing,
-    /// Very sparse projected fixed-width scan shaped like TPC-DS fact-table
-    /// filters where the predicate column is also needed in the output projection.
-    /// Sparse 0.1% selection: 500 rows in 500K, one timestamp match every
-    /// 1,000 rows.
-    ///
-    /// ```text
-    /// ┌───────────────┐    ┌───────────────┐
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │      ...      │    │      ...      │
-    /// │               │    │               │
-    /// │               │    │               │
-    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// └───────────────┘    └───────────────┘
-    /// ```
-    SparseProjectedFactScan,
 }
 
 impl std::fmt::Display for FilterType {
@@ -734,47 +340,6 @@ impl std::fmt::Display for FilterType {
             FilterType::UnselectiveClustered => "ts < 9000",
             FilterType::Composite => "float64 > 99.0 AND ts >= 9000",
             FilterType::Utf8ViewNonEmpty => "utf8View <> ''",
-            FilterType::Utf8ViewMissing => "utf8View == '<missing>'",
-            FilterType::ScalarPrefixUtf8Output => "int64 == 62 AND ts < 9000",
-            FilterType::FixedThenVarWidthPredicates => "int64 == 9999 AND utf8View <> ''",
-            FilterType::VarWidthThenFixedPredicates => "utf8View <> '' AND int64 == 9999",
-            FilterType::SparseScalarFixedOutput => "int64 < 8 AND ts < 9000",
-            FilterType::MultiScalarProjectedKey => "int64 == 62 AND float64 > 10.0 AND ts < 9000",
-            FilterType::ComplexOrMixedPredicates => {
-                "(utf8View <> '' AND int64 < 8) OR (ts < 100 AND float64 > 95.0)"
-            }
-            FilterType::ProjectedDynamicFilters => {
-                "int64 < 12 AND ts < 9000 projected dynamic filters"
-            }
-            FilterType::SparseProjectedPredicatesFixedOutput => {
-                "int64 < 8 AND ts < 9000 projected predicates"
-            }
-            FilterType::ProjectedPredicate1Pct => "int64 < 1 projected predicate",
-            FilterType::ProjectedPredicate10Pct => {
-                "int64 < 10 projected predicate with fixed output"
-            }
-            FilterType::ProjectedPredicate5Pct => "int64 < 5 projected predicate with fixed output",
-            FilterType::ProjectedPredicate8Pct => "int64 < 8 projected predicate with fixed output",
-            FilterType::ProjectedPredicate20Pct => {
-                "int64 < 20 projected predicate with fixed output"
-            }
-            FilterType::ProjectedPredicate30Pct => {
-                "int64 < 30 projected predicate with fixed output"
-            }
-            FilterType::ProjectedPredicate40Pct => {
-                "int64 < 40 projected predicate with fixed output"
-            }
-            FilterType::ProjectedPredicate50Pct => {
-                "int64 < 50 projected predicate with fixed output"
-            }
-            FilterType::QuantityRangePredicate => "int64 > 0 AND int64 < 21",
-            FilterType::ClusteredTs20PctProjectedPredicate => {
-                "ts < 2000 projected predicate with utf8 output"
-            }
-            FilterType::ClusteredTs8PctProjectedPredicate => {
-                "ts < 800 projected predicate with utf8 output"
-            }
-            FilterType::SparseProjectedFactScan => "ts % 1000 == 0",
         };
         write!(f, "{s}")
     }
@@ -829,115 +394,6 @@ impl FilterType {
                 let scalar = StringViewArray::new_scalar("");
                 neq(array, &scalar)
             }
-            FilterType::Utf8ViewMissing => {
-                let array = batch.column(batch.schema().index_of("utf8View")?);
-                let scalar = StringViewArray::new_scalar(UTF8_VIEW_MISSING_VALUE);
-                eq(array, &scalar)
-            }
-            // ScalarPrefixUtf8Output: a cheap fragmented scalar predicate
-            // evaluated before decoding a variable-width output column.
-            FilterType::ScalarPrefixUtf8Output => {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                let counter_match = eq(int64, &Int64Array::new_scalar(62))?;
-                let date_like_range = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
-                and(&counter_match, &date_like_range)
-            }
-            FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                let utf8 = batch.column(batch.schema().index_of("utf8View")?);
-                let cheap_prefix = eq(int64, &Int64Array::new_scalar(9999))?;
-                let string_suffix = neq(utf8, &StringViewArray::new_scalar(""))?;
-                and(&cheap_prefix, &string_suffix)
-            }
-            FilterType::SparseScalarFixedOutput
-            | FilterType::SparseProjectedPredicatesFixedOutput => {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                let counter_like = lt(int64, &Int64Array::new_scalar(8))?;
-                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
-                and(&counter_like, &date_like)
-            }
-            FilterType::MultiScalarProjectedKey => {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                let float64 = batch.column(batch.schema().index_of("float64")?);
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                let counter_match = eq(int64, &Int64Array::new_scalar(62))?;
-                let width_match = gt(float64, &Float64Array::new_scalar(10.0))?;
-                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
-                and(&and(&counter_match, &width_match)?, &date_like)
-            }
-            FilterType::ComplexOrMixedPredicates => {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                let float64 = batch.column(batch.schema().index_of("float64")?);
-                let utf8 = batch.column(batch.schema().index_of("utf8View")?);
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                let string_branch = and(
-                    &neq(utf8, &StringViewArray::new_scalar(""))?,
-                    &lt(int64, &Int64Array::new_scalar(8))?,
-                )?;
-                let scalar_branch = and(
-                    &lt(ts, &TimestampMillisecondArray::new_scalar(100))?,
-                    &gt(float64, &Float64Array::new_scalar(95.0))?,
-                )?;
-                or(&string_branch, &scalar_branch)
-            }
-            FilterType::ProjectedDynamicFilters => {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                let item_like = lt(int64, &Int64Array::new_scalar(12))?;
-                let date_like = lt(ts, &TimestampMillisecondArray::new_scalar(9000))?;
-                and(&item_like, &date_like)
-            }
-            FilterType::ProjectedPredicate1Pct
-            | FilterType::ProjectedPredicate5Pct
-            | FilterType::ProjectedPredicate8Pct
-            | FilterType::ProjectedPredicate10Pct
-            | FilterType::ProjectedPredicate20Pct
-            | FilterType::ProjectedPredicate30Pct
-            | FilterType::ProjectedPredicate40Pct
-            | FilterType::ProjectedPredicate50Pct => {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                let threshold = match self {
-                    FilterType::ProjectedPredicate1Pct => 1,
-                    FilterType::ProjectedPredicate5Pct => 5,
-                    FilterType::ProjectedPredicate8Pct => 8,
-                    FilterType::ProjectedPredicate10Pct => 10,
-                    FilterType::ProjectedPredicate20Pct => 20,
-                    FilterType::ProjectedPredicate30Pct => 30,
-                    FilterType::ProjectedPredicate40Pct => 40,
-                    FilterType::ProjectedPredicate50Pct => 50,
-                    _ => unreachable!(),
-                };
-                lt(int64, &Int64Array::new_scalar(threshold))
-            }
-            FilterType::QuantityRangePredicate => {
-                let int64 = batch.column(batch.schema().index_of("int64")?);
-                let lower = gt(int64, &Int64Array::new_scalar(0))?;
-                let upper = lt(int64, &Int64Array::new_scalar(21))?;
-                and(&lower, &upper)
-            }
-            FilterType::ClusteredTs8PctProjectedPredicate => {
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                lt(ts, &TimestampMillisecondArray::new_scalar(800))
-            }
-            FilterType::ClusteredTs20PctProjectedPredicate => {
-                let ts = batch.column(batch.schema().index_of("ts")?);
-                lt(ts, &TimestampMillisecondArray::new_scalar(2000))
-            }
-            FilterType::SparseProjectedFactScan => {
-                let ts = batch
-                    .column(batch.schema().index_of("ts")?)
-                    .as_any()
-                    .downcast_ref::<TimestampMillisecondArray>()
-                    .unwrap();
-                Ok(BooleanArray::from(
-                    ts.values()
-                        .iter()
-                        .map(|value| value % 1000 == 0)
-                        .collect::<Vec<_>>(),
-                ))
-            }
         }
     }
 
@@ -951,28 +407,7 @@ impl FilterType {
             FilterType::UnselectiveUnclustered => &[1],
             FilterType::UnselectiveClustered => &[3],
             FilterType::Composite => &[1, 3], // Use float64 column and ts column as representative for composite
-            FilterType::Utf8ViewNonEmpty | FilterType::Utf8ViewMissing => &[2],
-            FilterType::ScalarPrefixUtf8Output => &[0, 3],
-            FilterType::FixedThenVarWidthPredicates | FilterType::VarWidthThenFixedPredicates => {
-                &[0, 2]
-            }
-            FilterType::MultiScalarProjectedKey => &[0, 1, 3],
-            FilterType::SparseScalarFixedOutput
-            | FilterType::ProjectedDynamicFilters
-            | FilterType::SparseProjectedPredicatesFixedOutput => &[0, 3],
-            FilterType::ComplexOrMixedPredicates => &[0, 1, 2, 3],
-            FilterType::ProjectedPredicate1Pct
-            | FilterType::ProjectedPredicate5Pct
-            | FilterType::ProjectedPredicate8Pct
-            | FilterType::ProjectedPredicate10Pct
-            | FilterType::ProjectedPredicate20Pct
-            | FilterType::ProjectedPredicate30Pct
-            | FilterType::ProjectedPredicate40Pct
-            | FilterType::ProjectedPredicate50Pct => &[0],
-            FilterType::QuantityRangePredicate => &[0],
-            FilterType::ClusteredTs8PctProjectedPredicate
-            | FilterType::ClusteredTs20PctProjectedPredicate => &[3],
-            FilterType::SparseProjectedFactScan => &[3],
+            FilterType::Utf8ViewNonEmpty => &[2],
         }
     }
 }
@@ -1334,15 +769,6 @@ fn output_projection_for(filter_type: FilterType, projection_case: &ProjectionCa
                     || !filter_columns.contains(idx)
             })
             .collect(),
-        ProjectionCase::FilterColumnsOnly => filter_columns.to_vec(),
-        ProjectionCase::CountOnly => vec![],
-        ProjectionCase::FixedColumns => vec![0, 1, 3],
-        ProjectionCase::Float64AndTs => vec![1, 3],
-        ProjectionCase::Float64Only => vec![1],
-        ProjectionCase::Int64AndFloat64 => vec![0, 1],
-        ProjectionCase::Int64AndUtf8 => vec![0, 2],
-        ProjectionCase::TsAndUtf8 => vec![2, 3],
-        ProjectionCase::Utf8Only => vec![2],
     }
 }
 

From e9493ea2b90576d8860cdc03ab22ca5feb92b34e Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei_huang@qq.com>
Date: Tue, 16 Jun 2026 22:06:46 +0800
Subject: [PATCH 13/14] Refresh parquet benchmark filter descriptions

---
 .../arrow_reader_materialization_policy.rs    |  31 ++---
 parquet/benches/arrow_reader_row_filter.rs    | 126 +++++++++++++++++-
 2 files changed, 140 insertions(+), 17 deletions(-)

diff --git a/parquet/benches/arrow_reader_materialization_policy.rs b/parquet/benches/arrow_reader_materialization_policy.rs
index e58b2ec936b6..e65a0cb6eeec 100644
--- a/parquet/benches/arrow_reader_materialization_policy.rs
+++ b/parquet/benches/arrow_reader_materialization_policy.rs
@@ -133,7 +133,7 @@ fn append_utf8_view_value(builder: &mut StringViewBuilder, value: &str) {
 /// * Number of RowSelections = 14054784
 /// * Average run length of each RowSelection: 99997497 / 14054784 = 7.114
 ///
-/// The properties of this array are:
+/// A 100K-row reference generated by this shape has:
 /// * Selectivity is: 15144 / 100000 = 0.15144
 /// * Number of RowSelections = 12904
 /// * Average run length of each RowSelection: 100000 / 12904 = 7.75
@@ -171,7 +171,8 @@ fn create_ts_array(size: usize) -> ArrayRef {
     Arc::new(TimestampMillisecondArray::from(values)) as ArrayRef
 }
 
-/// Creates a RecordBatch with 100K rows and 4 columns: int64, float64, utf8View, and ts.
+/// Creates a RecordBatch with `size` rows and 4 columns: int64, float64,
+/// utf8View, and ts.
 pub(crate) fn create_record_batch(size: usize) -> RecordBatch {
     let fields = vec![
         Field::new("int64", DataType::Int64, false),
@@ -315,9 +316,9 @@ pub(crate) enum FilterType {
     /// │               │    │               │
     /// └───────────────┘    └───────────────┘
     /// ```
-    /// (1000 RowSelection of 10 rows each)
+    /// (fragmented, approx 5K selected rows in 500K)
     SelectiveUnclustered,
-    /// moderately selective (10%) clustered filter
+    /// moderately selective (~9%) unclustered filter
     /// ```text
     /// ┌───────────────┐    ┌───────────────┐
     /// │      ...      │    │               │
@@ -331,7 +332,7 @@ pub(crate) enum FilterType {
     /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
     /// └───────────────┘    └───────────────┘
     /// ```
-    /// (10 RowSelections of 10,000 rows each)
+    /// (fragmented, approx 45K selected rows in 500K)
     ModeratelySelectiveUnclustered,
     /// unselective (99%) unclustered filter
     /// ```text
@@ -348,23 +349,23 @@ pub(crate) enum FilterType {
     /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
     /// └───────────────┘    └───────────────┘
     /// ```
-    /// (99,000 RowSelections of 10 rows each)
+    /// (fragmented, approx 495K selected rows in 500K)
     UnselectiveUnclustered,
     /// unselective (90%) clustered filter
     /// ```text
     /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
     /// │               │    │               │
-    /// │               │    │               │
-    /// │               │    │     ...       │
-    /// │               │    │               │
-    /// │     ...       │    │               │
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
-    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
     /// └───────────────┘    └───────────────┘
     /// ```
-    /// (99 RowSelection of 10,000 rows each)
+    /// (50 selected runs of 9K rows each in 500K)
     UnselectiveClustered,
     /// `utf8View <> ''` modeling [ClickBench] [Q21-Q27]
     ///
diff --git a/parquet/benches/arrow_reader_row_filter.rs b/parquet/benches/arrow_reader_row_filter.rs
index 9b00a3876fde..32aac54ff771 100644
--- a/parquet/benches/arrow_reader_row_filter.rs
+++ b/parquet/benches/arrow_reader_row_filter.rs
@@ -135,7 +135,7 @@ fn append_utf8_view_value(builder: &mut StringViewBuilder, value: &str) {
 /// * Number of RowSelections = 14054784
 /// * Average run length of each RowSelection: 99997497 / 14054784 = 7.114
 ///
-/// The properties of this array are:
+/// A 100K-row reference generated by this shape has:
 /// * Selectivity is: 15144 / 100000 = 0.15144
 /// * Number of RowSelections = 12904
 /// * Average run length of each RowSelection: 100000 / 12904 = 7.75
@@ -173,7 +173,8 @@ fn create_ts_array(size: usize) -> ArrayRef {
     Arc::new(TimestampMillisecondArray::from(values)) as ArrayRef
 }
 
-/// Creates a RecordBatch with 100K rows and 4 columns: int64, float64, utf8View, and ts.
+/// Creates a RecordBatch with `size` rows and 4 columns: int64, float64,
+/// utf8View, and ts.
 pub(crate) fn create_record_batch(size: usize) -> RecordBatch {
     let fields = vec![
         Field::new("int64", DataType::Int64, false),
@@ -319,13 +320,134 @@ impl std::fmt::Display for AsyncStrategy {
 /// The variants correspond to the different filter patterns.
 #[derive(Clone, Copy, Debug)]
 pub(crate) enum FilterType {
+    /// point lookup: selects a single row in 500K.
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
     PointLookup,
+    /// selective (1%) unclustered filter: approx 5K selected rows in 500K.
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │      ...      │    │               │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │               │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
     SelectiveUnclustered,
+    /// moderately selective (10%) clustered filter: 50 selected runs of 1K
+    /// rows each in 500K.
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// └───────────────┘    └───────────────┘
+    /// ```
     ModeratelySelectiveClustered,
+    /// moderately selective (~9%) unclustered filter: approx 45K selected
+    /// rows in 500K.
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │      ...      │    │               │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │      ...      │    │               │
+    /// │               │    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// └───────────────┘    └───────────────┘
+    /// ```
     ModeratelySelectiveUnclustered,
+    /// unselective (99%) unclustered filter: approx 495K selected rows in
+    /// 500K.
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// └───────────────┘    └───────────────┘
+    /// ```
     UnselectiveUnclustered,
+    /// unselective (90%) clustered filter: 50 selected runs of 9K rows each
+    /// in 500K.
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │      ...      │
+    /// │               │    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
     UnselectiveClustered,
+    /// composite sparse filter: `SelectiveUnclustered` AND
+    /// `ModeratelySelectiveClustered`, approx 0.1% selected rows in 500K.
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │               │    │      ...      │
+    /// │               │    │               │
+    /// │               │    │               │
+    /// │      ...      │    │               │
+    /// │               │    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
     Composite,
+    /// `utf8View <> ''` modeling [ClickBench] [Q21-Q27] with fragmented
+    /// short string runs and sentinel values every 1K rows.
+    /// ```text
+    /// ┌───────────────┐    ┌───────────────┐
+    /// │               │    │               │
+    /// │      ...      │    │      ...      │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │               │    │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│
+    /// │      ...      │    │      ...      │
+    /// │               │    │               │
+    /// │▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒│    │               │
+    /// └───────────────┘    └───────────────┘
+    /// ```
+    ///
+    /// [ClickBench]: https://github.com/ClickHouse/ClickBench
+    /// [Q21-Q27]: https://github.com/apache/datafusion/blob/b7177234e65cbbb2dcc04c252f6acd80bb026362/benchmarks/queries/clickbench/queries.sql#L22-L28
     Utf8ViewNonEmpty,
 }
 

From cf450b5f902936bf66b6ce1b769c4d3e0a6df9c9 Mon Sep 17 00:00:00 2001
From: Qiwei Huang <qiwei_huang@qq.com>
Date: Tue, 16 Jun 2026 22:29:58 +0800
Subject: [PATCH 14/14] ci: rerun rust lint