From a6d98094ef484315eb564a9b9d8e1569426ff3a4 Mon Sep 17 00:00:00 2001 From: Hu Shenggang Date: Fri, 22 May 2026 15:25:15 +0800 Subject: [PATCH] [refactor](be) Remove redundant remaining conjunct roots ### What problem does this PR solve? Issue Number: None Related PR: None Problem Summary: SegmentIterator kept a separate remaining_conjunct_roots list in addition to common_expr_ctxs_push_down. Both represented the same residual common expression state, so index pruning paths had to keep them synchronized and could drift when an expression was removed after index evaluation. This change removes the duplicated remaining_conjunct_roots plumbing and uses common_expr_ctxs_push_down as the single source of residual common expression state for condition cache decisions, lazy materialization column extraction, and expression execution checks. ### Release note None ### Check List (For Author) - Test: Build - ./build.sh --be - build-support/clang-format.sh - git diff --check - Behavior changed: No - Does this need documentation: No --- be/src/exec/scan/olap_scanner.cpp | 12 --------- be/src/storage/iterators.h | 2 -- be/src/storage/rowset/beta_rowset_reader.cpp | 1 - be/src/storage/rowset/rowset_reader_context.h | 1 - be/src/storage/segment/segment_iterator.cpp | 26 +++++-------------- be/src/storage/segment/segment_iterator.h | 4 +-- be/src/storage/tablet/tablet_reader.cpp | 1 - be/src/storage/tablet/tablet_reader.h | 1 - 8 files changed, 8 insertions(+), 40 deletions(-) diff --git a/be/src/exec/scan/olap_scanner.cpp b/be/src/exec/scan/olap_scanner.cpp index 3cfc9e22f9c323..8656576dbeb4cd 100644 --- a/be/src/exec/scan/olap_scanner.cpp +++ b/be/src/exec/scan/olap_scanner.cpp @@ -89,7 +89,6 @@ OlapScanner::OlapScanner(ScanLocalStateBase* parent, OlapScanner::Params&& param .rs_splits {}, .return_columns {}, .output_columns {}, - .remaining_conjunct_roots {}, .common_expr_ctxs_push_down {}, .topn_filter_source_node_ids {}, .key_group_cluster_key_idxes {}, @@ -316,17 +315,6 @@ Status OlapScanner::_init_tablet_reader_params( _tablet_reader_params.push_down_agg_type_opt = _local_state->get_push_down_agg_type(); - // TODO: If a new runtime filter arrives after `_conjuncts` move to `_common_expr_ctxs_push_down`, - if (_common_expr_ctxs_push_down.empty()) { - for (auto& conjunct : _conjuncts) { - _tablet_reader_params.remaining_conjunct_roots.emplace_back(conjunct->root()); - } - } else { - for (auto& ctx : _common_expr_ctxs_push_down) { - _tablet_reader_params.remaining_conjunct_roots.emplace_back(ctx->root()); - } - } - _tablet_reader_params.common_expr_ctxs_push_down = _common_expr_ctxs_push_down; _tablet_reader_params.virtual_column_exprs = _virtual_column_exprs; _tablet_reader_params.vir_cid_to_idx_in_block = _vir_cid_to_idx_in_block; diff --git a/be/src/storage/iterators.h b/be/src/storage/iterators.h index 192c17c29f0f33..ff2d48360155a7 100644 --- a/be/src/storage/iterators.h +++ b/be/src/storage/iterators.h @@ -124,8 +124,6 @@ class StorageReadOptions { // columns for orderby keys std::vector* read_orderby_key_columns = nullptr; io::IOContext io_ctx; - VExpr* remaining_vconjunct_root = nullptr; - std::vector remaining_conjunct_roots; VExprContextSPtrs common_expr_ctxs_push_down; const std::set* output_columns = nullptr; // runtime state diff --git a/be/src/storage/rowset/beta_rowset_reader.cpp b/be/src/storage/rowset/beta_rowset_reader.cpp index 3784d5360e6b06..9aa51eb47239d1 100644 --- a/be/src/storage/rowset/beta_rowset_reader.cpp +++ b/be/src/storage/rowset/beta_rowset_reader.cpp @@ -99,7 +99,6 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_options.preferred_block_size_bytes = read_context->preferred_block_size_bytes; _read_options.stats = _stats; _read_options.push_down_agg_type_opt = _read_context->push_down_agg_type_opt; - _read_options.remaining_conjunct_roots = _read_context->remaining_conjunct_roots; _read_options.common_expr_ctxs_push_down = _read_context->common_expr_ctxs_push_down; _read_options.virtual_column_exprs = _read_context->virtual_column_exprs; diff --git a/be/src/storage/rowset/rowset_reader_context.h b/be/src/storage/rowset/rowset_reader_context.h index f78774aef86dab..dc25c3105d0624 100644 --- a/be/src/storage/rowset/rowset_reader_context.h +++ b/be/src/storage/rowset/rowset_reader_context.h @@ -66,7 +66,6 @@ struct RowsetReaderContext { const DeleteHandler* delete_handler = nullptr; OlapReaderStatistics* stats = nullptr; RuntimeState* runtime_state = nullptr; - std::vector remaining_conjunct_roots; VExprContextSPtrs common_expr_ctxs_push_down; bool use_page_cache = false; int sequence_id_idx = -1; diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index 8ac492769ea53a..4952ab557479e2 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -235,8 +235,7 @@ SegmentIterator::~SegmentIterator() = default; void SegmentIterator::_init_row_bitmap_by_condition_cache() { // Only dispose need column predicate and expr cal in condition cache - if (!_col_predicates.empty() || - (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty())) { + if (!_col_predicates.empty() || !_common_expr_ctxs_push_down.empty()) { if (_opts.condition_cache_digest) { auto* condition_cache = ConditionCache::instance(); ConditionCache::CacheKey cache_key(_opts.rowset_id, _segment->id(), @@ -522,8 +521,6 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _initial_block_row_max = _opts.block_row_max; _block_size_predictor = _make_block_size_predictor(); - _remaining_conjunct_roots = opts.remaining_conjunct_roots; - if (_schema->rowid_col_idx() > 0) { _record_rowids = true; } @@ -580,7 +577,6 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { RETURN_IF_ERROR(init_iterators()); RETURN_IF_ERROR(_construct_compound_expr_context()); - _enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty(); VLOG_DEBUG << fmt::format( "Segment iterator init, virtual_column_exprs size: {}, " "_vir_cid_to_idx_in_block size: {}, common_expr_pushdown size: {}", @@ -597,7 +593,7 @@ void SegmentIterator::_initialize_predicate_results() { _column_predicate_index_exec_status[cid][pred] = false; } - _calculate_expr_in_remaining_conjunct_root(); + _calculate_common_expr_index_exec_status(); } Status SegmentIterator::init_iterators() { @@ -928,12 +924,6 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() { (*it)->root().get()); if (result != nullptr) { _row_bitmap &= *result->get_data_bitmap(); - auto root = (*it)->root(); - auto iter_find = std::find(_remaining_conjunct_roots.begin(), - _remaining_conjunct_roots.end(), root); - if (iter_find != _remaining_conjunct_roots.end()) { - _remaining_conjunct_roots.erase(iter_find); - } it = _common_expr_ctxs_push_down.erase(it); } } else { @@ -1404,8 +1394,6 @@ Status SegmentIterator::_apply_index_expr() { ++it; } } - // TODO:Do we need to remove these expr root from _remaining_conjunct_roots? - return Status::OK(); } @@ -2065,9 +2053,9 @@ Status SegmentIterator::_vec_init_lazy_materialization() { // Step2: extract columns that can execute expr context _is_common_expr_column.resize(_schema->columns().size(), false); - if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) { - for (auto expr : _remaining_conjunct_roots) { - RETURN_IF_ERROR(_extract_common_expr_columns(expr)); + if (!_common_expr_ctxs_push_down.empty()) { + for (const auto& expr_ctx : _common_expr_ctxs_push_down) { + RETURN_IF_ERROR(_extract_common_expr_columns(expr_ctx->root())); } if (!_common_expr_columns.empty()) { _is_need_expr_eval = true; @@ -3238,7 +3226,7 @@ Status SegmentIterator::_process_common_expr(uint16_t* sel_rowid_idx, uint16_t& Status SegmentIterator::_execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, Block* block) { SCOPED_RAW_TIMER(&_opts.stats->expr_filter_ns); - DCHECK(!_remaining_conjunct_roots.empty()); + DCHECK(!_common_expr_ctxs_push_down.empty()); DCHECK(block->rows() != 0); int prev_columns = block->columns(); uint16_t original_size = selected_size; @@ -3435,7 +3423,7 @@ Status SegmentIterator::_construct_compound_expr_context() { return Status::OK(); } -void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() { +void SegmentIterator::_calculate_common_expr_index_exec_status() { for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) { const auto& root_expr = root_expr_ctx->root(); if (root_expr == nullptr) { diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 5105d050da1872..0afd6b782e1045 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -327,7 +327,7 @@ class SegmentIterator : public RowwiseIterator { bool _check_all_conditions_passed_inverted_index_for_column(ColumnId cid, bool default_return = false); - void _calculate_expr_in_remaining_conjunct_root(); + void _calculate_common_expr_index_exec_status(); Status _process_eof(Block* block); @@ -420,8 +420,6 @@ class SegmentIterator : public RowwiseIterator { // make a copy of `_opts.column_predicates` in order to make local changes std::vector> _col_predicates; VExprContextSPtrs _common_expr_ctxs_push_down; - bool _enable_common_expr_pushdown = false; - std::vector _remaining_conjunct_roots; std::set _not_apply_index_pred; // row schema of the key to seek diff --git a/be/src/storage/tablet/tablet_reader.cpp b/be/src/storage/tablet/tablet_reader.cpp index fb71c6f9541552..ffb98c469816a0 100644 --- a/be/src/storage/tablet/tablet_reader.cpp +++ b/be/src/storage/tablet/tablet_reader.cpp @@ -179,7 +179,6 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.record_rowids = read_params.record_rowids; _reader_context.rowid_conversion = read_params.rowid_conversion; _reader_context.is_key_column_group = read_params.is_key_column_group; - _reader_context.remaining_conjunct_roots = read_params.remaining_conjunct_roots; _reader_context.common_expr_ctxs_push_down = read_params.common_expr_ctxs_push_down; _reader_context.output_columns = &read_params.output_columns; _reader_context.push_down_agg_type_opt = read_params.push_down_agg_type_opt; diff --git a/be/src/storage/tablet/tablet_reader.h b/be/src/storage/tablet/tablet_reader.h index d12fc6ccd6d802..6bf57dbe441868 100644 --- a/be/src/storage/tablet/tablet_reader.h +++ b/be/src/storage/tablet/tablet_reader.h @@ -166,7 +166,6 @@ class TabletReader { std::vector* origin_return_columns = nullptr; std::unordered_set* tablet_columns_convert_to_null_set = nullptr; TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE; - std::vector remaining_conjunct_roots; VExprContextSPtrs common_expr_ctxs_push_down; // used for compaction to record row ids