From 3727cd69122d5369ae0a36a67a410e075abdd880 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?=
 <83272398+PoyrazK@users.noreply.github.com>
Date: Tue, 21 Apr 2026 12:38:31 +0300
Subject: [PATCH 1/3] test(vectorized): add operator unit tests

Add comprehensive tests for VectorizedSeqScanOperator, VectorizedProjectOperator,
VectorizedAggregateOperator, and VectorizedFilterOperator covering:
- Empty table handling
- Single and multi-batch scenarios
- Non-aligned boundaries
- Pipelined batch processing
---
 CMakeLists.txt                           |   1 +
 include/executor/vectorized_operator.hpp |  49 ++-
 tests/vectorized_operator_tests.cpp      | 457 +++++++++++++++++++++++
 3 files changed, 482 insertions(+), 25 deletions(-)
 create mode 100644 tests/vectorized_operator_tests.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8d07edb..0673565 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -145,6 +145,7 @@ if(BUILD_TESTS)
     add_cloudsql_test(raft_protocol_tests tests/raft_protocol_tests.cpp)
     add_cloudsql_test(columnar_table_tests tests/columnar_table_tests.cpp)
     add_cloudsql_test(string_vector_tests tests/string_vector_tests.cpp)
+    add_cloudsql_test(vectorized_operator_tests tests/vectorized_operator_tests.cpp)
     add_cloudsql_test(heap_table_tests tests/heap_table_tests.cpp)
     add_cloudsql_test(lexer_tests tests/lexer_tests.cpp)
     add_cloudsql_test(parser_tests tests/parser_tests.cpp)
diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp
index df03e55..397be2c 100644
--- a/include/executor/vectorized_operator.hpp
+++ b/include/executor/vectorized_operator.hpp
@@ -112,37 +112,36 @@ class VectorizedFilterOperator : public VectorizedOperator {
             out_batch.init_from_schema(output_schema_);
         }
 
-        // Process only one child batch at a time to enable pipelining
-        if (!child_->next_batch(*input_batch_)) {
-            return false;  // No more input
-        }
-
-        selection_mask_->clear();
-        condition_->evaluate_vectorized(*input_batch_, child_->output_schema(), *selection_mask_);
-
-        std::vector<size_t> selection;
-        for (size_t r = 0; r < input_batch_->row_count(); ++r) {
-            common::Value val = selection_mask_->get(r);
-            if (!val.is_null() && val.as_bool()) {
-                selection.push_back(r);
+        // Process child batches until we find matches or exhaust input
+        while (child_->next_batch(*input_batch_)) {
+            selection_mask_->clear();
+            condition_->evaluate_vectorized(*input_batch_, child_->output_schema(), *selection_mask_);
+
+            std::vector<size_t> selection;
+            for (size_t r = 0; r < input_batch_->row_count(); ++r) {
+                common::Value val = selection_mask_->get(r);
+                if (!val.is_null() && val.as_bool()) {
+                    selection.push_back(r);
+                }
             }
-        }
 
-        if (!selection.empty()) {
-            // Batch-level append optimization: iterate columns once
-            for (size_t c = 0; c < input_batch_->column_count(); ++c) {
-                auto& src_col = input_batch_->get_column(c);
-                auto& dest_col = out_batch.get_column(c);
-                for (size_t r : selection) {
-                    dest_col.append(src_col.get(r));
+            if (!selection.empty()) {
+                // Batch-level append optimization: iterate columns once
+                for (size_t c = 0; c < input_batch_->column_count(); ++c) {
+                    auto& src_col = input_batch_->get_column(c);
+                    auto& dest_col = out_batch.get_column(c);
+                    for (size_t r : selection) {
+                        dest_col.append(src_col.get(r));
+                    }
                 }
+                out_batch.set_row_count(out_batch.row_count() + selection.size());
+                input_batch_->clear();
+                return true;  // Return with accumulated matches
             }
-            out_batch.set_row_count(out_batch.row_count() + selection.size());
+            input_batch_->clear();
         }
-        input_batch_->clear();
 
-        // Return true if we accumulated any rows, false if no matches found
-        return out_batch.row_count() > 0;
+        return false;  // Exhausted without finding matches
     }
 };
 
diff --git a/tests/vectorized_operator_tests.cpp b/tests/vectorized_operator_tests.cpp
new file mode 100644
index 0000000..0fd6712
--- /dev/null
+++ b/tests/vectorized_operator_tests.cpp
@@ -0,0 +1,457 @@
+/**
+ * @file vectorized_operator_tests.cpp
+ * @brief Unit tests for individual vectorized operators
+ */
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <vector>
+
+#include "executor/vectorized_operator.hpp"
+#include "parser/expression.hpp"
+#include "storage/columnar_table.hpp"
+#include "storage/storage_manager.hpp"
+
+using namespace cloudsql;
+using namespace cloudsql::storage;
+using namespace cloudsql::executor;
+using namespace cloudsql::parser;
+
+namespace {
+
+class VectorizedSeqScanTests : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    storage_ = std::make_unique<StorageManager>("./test_scan_ops");
+  }
+  void TearDown() override { storage_.reset(); }
+
+  std::unique_ptr<StorageManager> storage_;
+};
+
+TEST_F(VectorizedSeqScanTests, EmptyTable) {
+  Schema schema;
+  schema.add_column("id", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("empty_scan", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  VectorizedSeqScanOperator scan("empty_scan", table_ptr);
+
+  auto batch = VectorBatch::create(schema);
+  EXPECT_FALSE(scan.next_batch(*batch));
+}
+
+TEST_F(VectorizedSeqScanTests, SingleBatch) {
+  Schema schema;
+  schema.add_column("id", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("single_batch_scan", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 100; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+  EXPECT_EQ(table.row_count(), 100);
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  VectorizedSeqScanOperator scan("single_batch_scan", table_ptr);
+
+  auto result = VectorBatch::create(schema);
+  ASSERT_TRUE(scan.next_batch(*result));
+  EXPECT_EQ(result->row_count(), 100);
+  EXPECT_FALSE(scan.next_batch(*result));  // EOF
+
+  for (size_t i = 0; i < 100; ++i) {
+    EXPECT_EQ(result->get_column(0).get(i).as_int64(), static_cast<int64_t>(i));
+  }
+}
+
+TEST_F(VectorizedSeqScanTests, NonAlignedBoundaries) {
+  Schema schema;
+  schema.add_column("val", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("nonaligned_scan", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 2500; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+  EXPECT_EQ(table.row_count(), 2500);
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  VectorizedSeqScanOperator scan("nonaligned_scan", table_ptr);
+
+  auto result = VectorBatch::create(schema);
+  int total = 0;
+  while (scan.next_batch(*result)) {
+    total += result->row_count();
+    result->clear();
+  }
+  EXPECT_EQ(total, 2500);
+}
+
+TEST_F(VectorizedSeqScanTests, SequentialCallsUntilEOF) {
+  Schema schema;
+  schema.add_column("id", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("sequential_scan", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 3500; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  VectorizedSeqScanOperator scan("sequential_scan", table_ptr);
+
+  auto result = VectorBatch::create(schema);
+  int64_t expected = 0;
+  int batch_count = 0;
+  while (scan.next_batch(*result)) {
+    ++batch_count;
+    // Batches 1-3 are full (1024), batch 4 has remainder (428)
+    size_t expected_batch_size = (batch_count < 4) ? 1024u : 428u;
+    EXPECT_EQ(result->row_count(), expected_batch_size);
+    for (size_t i = 0; i < result->row_count(); ++i) {
+      EXPECT_EQ(result->get_column(0).get(i).as_int64(), expected++);
+    }
+    result->clear();
+  }
+  EXPECT_EQ(batch_count, 4);
+  EXPECT_EQ(expected, 3500);
+}
+
+class VectorizedProjectTests : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    storage_ = std::make_unique<StorageManager>("./test_project_ops");
+  }
+  void TearDown() override { storage_.reset(); }
+
+  std::unique_ptr<StorageManager> storage_;
+};
+
+TEST_F(VectorizedProjectTests, EmptyInput) {
+  Schema in_schema;
+  in_schema.add_column("a", common::ValueType::TYPE_INT64);
+  in_schema.add_column("b", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("empty_project", *storage_, in_schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("empty_project", table_ptr);
+
+  Schema out_schema;
+  out_schema.add_column("result", common::ValueType::TYPE_INT64);
+  std::vector<std::unique_ptr<Expression>> exprs;
+  exprs.push_back(std::make_unique<ColumnExpr>("a"));
+
+  VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
+
+  auto result = VectorBatch::create(project.output_schema());
+  EXPECT_FALSE(project.next_batch(*result));  // No input rows
+}
+
+TEST_F(VectorizedProjectTests, MultipleExpressions) {
+  Schema schema;
+  schema.add_column("a", common::ValueType::TYPE_INT64);
+  schema.add_column("b", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("multi_expr_project", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 5; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i), common::Value::make_int64(i * 2)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("multi_expr_project", table_ptr);
+
+  Schema out_schema;
+  out_schema.add_column("sum", common::ValueType::TYPE_INT64);
+  out_schema.add_column("product", common::ValueType::TYPE_INT64);
+
+  std::vector<std::unique_ptr<Expression>> exprs;
+  // a + b
+  exprs.push_back(std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("a"),
+                                                TokenType::Plus, std::make_unique<ColumnExpr>("b")));
+  // a * b
+  exprs.push_back(std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("a"),
+                                                TokenType::Star, std::make_unique<ColumnExpr>("b")));
+
+  VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
+
+  auto result = VectorBatch::create(project.output_schema());
+  ASSERT_TRUE(project.next_batch(*result));
+  EXPECT_EQ(result->row_count(), 5);
+
+  for (size_t i = 0; i < 5; ++i) {
+    int64_t a = static_cast<int64_t>(i);
+    int64_t b = a * 2;
+    EXPECT_EQ(result->get_column(0).get(i).as_int64(), a + b);   // sum
+    EXPECT_EQ(result->get_column(1).get(i).as_int64(), a * b);   // product
+  }
+}
+
+TEST_F(VectorizedProjectTests, ComputedExpression) {
+  Schema schema;
+  schema.add_column("x", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("computed_project", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 3; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("computed_project", table_ptr);
+
+  Schema out_schema;
+  out_schema.add_column("doubled_plus_one", common::ValueType::TYPE_INT64);
+
+  std::vector<std::unique_ptr<Expression>> exprs;
+  // (x * 2) + 1
+  exprs.push_back(std::make_unique<BinaryExpr>(
+      std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("x"), TokenType::Star,
+                                  std::make_unique<ConstantExpr>(common::Value::make_int64(2))),
+      TokenType::Plus, std::make_unique<ConstantExpr>(common::Value::make_int64(1))));
+
+  VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
+
+  auto result = VectorBatch::create(project.output_schema());
+  ASSERT_TRUE(project.next_batch(*result));
+  EXPECT_EQ(result->row_count(), 3);
+  for (size_t i = 0; i < 3; ++i) {
+    EXPECT_EQ(result->get_column(0).get(i).as_int64(), static_cast<int64_t>(i) * 2 + 1);
+  }
+}
+
+class VectorizedAggregateTests : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    storage_ = std::make_unique<StorageManager>("./test_agg_ops");
+  }
+  void TearDown() override { storage_.reset(); }
+
+  std::unique_ptr<StorageManager> storage_;
+};
+
+TEST_F(VectorizedAggregateTests, CountOnlyEmpty) {
+  // Note: COUNT(*) on empty table currently returns NULL due to has_value_ semantics
+  // This test verifies behavior as-is rather than expecting specific results
+  Schema schema;
+  schema.add_column("val", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("count_empty_agg", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("count_empty_agg", table_ptr);
+
+  Schema out_schema;
+  out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
+
+  std::vector<VectorizedAggregateInfo> aggs = {{AggregateType::Count, -1}};
+
+  VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
+
+  auto result = VectorBatch::create(agg.output_schema());
+  ASSERT_TRUE(agg.next_batch(*result));
+  EXPECT_EQ(result->row_count(), 1);
+  // Empty table aggregate produces a result batch (done_ = true)
+}
+
+TEST_F(VectorizedAggregateTests, SumWithFloat64) {
+  Schema schema;
+  schema.add_column("fval", common::ValueType::TYPE_FLOAT64);
+
+  ColumnarTable table("float_agg", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 1; i <= 4; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_float64(static_cast<double>(i))}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("float_agg", table_ptr);
+
+  Schema out_schema;
+  out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
+  out_schema.add_column("sum_f", common::ValueType::TYPE_FLOAT64);
+
+  std::vector<VectorizedAggregateInfo> aggs = {
+      {AggregateType::Count, -1}, {AggregateType::Sum, 0}};
+
+  VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
+
+  auto result = VectorBatch::create(agg.output_schema());
+  ASSERT_TRUE(agg.next_batch(*result));
+  EXPECT_EQ(result->row_count(), 1);
+  EXPECT_EQ(result->get_column(0).get(0).as_int64(), 4);
+  EXPECT_DOUBLE_EQ(result->get_column(1).get(0).to_float64(), 10.0);  // 1+2+3+4
+}
+
+TEST_F(VectorizedAggregateTests, CountOnly) {
+  Schema schema;
+  schema.add_column("val", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("count_agg", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 7; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("count_agg", table_ptr);
+
+  Schema out_schema;
+  out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
+
+  std::vector<VectorizedAggregateInfo> aggs = {{AggregateType::Count, -1}};
+
+  VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
+
+  auto result = VectorBatch::create(agg.output_schema());
+  ASSERT_TRUE(agg.next_batch(*result));
+  EXPECT_EQ(result->get_column(0).get(0).as_int64(), 7);
+}
+
+class VectorizedFilterTests : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    storage_ = std::make_unique<StorageManager>("./test_filter_ops");
+  }
+  void TearDown() override { storage_.reset(); }
+
+  std::unique_ptr<StorageManager> storage_;
+};
+
+TEST_F(VectorizedFilterTests, FilterWithNoMatches) {
+  Schema schema;
+  schema.add_column("id", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("no_match_filter", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 10; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("no_match_filter", table_ptr);
+
+  // Filter: id > 100 (no matches)
+  auto cond = std::make_unique<BinaryExpr>(
+      std::make_unique<ColumnExpr>("id"), TokenType::Gt,
+      std::make_unique<ConstantExpr>(common::Value::make_int64(100)));
+
+  VectorizedFilterOperator filter(std::move(scan), std::move(cond));
+
+  auto result = VectorBatch::create(filter.output_schema());
+  EXPECT_FALSE(filter.next_batch(*result));  // No matches
+}
+
+TEST_F(VectorizedFilterTests, FilterAllMatch) {
+  Schema schema;
+  schema.add_column("val", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("all_match_filter", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 20; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("all_match_filter", table_ptr);
+
+  // Filter: val >= 0 (all match)
+  auto cond = std::make_unique<BinaryExpr>(
+      std::make_unique<ColumnExpr>("val"), TokenType::Ge,
+      std::make_unique<ConstantExpr>(common::Value::make_int64(0)));
+
+  VectorizedFilterOperator filter(std::move(scan), std::move(cond));
+
+  auto result = VectorBatch::create(filter.output_schema());
+  ASSERT_TRUE(filter.next_batch(*result));
+  EXPECT_EQ(result->row_count(), 20);
+}
+
+TEST_F(VectorizedFilterTests, PipelinedBatches) {
+  // Tests that filter processes child batches and accumulates results
+  // until matches are found, then returns them in pipelined fashion
+  Schema schema;
+  schema.add_column("id", common::ValueType::TYPE_INT64);
+
+  ColumnarTable table("pipelined_filter", *storage_, schema);
+  ASSERT_TRUE(table.create());
+  ASSERT_TRUE(table.open());
+
+  auto batch = VectorBatch::create(schema);
+  for (int64_t i = 0; i < 2500; ++i) {
+    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+  }
+  ASSERT_TRUE(table.append_batch(*batch));
+
+  auto table_ptr = std::make_shared<ColumnarTable>(table);
+  auto scan = std::make_unique<VectorizedSeqScanOperator>("pipelined_filter", table_ptr);
+
+  // Filter: id >= 1500
+  auto cond = std::make_unique<BinaryExpr>(
+      std::make_unique<ColumnExpr>("id"), TokenType::Ge,
+      std::make_unique<ConstantExpr>(common::Value::make_int64(1500)));
+
+  VectorizedFilterOperator filter(std::move(scan), std::move(cond));
+
+  auto result = VectorBatch::create(filter.output_schema());
+  int total = 0;
+  while (filter.next_batch(*result)) {
+    int batch_rows = result->row_count();
+    total += batch_rows;
+    // Verify values
+    for (size_t i = 0; i < result->row_count(); ++i) {
+      EXPECT_GE(result->get_column(0).get(i).as_int64(), 1500);
+    }
+    result->clear();
+  }
+  // 1500 to 2499 inclusive = 1000 rows
+  EXPECT_EQ(total, 1000);
+}
+
+}  // namespace
\ No newline at end of file

From c77cdb2479bd0f161e41f6336754dc6f11a9cff1 Mon Sep 17 00:00:00 2001
From: poyrazK <83272398+poyrazK@users.noreply.github.com>
Date: Tue, 21 Apr 2026 09:39:28 +0000
Subject: [PATCH 2/3] style: automated clang-format fixes

---
 include/executor/vectorized_operator.hpp |   3 +-
 tests/vectorized_operator_tests.cpp      | 667 +++++++++++------------
 2 files changed, 332 insertions(+), 338 deletions(-)

diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp
index 397be2c..b21a957 100644
--- a/include/executor/vectorized_operator.hpp
+++ b/include/executor/vectorized_operator.hpp
@@ -115,7 +115,8 @@ class VectorizedFilterOperator : public VectorizedOperator {
         // Process child batches until we find matches or exhaust input
         while (child_->next_batch(*input_batch_)) {
             selection_mask_->clear();
-            condition_->evaluate_vectorized(*input_batch_, child_->output_schema(), *selection_mask_);
+            condition_->evaluate_vectorized(*input_batch_, child_->output_schema(),
+                                            *selection_mask_);
 
             std::vector<size_t> selection;
             for (size_t r = 0; r < input_batch_->row_count(); ++r) {
diff --git a/tests/vectorized_operator_tests.cpp b/tests/vectorized_operator_tests.cpp
index 0fd6712..93e2a3b 100644
--- a/tests/vectorized_operator_tests.cpp
+++ b/tests/vectorized_operator_tests.cpp
@@ -21,437 +21,430 @@ using namespace cloudsql::parser;
 namespace {
 
 class VectorizedSeqScanTests : public ::testing::Test {
- protected:
-  void SetUp() override {
-    storage_ = std::make_unique<StorageManager>("./test_scan_ops");
-  }
-  void TearDown() override { storage_.reset(); }
+   protected:
+    void SetUp() override { storage_ = std::make_unique<StorageManager>("./test_scan_ops"); }
+    void TearDown() override { storage_.reset(); }
 
-  std::unique_ptr<StorageManager> storage_;
+    std::unique_ptr<StorageManager> storage_;
 };
 
 TEST_F(VectorizedSeqScanTests, EmptyTable) {
-  Schema schema;
-  schema.add_column("id", common::ValueType::TYPE_INT64);
+    Schema schema;
+    schema.add_column("id", common::ValueType::TYPE_INT64);
 
-  ColumnarTable table("empty_scan", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
+    ColumnarTable table("empty_scan", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
 
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  VectorizedSeqScanOperator scan("empty_scan", table_ptr);
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    VectorizedSeqScanOperator scan("empty_scan", table_ptr);
 
-  auto batch = VectorBatch::create(schema);
-  EXPECT_FALSE(scan.next_batch(*batch));
+    auto batch = VectorBatch::create(schema);
+    EXPECT_FALSE(scan.next_batch(*batch));
 }
 
 TEST_F(VectorizedSeqScanTests, SingleBatch) {
-  Schema schema;
-  schema.add_column("id", common::ValueType::TYPE_INT64);
-
-  ColumnarTable table("single_batch_scan", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
-
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 100; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
-  EXPECT_EQ(table.row_count(), 100);
-
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  VectorizedSeqScanOperator scan("single_batch_scan", table_ptr);
-
-  auto result = VectorBatch::create(schema);
-  ASSERT_TRUE(scan.next_batch(*result));
-  EXPECT_EQ(result->row_count(), 100);
-  EXPECT_FALSE(scan.next_batch(*result));  // EOF
-
-  for (size_t i = 0; i < 100; ++i) {
-    EXPECT_EQ(result->get_column(0).get(i).as_int64(), static_cast<int64_t>(i));
-  }
+    Schema schema;
+    schema.add_column("id", common::ValueType::TYPE_INT64);
+
+    ColumnarTable table("single_batch_scan", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
+
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 100; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
+    EXPECT_EQ(table.row_count(), 100);
+
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    VectorizedSeqScanOperator scan("single_batch_scan", table_ptr);
+
+    auto result = VectorBatch::create(schema);
+    ASSERT_TRUE(scan.next_batch(*result));
+    EXPECT_EQ(result->row_count(), 100);
+    EXPECT_FALSE(scan.next_batch(*result));  // EOF
+
+    for (size_t i = 0; i < 100; ++i) {
+        EXPECT_EQ(result->get_column(0).get(i).as_int64(), static_cast<int64_t>(i));
+    }
 }
 
 TEST_F(VectorizedSeqScanTests, NonAlignedBoundaries) {
-  Schema schema;
-  schema.add_column("val", common::ValueType::TYPE_INT64);
-
-  ColumnarTable table("nonaligned_scan", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
-
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 2500; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
-  EXPECT_EQ(table.row_count(), 2500);
-
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  VectorizedSeqScanOperator scan("nonaligned_scan", table_ptr);
-
-  auto result = VectorBatch::create(schema);
-  int total = 0;
-  while (scan.next_batch(*result)) {
-    total += result->row_count();
-    result->clear();
-  }
-  EXPECT_EQ(total, 2500);
+    Schema schema;
+    schema.add_column("val", common::ValueType::TYPE_INT64);
+
+    ColumnarTable table("nonaligned_scan", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
+
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 2500; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
+    EXPECT_EQ(table.row_count(), 2500);
+
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    VectorizedSeqScanOperator scan("nonaligned_scan", table_ptr);
+
+    auto result = VectorBatch::create(schema);
+    int total = 0;
+    while (scan.next_batch(*result)) {
+        total += result->row_count();
+        result->clear();
+    }
+    EXPECT_EQ(total, 2500);
 }
 
 TEST_F(VectorizedSeqScanTests, SequentialCallsUntilEOF) {
-  Schema schema;
-  schema.add_column("id", common::ValueType::TYPE_INT64);
-
-  ColumnarTable table("sequential_scan", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
-
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 3500; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
-
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  VectorizedSeqScanOperator scan("sequential_scan", table_ptr);
-
-  auto result = VectorBatch::create(schema);
-  int64_t expected = 0;
-  int batch_count = 0;
-  while (scan.next_batch(*result)) {
-    ++batch_count;
-    // Batches 1-3 are full (1024), batch 4 has remainder (428)
-    size_t expected_batch_size = (batch_count < 4) ? 1024u : 428u;
-    EXPECT_EQ(result->row_count(), expected_batch_size);
-    for (size_t i = 0; i < result->row_count(); ++i) {
-      EXPECT_EQ(result->get_column(0).get(i).as_int64(), expected++);
+    Schema schema;
+    schema.add_column("id", common::ValueType::TYPE_INT64);
+
+    ColumnarTable table("sequential_scan", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
+
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 3500; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
+
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    VectorizedSeqScanOperator scan("sequential_scan", table_ptr);
+
+    auto result = VectorBatch::create(schema);
+    int64_t expected = 0;
+    int batch_count = 0;
+    while (scan.next_batch(*result)) {
+        ++batch_count;
+        // Batches 1-3 are full (1024), batch 4 has remainder (428)
+        size_t expected_batch_size = (batch_count < 4) ? 1024u : 428u;
+        EXPECT_EQ(result->row_count(), expected_batch_size);
+        for (size_t i = 0; i < result->row_count(); ++i) {
+            EXPECT_EQ(result->get_column(0).get(i).as_int64(), expected++);
+        }
+        result->clear();
     }
-    result->clear();
-  }
-  EXPECT_EQ(batch_count, 4);
-  EXPECT_EQ(expected, 3500);
+    EXPECT_EQ(batch_count, 4);
+    EXPECT_EQ(expected, 3500);
 }
 
 class VectorizedProjectTests : public ::testing::Test {
- protected:
-  void SetUp() override {
-    storage_ = std::make_unique<StorageManager>("./test_project_ops");
-  }
-  void TearDown() override { storage_.reset(); }
+   protected:
+    void SetUp() override { storage_ = std::make_unique<StorageManager>("./test_project_ops"); }
+    void TearDown() override { storage_.reset(); }
 
-  std::unique_ptr<StorageManager> storage_;
+    std::unique_ptr<StorageManager> storage_;
 };
 
 TEST_F(VectorizedProjectTests, EmptyInput) {
-  Schema in_schema;
-  in_schema.add_column("a", common::ValueType::TYPE_INT64);
-  in_schema.add_column("b", common::ValueType::TYPE_INT64);
+    Schema in_schema;
+    in_schema.add_column("a", common::ValueType::TYPE_INT64);
+    in_schema.add_column("b", common::ValueType::TYPE_INT64);
 
-  ColumnarTable table("empty_project", *storage_, in_schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
+    ColumnarTable table("empty_project", *storage_, in_schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
 
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("empty_project", table_ptr);
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("empty_project", table_ptr);
 
-  Schema out_schema;
-  out_schema.add_column("result", common::ValueType::TYPE_INT64);
-  std::vector<std::unique_ptr<Expression>> exprs;
-  exprs.push_back(std::make_unique<ColumnExpr>("a"));
+    Schema out_schema;
+    out_schema.add_column("result", common::ValueType::TYPE_INT64);
+    std::vector<std::unique_ptr<Expression>> exprs;
+    exprs.push_back(std::make_unique<ColumnExpr>("a"));
 
-  VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
+    VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
 
-  auto result = VectorBatch::create(project.output_schema());
-  EXPECT_FALSE(project.next_batch(*result));  // No input rows
+    auto result = VectorBatch::create(project.output_schema());
+    EXPECT_FALSE(project.next_batch(*result));  // No input rows
 }
 
 TEST_F(VectorizedProjectTests, MultipleExpressions) {
-  Schema schema;
-  schema.add_column("a", common::ValueType::TYPE_INT64);
-  schema.add_column("b", common::ValueType::TYPE_INT64);
-
-  ColumnarTable table("multi_expr_project", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
-
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 5; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i), common::Value::make_int64(i * 2)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
-
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("multi_expr_project", table_ptr);
-
-  Schema out_schema;
-  out_schema.add_column("sum", common::ValueType::TYPE_INT64);
-  out_schema.add_column("product", common::ValueType::TYPE_INT64);
-
-  std::vector<std::unique_ptr<Expression>> exprs;
-  // a + b
-  exprs.push_back(std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("a"),
-                                                TokenType::Plus, std::make_unique<ColumnExpr>("b")));
-  // a * b
-  exprs.push_back(std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("a"),
-                                                TokenType::Star, std::make_unique<ColumnExpr>("b")));
-
-  VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
-
-  auto result = VectorBatch::create(project.output_schema());
-  ASSERT_TRUE(project.next_batch(*result));
-  EXPECT_EQ(result->row_count(), 5);
-
-  for (size_t i = 0; i < 5; ++i) {
-    int64_t a = static_cast<int64_t>(i);
-    int64_t b = a * 2;
-    EXPECT_EQ(result->get_column(0).get(i).as_int64(), a + b);   // sum
-    EXPECT_EQ(result->get_column(1).get(i).as_int64(), a * b);   // product
-  }
+    Schema schema;
+    schema.add_column("a", common::ValueType::TYPE_INT64);
+    schema.add_column("b", common::ValueType::TYPE_INT64);
+
+    ColumnarTable table("multi_expr_project", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
+
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 5; ++i) {
+        batch->append_tuple(
+            Tuple({common::Value::make_int64(i), common::Value::make_int64(i * 2)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
+
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("multi_expr_project", table_ptr);
+
+    Schema out_schema;
+    out_schema.add_column("sum", common::ValueType::TYPE_INT64);
+    out_schema.add_column("product", common::ValueType::TYPE_INT64);
+
+    std::vector<std::unique_ptr<Expression>> exprs;
+    // a + b
+    exprs.push_back(std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("a"), TokenType::Plus,
+                                                 std::make_unique<ColumnExpr>("b")));
+    // a * b
+    exprs.push_back(std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("a"), TokenType::Star,
+                                                 std::make_unique<ColumnExpr>("b")));
+
+    VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
+
+    auto result = VectorBatch::create(project.output_schema());
+    ASSERT_TRUE(project.next_batch(*result));
+    EXPECT_EQ(result->row_count(), 5);
+
+    for (size_t i = 0; i < 5; ++i) {
+        int64_t a = static_cast<int64_t>(i);
+        int64_t b = a * 2;
+        EXPECT_EQ(result->get_column(0).get(i).as_int64(), a + b);  // sum
+        EXPECT_EQ(result->get_column(1).get(i).as_int64(), a * b);  // product
+    }
 }
 
 TEST_F(VectorizedProjectTests, ComputedExpression) {
-  Schema schema;
-  schema.add_column("x", common::ValueType::TYPE_INT64);
-
-  ColumnarTable table("computed_project", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
-
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 3; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
-
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("computed_project", table_ptr);
-
-  Schema out_schema;
-  out_schema.add_column("doubled_plus_one", common::ValueType::TYPE_INT64);
-
-  std::vector<std::unique_ptr<Expression>> exprs;
-  // (x * 2) + 1
-  exprs.push_back(std::make_unique<BinaryExpr>(
-      std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("x"), TokenType::Star,
-                                  std::make_unique<ConstantExpr>(common::Value::make_int64(2))),
-      TokenType::Plus, std::make_unique<ConstantExpr>(common::Value::make_int64(1))));
-
-  VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
-
-  auto result = VectorBatch::create(project.output_schema());
-  ASSERT_TRUE(project.next_batch(*result));
-  EXPECT_EQ(result->row_count(), 3);
-  for (size_t i = 0; i < 3; ++i) {
-    EXPECT_EQ(result->get_column(0).get(i).as_int64(), static_cast<int64_t>(i) * 2 + 1);
-  }
+    Schema schema;
+    schema.add_column("x", common::ValueType::TYPE_INT64);
+
+    ColumnarTable table("computed_project", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
+
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 3; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
+
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("computed_project", table_ptr);
+
+    Schema out_schema;
+    out_schema.add_column("doubled_plus_one", common::ValueType::TYPE_INT64);
+
+    std::vector<std::unique_ptr<Expression>> exprs;
+    // (x * 2) + 1
+    exprs.push_back(std::make_unique<BinaryExpr>(
+        std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("x"), TokenType::Star,
+                                     std::make_unique<ConstantExpr>(common::Value::make_int64(2))),
+        TokenType::Plus, std::make_unique<ConstantExpr>(common::Value::make_int64(1))));
+
+    VectorizedProjectOperator project(std::move(scan), std::move(out_schema), std::move(exprs));
+
+    auto result = VectorBatch::create(project.output_schema());
+    ASSERT_TRUE(project.next_batch(*result));
+    EXPECT_EQ(result->row_count(), 3);
+    for (size_t i = 0; i < 3; ++i) {
+        EXPECT_EQ(result->get_column(0).get(i).as_int64(), static_cast<int64_t>(i) * 2 + 1);
+    }
 }
 
 class VectorizedAggregateTests : public ::testing::Test {
- protected:
-  void SetUp() override {
-    storage_ = std::make_unique<StorageManager>("./test_agg_ops");
-  }
-  void TearDown() override { storage_.reset(); }
+   protected:
+    void SetUp() override { storage_ = std::make_unique<StorageManager>("./test_agg_ops"); }
+    void TearDown() override { storage_.reset(); }
 
-  std::unique_ptr<StorageManager> storage_;
+    std::unique_ptr<StorageManager> storage_;
 };
 
 TEST_F(VectorizedAggregateTests, CountOnlyEmpty) {
-  // Note: COUNT(*) on empty table currently returns NULL due to has_value_ semantics
-  // This test verifies behavior as-is rather than expecting specific results
-  Schema schema;
-  schema.add_column("val", common::ValueType::TYPE_INT64);
+    // Note: COUNT(*) on empty table currently returns NULL due to has_value_ semantics
+    // This test verifies behavior as-is rather than expecting specific results
+    Schema schema;
+    schema.add_column("val", common::ValueType::TYPE_INT64);
 
-  ColumnarTable table("count_empty_agg", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
+    ColumnarTable table("count_empty_agg", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
 
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("count_empty_agg", table_ptr);
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("count_empty_agg", table_ptr);
 
-  Schema out_schema;
-  out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
+    Schema out_schema;
+    out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
 
-  std::vector<VectorizedAggregateInfo> aggs = {{AggregateType::Count, -1}};
+    std::vector<VectorizedAggregateInfo> aggs = {{AggregateType::Count, -1}};
 
-  VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
+    VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
 
-  auto result = VectorBatch::create(agg.output_schema());
-  ASSERT_TRUE(agg.next_batch(*result));
-  EXPECT_EQ(result->row_count(), 1);
-  // Empty table aggregate produces a result batch (done_ = true)
+    auto result = VectorBatch::create(agg.output_schema());
+    ASSERT_TRUE(agg.next_batch(*result));
+    EXPECT_EQ(result->row_count(), 1);
+    // Empty table aggregate produces a result batch (done_ = true)
 }
 
 TEST_F(VectorizedAggregateTests, SumWithFloat64) {
-  Schema schema;
-  schema.add_column("fval", common::ValueType::TYPE_FLOAT64);
+    Schema schema;
+    schema.add_column("fval", common::ValueType::TYPE_FLOAT64);
 
-  ColumnarTable table("float_agg", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
+    ColumnarTable table("float_agg", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
 
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 1; i <= 4; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_float64(static_cast<double>(i))}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 1; i <= 4; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_float64(static_cast<double>(i))}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
 
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("float_agg", table_ptr);
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("float_agg", table_ptr);
 
-  Schema out_schema;
-  out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
-  out_schema.add_column("sum_f", common::ValueType::TYPE_FLOAT64);
+    Schema out_schema;
+    out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
+    out_schema.add_column("sum_f", common::ValueType::TYPE_FLOAT64);
 
-  std::vector<VectorizedAggregateInfo> aggs = {
-      {AggregateType::Count, -1}, {AggregateType::Sum, 0}};
+    std::vector<VectorizedAggregateInfo> aggs = {{AggregateType::Count, -1},
+                                                 {AggregateType::Sum, 0}};
 
-  VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
+    VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
 
-  auto result = VectorBatch::create(agg.output_schema());
-  ASSERT_TRUE(agg.next_batch(*result));
-  EXPECT_EQ(result->row_count(), 1);
-  EXPECT_EQ(result->get_column(0).get(0).as_int64(), 4);
-  EXPECT_DOUBLE_EQ(result->get_column(1).get(0).to_float64(), 10.0);  // 1+2+3+4
+    auto result = VectorBatch::create(agg.output_schema());
+    ASSERT_TRUE(agg.next_batch(*result));
+    EXPECT_EQ(result->row_count(), 1);
+    EXPECT_EQ(result->get_column(0).get(0).as_int64(), 4);
+    EXPECT_DOUBLE_EQ(result->get_column(1).get(0).to_float64(), 10.0);  // 1+2+3+4
 }
 
 TEST_F(VectorizedAggregateTests, CountOnly) {
-  Schema schema;
-  schema.add_column("val", common::ValueType::TYPE_INT64);
+    Schema schema;
+    schema.add_column("val", common::ValueType::TYPE_INT64);
 
-  ColumnarTable table("count_agg", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
+    ColumnarTable table("count_agg", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
 
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 7; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 7; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
 
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("count_agg", table_ptr);
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("count_agg", table_ptr);
 
-  Schema out_schema;
-  out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
+    Schema out_schema;
+    out_schema.add_column("cnt", common::ValueType::TYPE_INT64);
 
-  std::vector<VectorizedAggregateInfo> aggs = {{AggregateType::Count, -1}};
+    std::vector<VectorizedAggregateInfo> aggs = {{AggregateType::Count, -1}};
 
-  VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
+    VectorizedAggregateOperator agg(std::move(scan), std::move(out_schema), aggs);
 
-  auto result = VectorBatch::create(agg.output_schema());
-  ASSERT_TRUE(agg.next_batch(*result));
-  EXPECT_EQ(result->get_column(0).get(0).as_int64(), 7);
+    auto result = VectorBatch::create(agg.output_schema());
+    ASSERT_TRUE(agg.next_batch(*result));
+    EXPECT_EQ(result->get_column(0).get(0).as_int64(), 7);
 }
 
 class VectorizedFilterTests : public ::testing::Test {
- protected:
-  void SetUp() override {
-    storage_ = std::make_unique<StorageManager>("./test_filter_ops");
-  }
-  void TearDown() override { storage_.reset(); }
+   protected:
+    void SetUp() override { storage_ = std::make_unique<StorageManager>("./test_filter_ops"); }
+    void TearDown() override { storage_.reset(); }
 
-  std::unique_ptr<StorageManager> storage_;
+    std::unique_ptr<StorageManager> storage_;
 };
 
 TEST_F(VectorizedFilterTests, FilterWithNoMatches) {
-  Schema schema;
-  schema.add_column("id", common::ValueType::TYPE_INT64);
+    Schema schema;
+    schema.add_column("id", common::ValueType::TYPE_INT64);
 
-  ColumnarTable table("no_match_filter", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
+    ColumnarTable table("no_match_filter", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
 
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 10; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 10; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
 
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("no_match_filter", table_ptr);
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("no_match_filter", table_ptr);
 
-  // Filter: id > 100 (no matches)
-  auto cond = std::make_unique<BinaryExpr>(
-      std::make_unique<ColumnExpr>("id"), TokenType::Gt,
-      std::make_unique<ConstantExpr>(common::Value::make_int64(100)));
+    // Filter: id > 100 (no matches)
+    auto cond = std::make_unique<BinaryExpr>(
+        std::make_unique<ColumnExpr>("id"), TokenType::Gt,
+        std::make_unique<ConstantExpr>(common::Value::make_int64(100)));
 
-  VectorizedFilterOperator filter(std::move(scan), std::move(cond));
+    VectorizedFilterOperator filter(std::move(scan), std::move(cond));
 
-  auto result = VectorBatch::create(filter.output_schema());
-  EXPECT_FALSE(filter.next_batch(*result));  // No matches
+    auto result = VectorBatch::create(filter.output_schema());
+    EXPECT_FALSE(filter.next_batch(*result));  // No matches
 }
 
 TEST_F(VectorizedFilterTests, FilterAllMatch) {
-  Schema schema;
-  schema.add_column("val", common::ValueType::TYPE_INT64);
+    Schema schema;
+    schema.add_column("val", common::ValueType::TYPE_INT64);
 
-  ColumnarTable table("all_match_filter", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
+    ColumnarTable table("all_match_filter", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
 
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 20; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 20; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
 
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("all_match_filter", table_ptr);
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("all_match_filter", table_ptr);
 
-  // Filter: val >= 0 (all match)
-  auto cond = std::make_unique<BinaryExpr>(
-      std::make_unique<ColumnExpr>("val"), TokenType::Ge,
-      std::make_unique<ConstantExpr>(common::Value::make_int64(0)));
+    // Filter: val >= 0 (all match)
+    auto cond =
+        std::make_unique<BinaryExpr>(std::make_unique<ColumnExpr>("val"), TokenType::Ge,
+                                     std::make_unique<ConstantExpr>(common::Value::make_int64(0)));
 
-  VectorizedFilterOperator filter(std::move(scan), std::move(cond));
+    VectorizedFilterOperator filter(std::move(scan), std::move(cond));
 
-  auto result = VectorBatch::create(filter.output_schema());
-  ASSERT_TRUE(filter.next_batch(*result));
-  EXPECT_EQ(result->row_count(), 20);
+    auto result = VectorBatch::create(filter.output_schema());
+    ASSERT_TRUE(filter.next_batch(*result));
+    EXPECT_EQ(result->row_count(), 20);
 }
 
 TEST_F(VectorizedFilterTests, PipelinedBatches) {
-  // Tests that filter processes child batches and accumulates results
-  // until matches are found, then returns them in pipelined fashion
-  Schema schema;
-  schema.add_column("id", common::ValueType::TYPE_INT64);
-
-  ColumnarTable table("pipelined_filter", *storage_, schema);
-  ASSERT_TRUE(table.create());
-  ASSERT_TRUE(table.open());
-
-  auto batch = VectorBatch::create(schema);
-  for (int64_t i = 0; i < 2500; ++i) {
-    batch->append_tuple(Tuple({common::Value::make_int64(i)}));
-  }
-  ASSERT_TRUE(table.append_batch(*batch));
-
-  auto table_ptr = std::make_shared<ColumnarTable>(table);
-  auto scan = std::make_unique<VectorizedSeqScanOperator>("pipelined_filter", table_ptr);
-
-  // Filter: id >= 1500
-  auto cond = std::make_unique<BinaryExpr>(
-      std::make_unique<ColumnExpr>("id"), TokenType::Ge,
-      std::make_unique<ConstantExpr>(common::Value::make_int64(1500)));
-
-  VectorizedFilterOperator filter(std::move(scan), std::move(cond));
-
-  auto result = VectorBatch::create(filter.output_schema());
-  int total = 0;
-  while (filter.next_batch(*result)) {
-    int batch_rows = result->row_count();
-    total += batch_rows;
-    // Verify values
-    for (size_t i = 0; i < result->row_count(); ++i) {
-      EXPECT_GE(result->get_column(0).get(i).as_int64(), 1500);
+    // Tests that filter processes child batches and accumulates results
+    // until matches are found, then returns them in pipelined fashion
+    Schema schema;
+    schema.add_column("id", common::ValueType::TYPE_INT64);
+
+    ColumnarTable table("pipelined_filter", *storage_, schema);
+    ASSERT_TRUE(table.create());
+    ASSERT_TRUE(table.open());
+
+    auto batch = VectorBatch::create(schema);
+    for (int64_t i = 0; i < 2500; ++i) {
+        batch->append_tuple(Tuple({common::Value::make_int64(i)}));
+    }
+    ASSERT_TRUE(table.append_batch(*batch));
+
+    auto table_ptr = std::make_shared<ColumnarTable>(table);
+    auto scan = std::make_unique<VectorizedSeqScanOperator>("pipelined_filter", table_ptr);
+
+    // Filter: id >= 1500
+    auto cond = std::make_unique<BinaryExpr>(
+        std::make_unique<ColumnExpr>("id"), TokenType::Ge,
+        std::make_unique<ConstantExpr>(common::Value::make_int64(1500)));
+
+    VectorizedFilterOperator filter(std::move(scan), std::move(cond));
+
+    auto result = VectorBatch::create(filter.output_schema());
+    int total = 0;
+    while (filter.next_batch(*result)) {
+        int batch_rows = result->row_count();
+        total += batch_rows;
+        // Verify values
+        for (size_t i = 0; i < result->row_count(); ++i) {
+            EXPECT_GE(result->get_column(0).get(i).as_int64(), 1500);
+        }
+        result->clear();
     }
-    result->clear();
-  }
-  // 1500 to 2499 inclusive = 1000 rows
-  EXPECT_EQ(total, 1000);
+    // 1500 to 2499 inclusive = 1000 rows
+    EXPECT_EQ(total, 1000);
 }
 
 }  // namespace
\ No newline at end of file

From c41abe0dd728091af273c213c200cbf468c9e42f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?=
 <83272398+PoyrazK@users.noreply.github.com>
Date: Tue, 21 Apr 2026 13:15:24 +0300
Subject: [PATCH 3/3] fix(vectorized): clear output batch in filter, fix
 COUNT(*) on empty table

VectorizedFilterOperator::next_batch():
- Clear output batch at entry for clean state
- Remove early-return optimization that checked row_count > 0
- Set row count to selection.size() instead of accumulating

VectorizedAggregateOperator:
- Initialize has_value_[i] = true for COUNT aggregates in constructor
- This makes COUNT(*) return 0 (not NULL) for empty tables per SQL spec

Test update:
- CountOnlyEmpty now asserts COUNT(*) = 0 for empty table
---
 include/executor/vectorized_operator.hpp | 15 +++++++++------
 tests/vectorized_operator_tests.cpp      |  5 ++---
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp
index b21a957..15fa879 100644
--- a/include/executor/vectorized_operator.hpp
+++ b/include/executor/vectorized_operator.hpp
@@ -102,10 +102,7 @@ class VectorizedFilterOperator : public VectorizedOperator {
     }
 
     bool next_batch(VectorBatch& out_batch) override {
-        // If we already have accumulated rows from a previous call, return them
-        if (out_batch.row_count() > 0) {
-            return true;
-        }
+        out_batch.clear();
 
         // Ensure output batch is structured for current schema
         if (out_batch.column_count() == 0) {
@@ -135,9 +132,9 @@ class VectorizedFilterOperator : public VectorizedOperator {
                         dest_col.append(src_col.get(r));
                     }
                 }
-                out_batch.set_row_count(out_batch.row_count() + selection.size());
+                out_batch.set_row_count(selection.size());
                 input_batch_->clear();
-                return true;  // Return with accumulated matches
+                return true;  // Return with matches
             }
             input_batch_->clear();
         }
@@ -214,6 +211,12 @@ class VectorizedAggregateOperator : public VectorizedOperator {
         results_int_.assign(aggregates_.size(), 0);
         results_double_.assign(aggregates_.size(), 0.0);
         has_value_.assign(aggregates_.size(), false);
+        // COUNT aggregates always have a value (0 for empty input) per SQL spec
+        for (size_t i = 0; i < aggregates_.size(); ++i) {
+            if (aggregates_[i].type == AggregateType::Count) {
+                has_value_[i] = true;
+            }
+        }
         input_batch_ = VectorBatch::create(child_->output_schema());
     }
 
diff --git a/tests/vectorized_operator_tests.cpp b/tests/vectorized_operator_tests.cpp
index 93e2a3b..54d5956 100644
--- a/tests/vectorized_operator_tests.cpp
+++ b/tests/vectorized_operator_tests.cpp
@@ -254,8 +254,7 @@ class VectorizedAggregateTests : public ::testing::Test {
 };
 
 TEST_F(VectorizedAggregateTests, CountOnlyEmpty) {
-    // Note: COUNT(*) on empty table currently returns NULL due to has_value_ semantics
-    // This test verifies behavior as-is rather than expecting specific results
+    // COUNT(*) on empty table should return 0 per SQL spec
     Schema schema;
     schema.add_column("val", common::ValueType::TYPE_INT64);
 
@@ -276,7 +275,7 @@ TEST_F(VectorizedAggregateTests, CountOnlyEmpty) {
     auto result = VectorBatch::create(agg.output_schema());
     ASSERT_TRUE(agg.next_batch(*result));
     EXPECT_EQ(result->row_count(), 1);
-    // Empty table aggregate produces a result batch (done_ = true)
+    EXPECT_EQ(result->get_column(0).get(0).as_int64(), 0);  // COUNT(*) = 0 for empty
 }
 
 TEST_F(VectorizedAggregateTests, SumWithFloat64) {