From cf34d87b15f2766ea004e8d1baff5a7d2383953e Mon Sep 17 00:00:00 2001 From: Pavel Tiunov Date: Fri, 22 May 2026 13:04:59 -0700 Subject: [PATCH 1/3] feat(cubesql): Flatten filter rules --- rust/cubesql/cubesql/src/compile/mod.rs | 88 ++++++++++++++ .../src/compile/rewrite/rules/dates.rs | 58 ++++++++- .../rewrite/rules/flatten/pass_through.rs | 7 +- .../rewrite/rules/flatten/top_level.rs | 110 +++++++++++++++++- 4 files changed, 258 insertions(+), 5 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 2cd5f8ede53f7..e84091a0100ae 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -14454,6 +14454,94 @@ ORDER BY "source"."str0" ASC ) } + /// ThoughtSpot-style day-of-quarter expression split across inner/outer query + /// with MEASURE() and CASE WHEN filter on the measure column. + /// + /// The inner query projects two parts of the quarter calculation as separate + /// columns plus a CASE WHEN filtered amount, with no GROUP BY. + /// The outer query computes day_of_quarter from those columns, wraps the + /// filtered amount in MEASURE(), and groups. + /// + /// This exercises the E-graph's ability to: + /// 1. Flatten the subquery so the quarter expression becomes a single tree + /// 2. Atomically rewrite the quarter expression to DATE_TRUNC('quarter', ...) + /// before sub-expression simplification rules break the pattern + /// 3. Expand MEASURE() at the correct aggregation level + /// 4. Avoid emitting INTERVAL '1 month' * expr (invalid on Snowflake) + #[tokio::test] + async fn test_thoughtspot_pg_day_of_quarter_split_with_measure() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + r#" + SELECT + CAST("inner_query"."order_date" AS date) + - CAST("inner_query"."quarter_start" AS date) + + 1 AS "day_of_quarter", + MEASURE("inner_query"."sumPrice") AS "revenue" + FROM ( + SELECT + "ta_1"."order_date" AS "order_date", + CAST( + EXTRACT(YEAR FROM "ta_1"."order_date") || '-' + || EXTRACT(MONTH FROM "ta_1"."order_date") || '-01' + AS DATE) + + (((MOD(CAST((EXTRACT(MONTH FROM "ta_1"."order_date") - 1) + AS numeric), 3) + 1) - 1) * -1) + * INTERVAL '1 month' + AS "quarter_start", + CASE WHEN "ta_1"."customer_gender" = 'female' + THEN "ta_1"."sumPrice" END AS "sumPrice" + FROM "db"."public"."KibanaSampleDataEcommerce" AS "ta_1" + ) "inner_query" + WHERE + CAST("inner_query"."order_date" AS date) + - CAST("inner_query"."quarter_start" AS date) + + 1 <= 45 + GROUP BY 1 + ORDER BY 1 + ;"# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let logical_plan = query_plan.as_logical_plan(); + + let request = logical_plan.find_cube_scan().request; + + // The rewriter should recognize the complex quarter expression and + // simplify it to DATE_TRUNC('quarter', col) via the + // thoughtspot-pg-quarter-start-to-date-trunc rule, which then gets + // recognized as a quarter time dimension. + assert_eq!( + request, + V1LoadRequestQuery { + measures: Some(vec![ + "KibanaSampleDataEcommerce.sumPrice".to_string(), + ]), + dimensions: Some(vec![ + "KibanaSampleDataEcommerce.order_date".to_string(), + "KibanaSampleDataEcommerce.customer_gender".to_string(), + ]), + segments: Some(vec![]), + time_dimensions: Some(vec![ + V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: Some("quarter".to_string()), + date_range: None, + }, + ]), + order: Some(vec![]), + ungrouped: Some(true), + ..Default::default() + } + ); + } + #[tokio::test] async fn test_domo_filter_date_gt() { init_testing_logger(); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs index 38520eaa10d02..4b0f6cf164e79 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs @@ -15,7 +15,7 @@ use crate::{ var, var_iter, }; use datafusion::{ - arrow::datatypes::{DataType, TimeUnit}, + arrow::datatypes::{DataType, DataType as ArrowDataType, TimeUnit}, logical_plan::DFSchema, scalar::ScalarValue, }; @@ -409,6 +409,62 @@ impl RewriteRules for DateRules { "?new_granularity", ), ), + // ThoughtSpot's PostgreSQL quarter start calculation uses INTERVAL arithmetic + // that is incompatible with non-PostgreSQL dialects. Recognize the pattern and + // replace with DATE_TRUNC('quarter', col) which all dialects support. + rewrite( + "thoughtspot-pg-quarter-start-to-date-trunc", + alias_expr( + binary_expr( + cast_expr_explicit( + binary_expr( + binary_expr( + binary_expr( + self.fun_expr( + "DatePart", + vec![ + literal_string("year"), + column_expr("?column"), + ], + ), + "||", + literal_string("-"), + ), + "||", + self.fun_expr( + "DatePart", + vec![ + literal_string("month"), + column_expr("?column"), + ], + ), + ), + "||", + literal_string("-01"), + ), + ArrowDataType::Date32, + ), + "+", + binary_expr( + binary_expr( + "?mod_part", + "*", + "?neg_one", + ), + "*", + "?interval_val", + ), + ), + "?alias", + ), + alias_expr( + self.fun_expr( + "DateTrunc", + vec![literal_string("quarter"), column_expr("?column")], + ), + "?alias", + ), + ), // AGE function seems to be a popular choice for this date arithmetic, // but it is not supported in SQL push down by most dialects. transforming_rewrite_with_root( diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/pass_through.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/pass_through.rs index b5f6637e971e7..36d64201a4306 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/pass_through.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/pass_through.rs @@ -1,7 +1,7 @@ use crate::compile::rewrite::{ agg_fun_expr, agg_fun_expr_within_group_empty_tail, alias_expr, binary_expr, cast_expr, flatten_pushdown_replacer, fun_expr_var_arg, is_not_null_expr, is_null_expr, rewrite, - rewriter::CubeRewrite, rules::flatten::FlattenRules, udf_expr_var_arg, + rewriter::CubeRewrite, rules::flatten::FlattenRules, udaf_expr_var_arg, udf_expr_var_arg, }; impl FlattenRules { @@ -30,6 +30,11 @@ impl FlattenRules { |expr| udf_expr_var_arg("?fun", expr), rules, ); + self.single_arg_pass_through_rules( + "udaf-function", + |expr| udaf_expr_var_arg("?fun", expr, "?distinct"), + rules, + ); self.single_arg_pass_through_rules("is-null", |expr| is_null_expr(expr), rules); self.single_arg_pass_through_rules("is-not-null", |expr| is_not_null_expr(expr), rules); rules.push(rewrite( diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs index a284dc170b0de..df4bb0adecf6e 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs @@ -1,10 +1,10 @@ use crate::{ compile::rewrite::{ - aggregate, cube_scan, flatten_pushdown_replacer, projection, + aggregate, cube_scan, filter, flatten_pushdown_replacer, projection, rewriter::{CubeEGraph, CubeRewrite}, rules::{flatten::FlattenRules, replacer_flat_push_down_node, replacer_push_down_node}, - transforming_chain_rewrite_with_root, FlattenPushdownReplacerInnerAlias, ListType, - LogicalPlanLanguage, ProjectionAlias, + transforming_chain_rewrite, transforming_chain_rewrite_with_root, + FlattenPushdownReplacerInnerAlias, ListType, LogicalPlanLanguage, ProjectionAlias, }, var, var_iter, }; @@ -159,6 +159,93 @@ impl FlattenRules { ), )]); + rules.extend(vec![transforming_chain_rewrite_with_root( + "flatten-filter-pushdown", + aggregate( + "?filter_node", + "?outer_group_expr", + "?outer_aggregate_expr", + "AggregateSplit:false", + ), + vec![ + ( + "?filter_node", + filter("?filter_expr", "?inner_projection"), + ), + ( + "?inner_projection", + projection( + "?inner_projection_expr", + "?cube_scan", + "?inner_projection_alias", + "ProjectionSplit:false", + ), + ), + ( + "?cube_scan", + cube_scan( + "?alias_to_cube", + "?members", + "?filters", + "?orders", + "?limit", + "?offset", + "CubeScanSplit:false", + "?can_pushdown_join", + "CubeScanWrapped:false", + "?ungrouped", + "?join_hints", + ), + ), + ], + aggregate( + filter( + flatten_pushdown_replacer( + "?filter_expr", + "?inner_projection_expr", + "?inner_alias", + "FlattenPushdownReplacerTopLevel:false", + ), + cube_scan( + "?alias_to_cube", + "?members", + "?filters", + "?orders", + "?limit", + "?offset", + "CubeScanSplit:false", + "?can_pushdown_join", + "CubeScanWrapped:false", + "?ungrouped", + "?join_hints", + ), + ), + flatten_pushdown_replacer( + "?outer_group_expr", + "?inner_projection_expr", + "?inner_alias", + "FlattenPushdownReplacerTopLevel:false", + ), + flatten_pushdown_replacer( + "?outer_aggregate_expr", + "?inner_projection_expr", + "?inner_alias", + "FlattenPushdownReplacerTopLevel:false", + ), + "AggregateSplit:false", + ), + self.flatten_aggregate( + "?inner_projection", + "?cube_scan", + "?members", + "?inner_projection_expr", + "?outer_group_expr", + "?outer_aggregate_expr", + "?inner_projection_alias", + "?inner_alias", + ), + )]); + if self.config_obj.push_down_pull_up_split() { Self::flat_list_pushdown_rules( "flatten-projection-expr", @@ -175,11 +262,28 @@ impl FlattenRules { ListType::AggregateGroupExpr, rules, ); + Self::flat_list_pushdown_rules( + "flatten-scalar-fun-args", + ListType::ScalarFunctionExprArgs, + rules, + ); } else { Self::list_pushdown_rules("flatten-projection-expr", "ProjectionExpr", rules); Self::list_pushdown_rules("flatten-aggregate-expr", "AggregateAggrExpr", rules); Self::list_pushdown_rules("flatten-group-expr", "AggregateGroupExpr", rules); + Self::list_pushdown_rules( + "flatten-scalar-fun-args", + "ScalarFunctionExprArgs", + rules, + ); } + Self::list_pushdown_rules("flatten-udf-fun-args", "ScalarUDFExprArgs", rules); + Self::list_pushdown_rules( + "flatten-agg-fun-args", + "AggregateFunctionExprArgs", + rules, + ); + Self::list_pushdown_rules("flatten-udaf-fun-args", "AggregateUDFExprArgs", rules); } pub fn flatten_projection( From c0fecf34e407f9df46431c9c7c231b528876fce4 Mon Sep 17 00:00:00 2001 From: Pavel Tiunov Date: Sat, 23 May 2026 19:11:21 -0700 Subject: [PATCH 2/3] Linter --- rust/cubesql/cubesql/src/compile/mod.rs | 16 ++++++---------- .../cubesql/src/compile/rewrite/rules/dates.rs | 16 +++------------- .../compile/rewrite/rules/flatten/top_level.rs | 17 +++-------------- 3 files changed, 12 insertions(+), 37 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index e84091a0100ae..e7f0b3ab1db4c 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -14520,21 +14520,17 @@ ORDER BY "source"."str0" ASC assert_eq!( request, V1LoadRequestQuery { - measures: Some(vec![ - "KibanaSampleDataEcommerce.sumPrice".to_string(), - ]), + measures: Some(vec!["KibanaSampleDataEcommerce.sumPrice".to_string(),]), dimensions: Some(vec![ "KibanaSampleDataEcommerce.order_date".to_string(), "KibanaSampleDataEcommerce.customer_gender".to_string(), ]), segments: Some(vec![]), - time_dimensions: Some(vec![ - V1LoadRequestQueryTimeDimension { - dimension: "KibanaSampleDataEcommerce.order_date".to_string(), - granularity: Some("quarter".to_string()), - date_range: None, - }, - ]), + time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: Some("quarter".to_string()), + date_range: None, + },]), order: Some(vec![]), ungrouped: Some(true), ..Default::default() diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs index 4b0f6cf164e79..6f2ec6e764e52 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs @@ -422,10 +422,7 @@ impl RewriteRules for DateRules { binary_expr( self.fun_expr( "DatePart", - vec![ - literal_string("year"), - column_expr("?column"), - ], + vec![literal_string("year"), column_expr("?column")], ), "||", literal_string("-"), @@ -433,10 +430,7 @@ impl RewriteRules for DateRules { "||", self.fun_expr( "DatePart", - vec![ - literal_string("month"), - column_expr("?column"), - ], + vec![literal_string("month"), column_expr("?column")], ), ), "||", @@ -446,11 +440,7 @@ impl RewriteRules for DateRules { ), "+", binary_expr( - binary_expr( - "?mod_part", - "*", - "?neg_one", - ), + binary_expr("?mod_part", "*", "?neg_one"), "*", "?interval_val", ), diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs index df4bb0adecf6e..e09471a83e718 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs @@ -168,10 +168,7 @@ impl FlattenRules { "AggregateSplit:false", ), vec![ - ( - "?filter_node", - filter("?filter_expr", "?inner_projection"), - ), + ("?filter_node", filter("?filter_expr", "?inner_projection")), ( "?inner_projection", projection( @@ -271,18 +268,10 @@ impl FlattenRules { Self::list_pushdown_rules("flatten-projection-expr", "ProjectionExpr", rules); Self::list_pushdown_rules("flatten-aggregate-expr", "AggregateAggrExpr", rules); Self::list_pushdown_rules("flatten-group-expr", "AggregateGroupExpr", rules); - Self::list_pushdown_rules( - "flatten-scalar-fun-args", - "ScalarFunctionExprArgs", - rules, - ); + Self::list_pushdown_rules("flatten-scalar-fun-args", "ScalarFunctionExprArgs", rules); } Self::list_pushdown_rules("flatten-udf-fun-args", "ScalarUDFExprArgs", rules); - Self::list_pushdown_rules( - "flatten-agg-fun-args", - "AggregateFunctionExprArgs", - rules, - ); + Self::list_pushdown_rules("flatten-agg-fun-args", "AggregateFunctionExprArgs", rules); Self::list_pushdown_rules("flatten-udaf-fun-args", "AggregateUDFExprArgs", rules); } From 93a3a06098627b67d3adcbdc4ef71a47dbe857da Mon Sep 17 00:00:00 2001 From: Pavel Tiunov Date: Sat, 23 May 2026 19:39:44 -0700 Subject: [PATCH 3/3] Clippy --- .../cubesql/src/compile/rewrite/rules/flatten/top_level.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs index e09471a83e718..581f7efa9d3cc 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs @@ -3,7 +3,7 @@ use crate::{ aggregate, cube_scan, filter, flatten_pushdown_replacer, projection, rewriter::{CubeEGraph, CubeRewrite}, rules::{flatten::FlattenRules, replacer_flat_push_down_node, replacer_push_down_node}, - transforming_chain_rewrite, transforming_chain_rewrite_with_root, + transforming_chain_rewrite_with_root, FlattenPushdownReplacerInnerAlias, ListType, LogicalPlanLanguage, ProjectionAlias, }, var, var_iter,