diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 2cd5f8ede53f7..e7f0b3ab1db4c 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -14454,6 +14454,90 @@ ORDER BY "source"."str0" ASC ) } + /// ThoughtSpot-style day-of-quarter expression split across inner/outer query + /// with MEASURE() and CASE WHEN filter on the measure column. + /// + /// The inner query projects two parts of the quarter calculation as separate + /// columns plus a CASE WHEN filtered amount, with no GROUP BY. + /// The outer query computes day_of_quarter from those columns, wraps the + /// filtered amount in MEASURE(), and groups. + /// + /// This exercises the E-graph's ability to: + /// 1. Flatten the subquery so the quarter expression becomes a single tree + /// 2. Atomically rewrite the quarter expression to DATE_TRUNC('quarter', ...) + /// before sub-expression simplification rules break the pattern + /// 3. Expand MEASURE() at the correct aggregation level + /// 4. Avoid emitting INTERVAL '1 month' * expr (invalid on Snowflake) + #[tokio::test] + async fn test_thoughtspot_pg_day_of_quarter_split_with_measure() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + r#" + SELECT + CAST("inner_query"."order_date" AS date) + - CAST("inner_query"."quarter_start" AS date) + + 1 AS "day_of_quarter", + MEASURE("inner_query"."sumPrice") AS "revenue" + FROM ( + SELECT + "ta_1"."order_date" AS "order_date", + CAST( + EXTRACT(YEAR FROM "ta_1"."order_date") || '-' + || EXTRACT(MONTH FROM "ta_1"."order_date") || '-01' + AS DATE) + + (((MOD(CAST((EXTRACT(MONTH FROM "ta_1"."order_date") - 1) + AS numeric), 3) + 1) - 1) * -1) + * INTERVAL '1 month' + AS "quarter_start", + CASE WHEN "ta_1"."customer_gender" = 'female' + THEN "ta_1"."sumPrice" END AS "sumPrice" + FROM "db"."public"."KibanaSampleDataEcommerce" AS "ta_1" + ) "inner_query" + WHERE + CAST("inner_query"."order_date" AS date) + - CAST("inner_query"."quarter_start" AS date) + + 1 <= 45 + GROUP BY 1 + ORDER BY 1 + ;"# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let logical_plan = query_plan.as_logical_plan(); + + let request = logical_plan.find_cube_scan().request; + + // The rewriter should recognize the complex quarter expression and + // simplify it to DATE_TRUNC('quarter', col) via the + // thoughtspot-pg-quarter-start-to-date-trunc rule, which then gets + // recognized as a quarter time dimension. + assert_eq!( + request, + V1LoadRequestQuery { + measures: Some(vec!["KibanaSampleDataEcommerce.sumPrice".to_string(),]), + dimensions: Some(vec![ + "KibanaSampleDataEcommerce.order_date".to_string(), + "KibanaSampleDataEcommerce.customer_gender".to_string(), + ]), + segments: Some(vec![]), + time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: Some("quarter".to_string()), + date_range: None, + },]), + order: Some(vec![]), + ungrouped: Some(true), + ..Default::default() + } + ); + } + #[tokio::test] async fn test_domo_filter_date_gt() { init_testing_logger(); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs index 38520eaa10d02..6f2ec6e764e52 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs @@ -15,7 +15,7 @@ use crate::{ var, var_iter, }; use datafusion::{ - arrow::datatypes::{DataType, TimeUnit}, + arrow::datatypes::{DataType, DataType as ArrowDataType, TimeUnit}, logical_plan::DFSchema, scalar::ScalarValue, }; @@ -409,6 +409,52 @@ impl RewriteRules for DateRules { "?new_granularity", ), ), + // ThoughtSpot's PostgreSQL quarter start calculation uses INTERVAL arithmetic + // that is incompatible with non-PostgreSQL dialects. Recognize the pattern and + // replace with DATE_TRUNC('quarter', col) which all dialects support. + rewrite( + "thoughtspot-pg-quarter-start-to-date-trunc", + alias_expr( + binary_expr( + cast_expr_explicit( + binary_expr( + binary_expr( + binary_expr( + self.fun_expr( + "DatePart", + vec![literal_string("year"), column_expr("?column")], + ), + "||", + literal_string("-"), + ), + "||", + self.fun_expr( + "DatePart", + vec![literal_string("month"), column_expr("?column")], + ), + ), + "||", + literal_string("-01"), + ), + ArrowDataType::Date32, + ), + "+", + binary_expr( + binary_expr("?mod_part", "*", "?neg_one"), + "*", + "?interval_val", + ), + ), + "?alias", + ), + alias_expr( + self.fun_expr( + "DateTrunc", + vec![literal_string("quarter"), column_expr("?column")], + ), + "?alias", + ), + ), // AGE function seems to be a popular choice for this date arithmetic, // but it is not supported in SQL push down by most dialects. transforming_rewrite_with_root( diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/pass_through.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/pass_through.rs index b5f6637e971e7..36d64201a4306 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/pass_through.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/pass_through.rs @@ -1,7 +1,7 @@ use crate::compile::rewrite::{ agg_fun_expr, agg_fun_expr_within_group_empty_tail, alias_expr, binary_expr, cast_expr, flatten_pushdown_replacer, fun_expr_var_arg, is_not_null_expr, is_null_expr, rewrite, - rewriter::CubeRewrite, rules::flatten::FlattenRules, udf_expr_var_arg, + rewriter::CubeRewrite, rules::flatten::FlattenRules, udaf_expr_var_arg, udf_expr_var_arg, }; impl FlattenRules { @@ -30,6 +30,11 @@ impl FlattenRules { |expr| udf_expr_var_arg("?fun", expr), rules, ); + self.single_arg_pass_through_rules( + "udaf-function", + |expr| udaf_expr_var_arg("?fun", expr, "?distinct"), + rules, + ); self.single_arg_pass_through_rules("is-null", |expr| is_null_expr(expr), rules); self.single_arg_pass_through_rules("is-not-null", |expr| is_not_null_expr(expr), rules); rules.push(rewrite( diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs index a284dc170b0de..581f7efa9d3cc 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs @@ -1,10 +1,10 @@ use crate::{ compile::rewrite::{ - aggregate, cube_scan, flatten_pushdown_replacer, projection, + aggregate, cube_scan, filter, flatten_pushdown_replacer, projection, rewriter::{CubeEGraph, CubeRewrite}, rules::{flatten::FlattenRules, replacer_flat_push_down_node, replacer_push_down_node}, - transforming_chain_rewrite_with_root, FlattenPushdownReplacerInnerAlias, ListType, - LogicalPlanLanguage, ProjectionAlias, + transforming_chain_rewrite_with_root, + FlattenPushdownReplacerInnerAlias, ListType, LogicalPlanLanguage, ProjectionAlias, }, var, var_iter, }; @@ -159,6 +159,90 @@ impl FlattenRules { ), )]); + rules.extend(vec![transforming_chain_rewrite_with_root( + "flatten-filter-pushdown", + aggregate( + "?filter_node", + "?outer_group_expr", + "?outer_aggregate_expr", + "AggregateSplit:false", + ), + vec![ + ("?filter_node", filter("?filter_expr", "?inner_projection")), + ( + "?inner_projection", + projection( + "?inner_projection_expr", + "?cube_scan", + "?inner_projection_alias", + "ProjectionSplit:false", + ), + ), + ( + "?cube_scan", + cube_scan( + "?alias_to_cube", + "?members", + "?filters", + "?orders", + "?limit", + "?offset", + "CubeScanSplit:false", + "?can_pushdown_join", + "CubeScanWrapped:false", + "?ungrouped", + "?join_hints", + ), + ), + ], + aggregate( + filter( + flatten_pushdown_replacer( + "?filter_expr", + "?inner_projection_expr", + "?inner_alias", + "FlattenPushdownReplacerTopLevel:false", + ), + cube_scan( + "?alias_to_cube", + "?members", + "?filters", + "?orders", + "?limit", + "?offset", + "CubeScanSplit:false", + "?can_pushdown_join", + "CubeScanWrapped:false", + "?ungrouped", + "?join_hints", + ), + ), + flatten_pushdown_replacer( + "?outer_group_expr", + "?inner_projection_expr", + "?inner_alias", + "FlattenPushdownReplacerTopLevel:false", + ), + flatten_pushdown_replacer( + "?outer_aggregate_expr", + "?inner_projection_expr", + "?inner_alias", + "FlattenPushdownReplacerTopLevel:false", + ), + "AggregateSplit:false", + ), + self.flatten_aggregate( + "?inner_projection", + "?cube_scan", + "?members", + "?inner_projection_expr", + "?outer_group_expr", + "?outer_aggregate_expr", + "?inner_projection_alias", + "?inner_alias", + ), + )]); + if self.config_obj.push_down_pull_up_split() { Self::flat_list_pushdown_rules( "flatten-projection-expr", @@ -175,11 +259,20 @@ impl FlattenRules { ListType::AggregateGroupExpr, rules, ); + Self::flat_list_pushdown_rules( + "flatten-scalar-fun-args", + ListType::ScalarFunctionExprArgs, + rules, + ); } else { Self::list_pushdown_rules("flatten-projection-expr", "ProjectionExpr", rules); Self::list_pushdown_rules("flatten-aggregate-expr", "AggregateAggrExpr", rules); Self::list_pushdown_rules("flatten-group-expr", "AggregateGroupExpr", rules); + Self::list_pushdown_rules("flatten-scalar-fun-args", "ScalarFunctionExprArgs", rules); } + Self::list_pushdown_rules("flatten-udf-fun-args", "ScalarUDFExprArgs", rules); + Self::list_pushdown_rules("flatten-agg-fun-args", "AggregateFunctionExprArgs", rules); + Self::list_pushdown_rules("flatten-udaf-fun-args", "AggregateUDFExprArgs", rules); } pub fn flatten_projection(