Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions rust/cubesql/cubesql/src/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14454,6 +14454,90 @@ ORDER BY "source"."str0" ASC
)
}

/// ThoughtSpot-style day-of-quarter expression split across inner/outer query
/// with MEASURE() and CASE WHEN filter on the measure column.
///
/// The inner query projects two parts of the quarter calculation as separate
/// columns plus a CASE WHEN filtered amount, with no GROUP BY.
/// The outer query computes day_of_quarter from those columns, wraps the
/// filtered amount in MEASURE(), and groups.
///
/// This exercises the E-graph's ability to:
/// 1. Flatten the subquery so the quarter expression becomes a single tree
/// 2. Atomically rewrite the quarter expression to DATE_TRUNC('quarter', ...)
/// before sub-expression simplification rules break the pattern
/// 3. Expand MEASURE() at the correct aggregation level
/// 4. Avoid emitting INTERVAL '1 month' * expr (invalid on Snowflake)
#[tokio::test]
async fn test_thoughtspot_pg_day_of_quarter_split_with_measure() {
if !Rewriter::sql_push_down_enabled() {
return;
}
init_testing_logger();

let query_plan = convert_select_to_query_plan(
r#"
SELECT
CAST("inner_query"."order_date" AS date)
- CAST("inner_query"."quarter_start" AS date)
+ 1 AS "day_of_quarter",
MEASURE("inner_query"."sumPrice") AS "revenue"
FROM (
SELECT
"ta_1"."order_date" AS "order_date",
CAST(
EXTRACT(YEAR FROM "ta_1"."order_date") || '-'
|| EXTRACT(MONTH FROM "ta_1"."order_date") || '-01'
AS DATE)
+ (((MOD(CAST((EXTRACT(MONTH FROM "ta_1"."order_date") - 1)
AS numeric), 3) + 1) - 1) * -1)
* INTERVAL '1 month'
AS "quarter_start",
CASE WHEN "ta_1"."customer_gender" = 'female'
THEN "ta_1"."sumPrice" END AS "sumPrice"
FROM "db"."public"."KibanaSampleDataEcommerce" AS "ta_1"
) "inner_query"
WHERE
CAST("inner_query"."order_date" AS date)
- CAST("inner_query"."quarter_start" AS date)
+ 1 <= 45
GROUP BY 1
ORDER BY 1
;"#
.to_string(),
DatabaseProtocol::PostgreSQL,
)
.await;

let logical_plan = query_plan.as_logical_plan();

let request = logical_plan.find_cube_scan().request;

// The rewriter should recognize the complex quarter expression and
// simplify it to DATE_TRUNC('quarter', col) via the
// thoughtspot-pg-quarter-start-to-date-trunc rule, which then gets
// recognized as a quarter time dimension.
assert_eq!(
request,
V1LoadRequestQuery {
measures: Some(vec!["KibanaSampleDataEcommerce.sumPrice".to_string(),]),
dimensions: Some(vec![
"KibanaSampleDataEcommerce.order_date".to_string(),
"KibanaSampleDataEcommerce.customer_gender".to_string(),
]),
segments: Some(vec![]),
time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension {
dimension: "KibanaSampleDataEcommerce.order_date".to_string(),
granularity: Some("quarter".to_string()),
date_range: None,
},]),
order: Some(vec![]),
ungrouped: Some(true),
..Default::default()
}
);
}

#[tokio::test]
async fn test_domo_filter_date_gt() {
init_testing_logger();
Expand Down
48 changes: 47 additions & 1 deletion rust/cubesql/cubesql/src/compile/rewrite/rules/dates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::{
var, var_iter,
};
use datafusion::{
arrow::datatypes::{DataType, TimeUnit},
arrow::datatypes::{DataType, DataType as ArrowDataType, TimeUnit},
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: DataType is imported twice — once as DataType and once as DataType as ArrowDataType. Since they're the same type, you can just use DataType directly on line 445 instead of introducing the alias.

Suggested change
arrow::datatypes::{DataType, DataType as ArrowDataType, TimeUnit},
arrow::datatypes::{DataType, TimeUnit},

(And use DataType::Date32 on line 445 instead of ArrowDataType::Date32.)

logical_plan::DFSchema,
scalar::ScalarValue,
};
Expand Down Expand Up @@ -409,6 +409,52 @@ impl RewriteRules for DateRules {
"?new_granularity",
),
),
// ThoughtSpot's PostgreSQL quarter start calculation uses INTERVAL arithmetic
// that is incompatible with non-PostgreSQL dialects. Recognize the pattern and
// replace with DATE_TRUNC('quarter', col) which all dialects support.
rewrite(
"thoughtspot-pg-quarter-start-to-date-trunc",
alias_expr(
binary_expr(
cast_expr_explicit(
binary_expr(
binary_expr(
binary_expr(
self.fun_expr(
"DatePart",
vec![literal_string("year"), column_expr("?column")],
),
"||",
literal_string("-"),
),
"||",
self.fun_expr(
"DatePart",
vec![literal_string("month"), column_expr("?column")],
),
),
"||",
literal_string("-01"),
),
ArrowDataType::Date32,
),
"+",
binary_expr(
binary_expr("?mod_part", "*", "?neg_one"),
"*",
"?interval_val",
),
),
"?alias",
),
alias_expr(
self.fun_expr(
"DateTrunc",
vec![literal_string("quarter"), column_expr("?column")],
),
"?alias",
),
),
Comment on lines +415 to +457
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an unconditional rewrite — the wildcards ?mod_part, ?neg_one, and ?interval_val will match any e-graph node, not just the specific MOD/negative-one/interval expressions from the ThoughtSpot pattern.

For example, a different expression tree that happens to have the same shape ((cast(...) + (A * B) * C) with string concatenation inside the cast) would also be rewritten to DATE_TRUNC('quarter', col), even if A/B/C have nothing to do with quarter calculation.

Consider either:

  1. Using a transforming_rewrite with a condition that validates ?neg_one is actually -1 and ?interval_val is INTERVAL '1 month', or
  2. Expanding the pattern to structurally match the full MOD expression, negative literal, and interval literal.

This would prevent false-positive matches on structurally similar but semantically different expressions.

// AGE function seems to be a popular choice for this date arithmetic,
// but it is not supported in SQL push down by most dialects.
transforming_rewrite_with_root(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::compile::rewrite::{
agg_fun_expr, agg_fun_expr_within_group_empty_tail, alias_expr, binary_expr, cast_expr,
flatten_pushdown_replacer, fun_expr_var_arg, is_not_null_expr, is_null_expr, rewrite,
rewriter::CubeRewrite, rules::flatten::FlattenRules, udf_expr_var_arg,
rewriter::CubeRewrite, rules::flatten::FlattenRules, udaf_expr_var_arg, udf_expr_var_arg,
};

impl FlattenRules {
Expand Down Expand Up @@ -30,6 +30,11 @@ impl FlattenRules {
|expr| udf_expr_var_arg("?fun", expr),
rules,
);
self.single_arg_pass_through_rules(
"udaf-function",
|expr| udaf_expr_var_arg("?fun", expr, "?distinct"),
rules,
);
self.single_arg_pass_through_rules("is-null", |expr| is_null_expr(expr), rules);
self.single_arg_pass_through_rules("is-not-null", |expr| is_not_null_expr(expr), rules);
rules.push(rewrite(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use crate::{
compile::rewrite::{
aggregate, cube_scan, flatten_pushdown_replacer, projection,
aggregate, cube_scan, filter, flatten_pushdown_replacer, projection,

Check warning on line 3 in rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs

View workflow job for this annotation

GitHub Actions / Check fmt/clippy

Diff in /__w/cube/cube/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs

Check warning on line 3 in rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs

View workflow job for this annotation

GitHub Actions / lint

Diff in /home/runner/work/cube/cube/rust/cubesql/cubesql/src/compile/rewrite/rules/flatten/top_level.rs
rewriter::{CubeEGraph, CubeRewrite},
rules::{flatten::FlattenRules, replacer_flat_push_down_node, replacer_push_down_node},
transforming_chain_rewrite_with_root, FlattenPushdownReplacerInnerAlias, ListType,
LogicalPlanLanguage, ProjectionAlias,
transforming_chain_rewrite_with_root,
FlattenPushdownReplacerInnerAlias, ListType, LogicalPlanLanguage, ProjectionAlias,
},
var, var_iter,
};
Expand Down Expand Up @@ -159,6 +159,90 @@
),
)]);

rules.extend(vec![transforming_chain_rewrite_with_root(
"flatten-filter-pushdown",
aggregate(
"?filter_node",
"?outer_group_expr",
"?outer_aggregate_expr",
"AggregateSplit:false",
),
vec![
("?filter_node", filter("?filter_expr", "?inner_projection")),
(
"?inner_projection",
projection(
"?inner_projection_expr",
"?cube_scan",
"?inner_projection_alias",
"ProjectionSplit:false",
),
),
(
"?cube_scan",
cube_scan(
"?alias_to_cube",
"?members",
"?filters",
"?orders",
"?limit",
"?offset",
"CubeScanSplit:false",
"?can_pushdown_join",
"CubeScanWrapped:false",
"?ungrouped",
"?join_hints",
),
),
],
aggregate(
filter(
flatten_pushdown_replacer(
"?filter_expr",
"?inner_projection_expr",
"?inner_alias",
"FlattenPushdownReplacerTopLevel:false",
),
cube_scan(
"?alias_to_cube",
"?members",
"?filters",
"?orders",
"?limit",
"?offset",
"CubeScanSplit:false",
"?can_pushdown_join",
"CubeScanWrapped:false",
"?ungrouped",
"?join_hints",
),
),
flatten_pushdown_replacer(
"?outer_group_expr",
"?inner_projection_expr",
"?inner_alias",
"FlattenPushdownReplacerTopLevel:false",
),
flatten_pushdown_replacer(
"?outer_aggregate_expr",
"?inner_projection_expr",
"?inner_alias",
"FlattenPushdownReplacerTopLevel:false",
),
"AggregateSplit:false",
),
self.flatten_aggregate(
"?inner_projection",
"?cube_scan",
"?members",
"?inner_projection_expr",
"?outer_group_expr",
"?outer_aggregate_expr",
"?inner_projection_alias",
"?inner_alias",
),
)]);

if self.config_obj.push_down_pull_up_split() {
Self::flat_list_pushdown_rules(
"flatten-projection-expr",
Expand All @@ -175,11 +259,20 @@
ListType::AggregateGroupExpr,
rules,
);
Self::flat_list_pushdown_rules(
"flatten-scalar-fun-args",
ListType::ScalarFunctionExprArgs,
rules,
);
} else {
Self::list_pushdown_rules("flatten-projection-expr", "ProjectionExpr", rules);
Self::list_pushdown_rules("flatten-aggregate-expr", "AggregateAggrExpr", rules);
Self::list_pushdown_rules("flatten-group-expr", "AggregateGroupExpr", rules);
Self::list_pushdown_rules("flatten-scalar-fun-args", "ScalarFunctionExprArgs", rules);
}
Self::list_pushdown_rules("flatten-udf-fun-args", "ScalarUDFExprArgs", rules);
Self::list_pushdown_rules("flatten-agg-fun-args", "AggregateFunctionExprArgs", rules);
Self::list_pushdown_rules("flatten-udaf-fun-args", "AggregateUDFExprArgs", rules);
Comment on lines +273 to +275
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These three new list_pushdown_rules calls (ScalarUDFExprArgs, AggregateFunctionExprArgs, AggregateUDFExprArgs) are placed outside the if self.config_obj.push_down_pull_up_split() conditional, so they always use the string-based list_pushdown_rules variant. I see that ListType doesn't have corresponding variants for these three, so this is correct — just noting the asymmetry with ScalarFunctionExprArgs which does get the flat_list_pushdown_rules treatment.

If/when these list types get added to the ListType enum, they should also be moved inside the conditional to use flat_list_pushdown_rules when push_down_pull_up_split is enabled.

}

pub fn flatten_projection(
Expand Down
Loading