Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -470,19 +470,55 @@ impl MultiStageMemberQueryPlanner {
reduce_by: &Vec<Rc<MemberSymbol>>,
group_by: &Option<Vec<Rc<MemberSymbol>>>,
) -> Vec<Rc<MemberSymbol>> {
// Match a projected query dimension `d` against a measure-level
// `reduce_by` / `group_by` entry `m`, honouring the user's
// granularity intent on `m`:
//
// - If `m` carries an explicit granularity (e.g.
// `<time_dim>.quarter` resolves to a `TimeDimensionSymbol` with
// `has_granularity() == true`), match strictly on `full_name`. A
// `group_by: [<time_dim>.quarter]` should keep only the
// `_quarter` projection in PARTITION BY, not sibling granularities
// of the same base time dim.
//
// - Otherwise (bare reference: plain `DimensionSymbol`, or
// `TimeDimensionSymbol` with no granularity), match across
// granularities by unwrapping both sides to their base symbol. A
// `reduce_by: [<time_dim>]` should exclude the time dim from
// PARTITION BY regardless of the projection's granularity suffix
// — see cube-js/cube#10854: without unwrap, the `_day` /
// `_month` / etc. suffix on `TimeDimensionSymbol::full_name`
// caused string-equality mismatch and the time dim silently stayed
// in PARTITION BY, collapsing the window to one-row partitions.
fn matches_partition_member(d: &Rc<MemberSymbol>, m: &Rc<MemberSymbol>) -> bool {
let m_has_explicit_granularity = matches!(
m.as_ref(),
MemberSymbol::TimeDimension(td) if td.has_granularity()
);
if m_has_explicit_granularity {
d.has_member_in_reference_chain(m)
} else {
let base = |x: &Rc<MemberSymbol>| match x.as_ref() {
MemberSymbol::TimeDimension(td) => td.base_symbol().clone(),
_ => x.clone(),
};
base(d).has_member_in_reference_chain(&base(m))
}
}

let dimensions = self.all_dimensions();
let dimensions = if !reduce_by.is_empty() {
dimensions
.into_iter()
.filter(|d| !reduce_by.iter().any(|m| d.has_member_in_reference_chain(m)))
.filter(|d| !reduce_by.iter().any(|m| matches_partition_member(d, m)))
.collect_vec()
} else {
dimensions
};
let dimensions = if let Some(group_by) = group_by {
dimensions
.into_iter()
.filter(|d| group_by.iter().any(|m| d.has_member_in_reference_chain(m)))
.filter(|d| group_by.iter().any(|m| matches_partition_member(d, m)))
.collect_vec()
} else {
dimensions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ cubes:
- orders.status
- orders.category

- name: amount_reduce_created_at
type: sum
sql: "{CUBE.total_amount}"
multi_stage: true
reduce_by:
- orders.created_at

- name: amount_group_by_status
type: sum
sql: "{CUBE.total_amount}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,41 @@ async fn test_group_by_equals_query_dims() {
insta::assert_snapshot!(result);
}
}

#[tokio::test(flavor = "multi_thread")]
async fn test_group_by_time_dim_multiple_granularities() {
let ctx = create_context();

// Regression test mirroring the BigECommerce postgres-driver case (see
// packages/cubejs-testing-drivers/src/tests/testQueries.ts
// "multi-stage group by time dimension"). The query projects two
// granularities of the same time dim (month + week), while the measure
// `amount_group_by_status_time` has `group_by: [status, created_at.month]`
// — only the month granularity should stay in PARTITION BY.
//
// Expected: every (month, week) row reports the per-month grand total
// (Jan=500, Feb=750, Mar=1000), NOT the per-week sum. If `group_by`
// matched via TimeDimensionSymbol → base unwrapping, the week granularity
// would also stay in PARTITION BY and the result would collapse to
// per-(month, week) sums.
let query = indoc! {r#"
measures:
- orders.amount_group_by_status_time
time_dimensions:
- dimension: orders.created_at
granularity: month
dateRange:
- "2024-01-01"
- "2024-03-31"
- dimension: orders.created_at
granularity: week
order:
- id: orders.created_at
"#};

ctx.build_sql(query).unwrap();

if let Some(result) = ctx.try_execute_pg(query, SEED).await {
insta::assert_snapshot!(result);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,43 @@ async fn test_reduce_by_with_time() {
insta::assert_snapshot!(result);
}
}

#[tokio::test(flavor = "multi_thread")]
async fn test_reduce_by_time_dim() {
let ctx = create_context();

// Regression test for cube-js/cube#10854: `reduce_by: [<time_dim>]` was
// silently ignored because `TimeDimensionSymbol::full_name` carries a
// granularity suffix (defaulting to "_day" even with no granularity)
// while the reduce_by entry resolves to a base DimensionSymbol without
// the suffix — string equality at the comparison site failed and the
// time dim stayed in PARTITION BY, collapsing each window to a one-row
// partition.
//
// Expected: `amount_reduce_created_at` (sum reducing by created_at)
// returns the per-status grand total across all months, not split per
// (status, month). Status totals from seed: completed=1400, pending=650,
// cancelled=200. With reduce_by working, every row for a given status
// shows the same total regardless of month.
let query = indoc! {r#"
measures:
- orders.amount_reduce_created_at
dimensions:
- orders.status
time_dimensions:
- dimension: orders.created_at
granularity: month
dateRange:
- "2024-01-01"
- "2024-03-31"
order:
- id: orders.status
- id: orders.created_at
"#};

ctx.build_sql(query).unwrap();

if let Some(result) = ctx.try_execute_pg(query, SEED).await {
insta::assert_snapshot!(result);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
source: cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs
assertion_line: 136
expression: result
---
orders__created_at_month | orders__created_at_week | orders__amount_group_by_status_time
-------------------------+-------------------------+------------------------------------
2024-01-01 00:00:00 | 2024-01-01 00:00:00 | 500.00
2024-01-01 00:00:00 | 2024-01-08 00:00:00 | 500.00
2024-01-01 00:00:00 | 2024-01-15 00:00:00 | 500.00
2024-01-01 00:00:00 | 2024-01-22 00:00:00 | 500.00
2024-02-01 00:00:00 | 2024-01-29 00:00:00 | 750.00
2024-02-01 00:00:00 | 2024-02-05 00:00:00 | 750.00
2024-02-01 00:00:00 | 2024-02-12 00:00:00 | 750.00
2024-02-01 00:00:00 | 2024-02-19 00:00:00 | 750.00
2024-03-01 00:00:00 | 2024-02-26 00:00:00 | 1000.00
2024-03-01 00:00:00 | 2024-03-04 00:00:00 | 1000.00
2024-03-01 00:00:00 | 2024-03-11 00:00:00 | 1000.00
2024-03-01 00:00:00 | 2024-03-18 00:00:00 | 1000.00
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
source: cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs
expression: result
---
orders__status | orders__created_at_month | orders__amount_reduce_created_at
---------------+--------------------------+---------------------------------
cancelled | 2024-01-01 00:00:00 | 200.00
cancelled | 2024-02-01 00:00:00 | 200.00
cancelled | 2024-03-01 00:00:00 | 200.00
completed | 2024-01-01 00:00:00 | 1400.00
completed | 2024-02-01 00:00:00 | 1400.00
completed | 2024-03-01 00:00:00 | 1400.00
pending | 2024-01-01 00:00:00 | 650.00
pending | 2024-02-01 00:00:00 | 650.00
pending | 2024-03-01 00:00:00 | 650.00
Loading