From 5f695297e28db58c84722df6d513a4e5b875172b Mon Sep 17 00:00:00 2001 From: Thomas Langton <155970791+tlangton3@users.noreply.github.com> Date: Mon, 11 May 2026 15:37:02 +0100 Subject: [PATCH 1/3] fix(tesseract): multi_stage reduce_by/group_by no-ops on time dimensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `multi_stage: true` measures (rank, window aggregates) configured with `reduce_by: []` silently produced wrong results when the consumer query specified a granularity (e.g. `granularity: day`). The projected dim resolved as a TimeDimensionSymbol whose `full_name` carried a granularity suffix, while the reduce_by entry resolved to a base DimensionSymbol with bare name — the string-equality compare in `has_member_in_reference_chain` failed, the time dim stayed in PARTITION BY, and every row collapsed to a one-row partition with rank() = 1. Fix: unwrap any TimeDimensionSymbol to its base DimensionSymbol on both sides of the comparison in `member_partition_by_logical`. Symmetric unwrapping is required because `group_by` entries can themselves be TimeDimensionSymbols (e.g. `group_by: [.month]`); unwrapping only the projected dim would break the symmetric-granularity match and silently drop the time dim from PARTITION BY in cases like `test_group_by_with_time`. Adds a regression test (`test_reduce_by_time_dim`) using the existing multi-stage fixture: a sum measure with `reduce_by: [orders.created_at]` queried with `granularity: month`. Without the fix the snapshot shows per-(status, month) sums; with the fix every row for a given status shows the same per-status grand total across all months. Fixes #10854. --- .../multi_stage/member_query_planner.rs | 36 +++++++++++++++- .../common/integration_multi_stage.yaml | 7 ++++ .../integration/multi_stage/reduce_by.rs | 41 +++++++++++++++++++ ..._stage__reduce_by__reduce_by_time_dim.snap | 15 +++++++ 4 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__reduce_by__reduce_by_time_dim.snap diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs index 1cb2d37ffb4d1..9abaec750b364 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs @@ -470,11 +470,38 @@ impl MultiStageMemberQueryPlanner { reduce_by: &Vec>, group_by: &Option>>, ) -> Vec> { + // Unwrap any TimeDimensionSymbol to its base DimensionSymbol on BOTH + // sides of the comparison before testing `has_member_in_reference_chain`. + // `TimeDimensionSymbol::full_name` carries a granularity suffix that + // defaults to "_day" when no granularity was set (see + // TimeDimensionSymbol::new), so a `reduce_by: []` entry that + // resolves to a base DimensionSymbol with bare full_name never matches + // a projected query dim wrapped as TimeDimensionSymbol. Without + // unwrapping, the time dim silently remained in PARTITION BY, + // collapsing the window to one-row partitions and producing + // rank() = 1 for every row. See cube-js/cube#10854. + // + // Unwrapping must apply to both sides: `group_by` entries can + // themselves be TimeDimensionSymbols (e.g. `group_by: + // [orders.created_at.month]`), so unwrapping only the projected dim + // would break the symmetric match for explicit-granularity group_by. + fn base_of(d: &Rc) -> &Rc { + match d.as_ref() { + MemberSymbol::TimeDimension(td) => td.base_symbol(), + _ => d, + } + } + let dimensions = self.all_dimensions(); let dimensions = if !reduce_by.is_empty() { dimensions .into_iter() - .filter(|d| !reduce_by.iter().any(|m| d.has_member_in_reference_chain(m))) + .filter(|d| { + let d_base = base_of(d); + !reduce_by + .iter() + .any(|m| d_base.has_member_in_reference_chain(base_of(m))) + }) .collect_vec() } else { dimensions @@ -482,7 +509,12 @@ impl MultiStageMemberQueryPlanner { let dimensions = if let Some(group_by) = group_by { dimensions .into_iter() - .filter(|d| group_by.iter().any(|m| d.has_member_in_reference_chain(m))) + .filter(|d| { + let d_base = base_of(d); + group_by + .iter() + .any(|m| d_base.has_member_in_reference_chain(base_of(m))) + }) .collect_vec() } else { dimensions diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml index 3d915bafeb624..a5e748f7106cd 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/test_fixtures/schemas/yaml_files/common/integration_multi_stage.yaml @@ -134,6 +134,13 @@ cubes: - orders.status - orders.category + - name: amount_reduce_created_at + type: sum + sql: "{CUBE.total_amount}" + multi_stage: true + reduce_by: + - orders.created_at + - name: amount_group_by_status type: sum sql: "{CUBE.total_amount}" diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs index 3cae1efd87fdf..cf8eb67633842 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs @@ -120,3 +120,44 @@ async fn test_reduce_by_with_time() { insta::assert_snapshot!(result); } } + +#[tokio::test(flavor = "multi_thread")] +async fn test_reduce_by_time_dim() { + let ctx = create_context(); + + // Regression test for cube-js/cube#10854: `reduce_by: []` was + // silently ignored because `TimeDimensionSymbol::full_name` carries a + // granularity suffix (defaulting to "_day" even with no granularity) + // while the reduce_by entry resolves to a base DimensionSymbol without + // the suffix — string equality at the comparison site failed and the + // time dim stayed in PARTITION BY, collapsing each window to a one-row + // partition. + // + // Expected: `amount_reduce_created_at` (sum reducing by created_at) + // returns the per-status grand total across all months, not split per + // (status, month). Status totals from seed: completed=1400, pending=650, + // cancelled=200. With reduce_by working, every row for a given status + // shows the same total regardless of month. + let query = indoc! {r#" + measures: + - orders.amount_reduce_created_at + dimensions: + - orders.status + time_dimensions: + - dimension: orders.created_at + granularity: month + dateRange: + - "2024-01-01" + - "2024-03-31" + order: + - id: orders.status + - id: orders.created_at + "#}; + + ctx.build_sql(query).unwrap(); + + if let Some(result) = ctx.try_execute_pg(query, SEED).await { + insta::assert_snapshot!(result); + } +} + diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__reduce_by__reduce_by_time_dim.snap b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__reduce_by__reduce_by_time_dim.snap new file mode 100644 index 0000000000000..d597f875e36d8 --- /dev/null +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__reduce_by__reduce_by_time_dim.snap @@ -0,0 +1,15 @@ +--- +source: cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs +expression: result +--- +orders__status | orders__created_at_month | orders__amount_reduce_created_at +---------------+--------------------------+--------------------------------- +cancelled | 2024-01-01 00:00:00 | 200.00 +cancelled | 2024-02-01 00:00:00 | 200.00 +cancelled | 2024-03-01 00:00:00 | 200.00 +completed | 2024-01-01 00:00:00 | 1400.00 +completed | 2024-02-01 00:00:00 | 1400.00 +completed | 2024-03-01 00:00:00 | 1400.00 +pending | 2024-01-01 00:00:00 | 650.00 +pending | 2024-02-01 00:00:00 | 650.00 +pending | 2024-03-01 00:00:00 | 650.00 From 1ae2a6f5ab68d5be730dba4d4a01bcb009f20d5a Mon Sep 17 00:00:00 2001 From: Thomas Langton <155970791+tlangton3@users.noreply.github.com> Date: Mon, 11 May 2026 15:41:04 +0100 Subject: [PATCH 2/3] style(tesseract): cargo fmt --- .../src/tests/integration/multi_stage/reduce_by.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs index cf8eb67633842..dcffff25f3955 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/reduce_by.rs @@ -160,4 +160,3 @@ async fn test_reduce_by_time_dim() { insta::assert_snapshot!(result); } } - From fe7d04eafc3364282f8f8ab1adcd60b7b4fb3eec Mon Sep 17 00:00:00 2001 From: Thomas Langton <155970791+tlangton3@users.noreply.github.com> Date: Mon, 11 May 2026 16:24:46 +0100 Subject: [PATCH 3/3] fix(tesseract): match partition members by granularity intent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace member_partition_by_logical's separate reduce_by/group_by matching with a single matches_partition_member helper that branches on whether the measure-level entry (m) carries an explicit granularity: - m has explicit granularity (e.g. .quarter): match strictly on full_name. group_by: [.quarter] keeps only the _quarter projection in PARTITION BY; reduce_by: [.month] excludes only the _month projection. - m is bare (plain DimensionSymbol, or TimeDimensionSymbol with granularity = None): unwrap both sides to base. reduce_by: [] excludes the time dim regardless of the projection's granularity suffix — the original cube-js/cube#10854 case. This replaces the symmetric base_of() unwrap on the group_by site, which over-matched when a query projected multiple granularities of the same time dim and degenerated window aggregates to per-row sums. CI surfaced the regression against BigECommerce's "multi-stage group by time dimension" test (both postgres-driver and mssql-driver runs). Adds test_group_by_time_dim_multiple_granularities mirroring the BigECommerce shape against cubesqlplanner's testcontainers Postgres harness — without the fix, sums collapse per-(month, week); with the fix, they correctly aggregate per-month repeated across weeks. --- .../multi_stage/member_query_planner.rs | 64 ++++++++++--------- .../tests/integration/multi_stage/group_by.rs | 38 +++++++++++ ...up_by_time_dim_multiple_granularities.snap | 19 ++++++ 3 files changed, 91 insertions(+), 30 deletions(-) create mode 100644 rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__group_by__group_by_time_dim_multiple_granularities.snap diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs index 9abaec750b364..72da2e7645e20 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs @@ -470,25 +470,39 @@ impl MultiStageMemberQueryPlanner { reduce_by: &Vec>, group_by: &Option>>, ) -> Vec> { - // Unwrap any TimeDimensionSymbol to its base DimensionSymbol on BOTH - // sides of the comparison before testing `has_member_in_reference_chain`. - // `TimeDimensionSymbol::full_name` carries a granularity suffix that - // defaults to "_day" when no granularity was set (see - // TimeDimensionSymbol::new), so a `reduce_by: []` entry that - // resolves to a base DimensionSymbol with bare full_name never matches - // a projected query dim wrapped as TimeDimensionSymbol. Without - // unwrapping, the time dim silently remained in PARTITION BY, - // collapsing the window to one-row partitions and producing - // rank() = 1 for every row. See cube-js/cube#10854. + // Match a projected query dimension `d` against a measure-level + // `reduce_by` / `group_by` entry `m`, honouring the user's + // granularity intent on `m`: // - // Unwrapping must apply to both sides: `group_by` entries can - // themselves be TimeDimensionSymbols (e.g. `group_by: - // [orders.created_at.month]`), so unwrapping only the projected dim - // would break the symmetric match for explicit-granularity group_by. - fn base_of(d: &Rc) -> &Rc { - match d.as_ref() { - MemberSymbol::TimeDimension(td) => td.base_symbol(), - _ => d, + // - If `m` carries an explicit granularity (e.g. + // `.quarter` resolves to a `TimeDimensionSymbol` with + // `has_granularity() == true`), match strictly on `full_name`. A + // `group_by: [.quarter]` should keep only the + // `_quarter` projection in PARTITION BY, not sibling granularities + // of the same base time dim. + // + // - Otherwise (bare reference: plain `DimensionSymbol`, or + // `TimeDimensionSymbol` with no granularity), match across + // granularities by unwrapping both sides to their base symbol. A + // `reduce_by: []` should exclude the time dim from + // PARTITION BY regardless of the projection's granularity suffix + // — see cube-js/cube#10854: without unwrap, the `_day` / + // `_month` / etc. suffix on `TimeDimensionSymbol::full_name` + // caused string-equality mismatch and the time dim silently stayed + // in PARTITION BY, collapsing the window to one-row partitions. + fn matches_partition_member(d: &Rc, m: &Rc) -> bool { + let m_has_explicit_granularity = matches!( + m.as_ref(), + MemberSymbol::TimeDimension(td) if td.has_granularity() + ); + if m_has_explicit_granularity { + d.has_member_in_reference_chain(m) + } else { + let base = |x: &Rc| match x.as_ref() { + MemberSymbol::TimeDimension(td) => td.base_symbol().clone(), + _ => x.clone(), + }; + base(d).has_member_in_reference_chain(&base(m)) } } @@ -496,12 +510,7 @@ impl MultiStageMemberQueryPlanner { let dimensions = if !reduce_by.is_empty() { dimensions .into_iter() - .filter(|d| { - let d_base = base_of(d); - !reduce_by - .iter() - .any(|m| d_base.has_member_in_reference_chain(base_of(m))) - }) + .filter(|d| !reduce_by.iter().any(|m| matches_partition_member(d, m))) .collect_vec() } else { dimensions @@ -509,12 +518,7 @@ impl MultiStageMemberQueryPlanner { let dimensions = if let Some(group_by) = group_by { dimensions .into_iter() - .filter(|d| { - let d_base = base_of(d); - group_by - .iter() - .any(|m| d_base.has_member_in_reference_chain(base_of(m))) - }) + .filter(|d| group_by.iter().any(|m| matches_partition_member(d, m))) .collect_vec() } else { dimensions diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs index fdf8880297dd6..e98aba0e3cabc 100644 --- a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs @@ -98,3 +98,41 @@ async fn test_group_by_equals_query_dims() { insta::assert_snapshot!(result); } } + +#[tokio::test(flavor = "multi_thread")] +async fn test_group_by_time_dim_multiple_granularities() { + let ctx = create_context(); + + // Regression test mirroring the BigECommerce postgres-driver case (see + // packages/cubejs-testing-drivers/src/tests/testQueries.ts + // "multi-stage group by time dimension"). The query projects two + // granularities of the same time dim (month + week), while the measure + // `amount_group_by_status_time` has `group_by: [status, created_at.month]` + // — only the month granularity should stay in PARTITION BY. + // + // Expected: every (month, week) row reports the per-month grand total + // (Jan=500, Feb=750, Mar=1000), NOT the per-week sum. If `group_by` + // matched via TimeDimensionSymbol → base unwrapping, the week granularity + // would also stay in PARTITION BY and the result would collapse to + // per-(month, week) sums. + let query = indoc! {r#" + measures: + - orders.amount_group_by_status_time + time_dimensions: + - dimension: orders.created_at + granularity: month + dateRange: + - "2024-01-01" + - "2024-03-31" + - dimension: orders.created_at + granularity: week + order: + - id: orders.created_at + "#}; + + ctx.build_sql(query).unwrap(); + + if let Some(result) = ctx.try_execute_pg(query, SEED).await { + insta::assert_snapshot!(result); + } +} diff --git a/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__group_by__group_by_time_dim_multiple_granularities.snap b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__group_by__group_by_time_dim_multiple_granularities.snap new file mode 100644 index 0000000000000..720ed36c76c68 --- /dev/null +++ b/rust/cube/cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/snapshots/cubesqlplanner__tests__integration__multi_stage__group_by__group_by_time_dim_multiple_granularities.snap @@ -0,0 +1,19 @@ +--- +source: cubesqlplanner/cubesqlplanner/src/tests/integration/multi_stage/group_by.rs +assertion_line: 136 +expression: result +--- +orders__created_at_month | orders__created_at_week | orders__amount_group_by_status_time +-------------------------+-------------------------+------------------------------------ +2024-01-01 00:00:00 | 2024-01-01 00:00:00 | 500.00 +2024-01-01 00:00:00 | 2024-01-08 00:00:00 | 500.00 +2024-01-01 00:00:00 | 2024-01-15 00:00:00 | 500.00 +2024-01-01 00:00:00 | 2024-01-22 00:00:00 | 500.00 +2024-02-01 00:00:00 | 2024-01-29 00:00:00 | 750.00 +2024-02-01 00:00:00 | 2024-02-05 00:00:00 | 750.00 +2024-02-01 00:00:00 | 2024-02-12 00:00:00 | 750.00 +2024-02-01 00:00:00 | 2024-02-19 00:00:00 | 750.00 +2024-03-01 00:00:00 | 2024-02-26 00:00:00 | 1000.00 +2024-03-01 00:00:00 | 2024-03-04 00:00:00 | 1000.00 +2024-03-01 00:00:00 | 2024-03-11 00:00:00 | 1000.00 +2024-03-01 00:00:00 | 2024-03-18 00:00:00 | 1000.00