From 444d277932dbc72b6a75878bfca25203560d1e67 Mon Sep 17 00:00:00 2001 From: Filipp Shpomer Date: Fri, 22 May 2026 16:37:53 -0700 Subject: [PATCH 1/2] fix: replace Dask GroupBy path in Distinct (.apply(set) fails in pandas 3.0) --- cdisc_rules_engine/operations/distinct.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cdisc_rules_engine/operations/distinct.py b/cdisc_rules_engine/operations/distinct.py index 7d3ab9897..da5e31378 100644 --- a/cdisc_rules_engine/operations/distinct.py +++ b/cdisc_rules_engine/operations/distinct.py @@ -1,4 +1,5 @@ import pandas as pd +from cdisc_rules_engine.models.dataset.pandas_dataset import PandasDataset from cdisc_rules_engine.operations.base_operation import BaseOperation @@ -19,6 +20,8 @@ def _execute_operation(self): result = self.params.dataframe if self.params.filter: result = self._filter_data(result) + if hasattr(result.data, 'compute'): + result = PandasDataset(result.data.compute()) value_is_reference = getattr(self.params, "value_is_reference", False) if not self.params.grouping: if value_is_reference: @@ -57,17 +60,10 @@ def get_existing_column_names(group): ) result = grouped.apply(get_existing_column_names).reset_index() - elif isinstance(result.data, pd.DataFrame): + else: result = grouped.data[self.params.target].agg( self._unique_values_for_column ) - else: - result = ( - grouped.data[self.params.target] - .unique() - .rename({self.params.target: self.params.operation_id}) - ) - result = result.apply(list).to_frame().reset_index() return result def _get_referenced_datasets(self): From 2642ace4b80b578190077d0c671c0aac0b8d0337 Mon Sep 17 00:00:00 2001 From: Filipp Shpomer Date: Wed, 24 Jun 2026 10:43:10 -0700 Subject: [PATCH 2/2] style: apply black formatting to distinct.py --- cdisc_rules_engine/operations/distinct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdisc_rules_engine/operations/distinct.py b/cdisc_rules_engine/operations/distinct.py index da5e31378..deab4da2f 100644 --- a/cdisc_rules_engine/operations/distinct.py +++ b/cdisc_rules_engine/operations/distinct.py @@ -20,7 +20,7 @@ def _execute_operation(self): result = self.params.dataframe if self.params.filter: result = self._filter_data(result) - if hasattr(result.data, 'compute'): + if hasattr(result.data, "compute"): result = PandasDataset(result.data.compute()) value_is_reference = getattr(self.params, "value_is_reference", False) if not self.params.grouping: