diff --git a/src/midrc_react/core/jsdcontroller.py b/src/midrc_react/core/jsdcontroller.py index 6a8bf96..9e65307 100644 --- a/src/midrc_react/core/jsdcontroller.py +++ b/src/midrc_react/core/jsdcontroller.py @@ -324,11 +324,8 @@ def build_date_list(df1, df2): str_col = category[:-6] num_col = data_source_1.numeric_cols[str_col]['raw column'] - # remove the 'Not Reported' data before calculation - combined_df = combined_df[pd.to_numeric(combined_df[num_col], errors='coerce').notnull()] - - input_data = [float(calc_ks2_samp_by_feature(combined_df[combined_df['date'] <= date], - num_col)['Dataset 0 vs Dataset 1']) for date in date_list] + input_data = [calc_ks2_samp_by_feature(combined_df[combined_df['date'] <= date], + num_col)['Dataset 0 vs Dataset 1'] for date in date_list] if input_data is not None: model_input_data.append([pandas_date_to_qdate(calc_date) for calc_date in date_list]) diff --git a/src/midrc_react/core/jsdmodel.py b/src/midrc_react/core/jsdmodel.py index 0d1763e..6a046a5 100644 --- a/src/midrc_react/core/jsdmodel.py +++ b/src/midrc_react/core/jsdmodel.py @@ -19,12 +19,19 @@ from typing import Any, Optional +from numpy import float64 as np_float64 from PySide6.QtCore import QAbstractTableModel, QModelIndex, Qt, Signal from PySide6.QtGui import QColor from midrc_react.core.excel_layout import DataSource +def convert_to_builtin(val): + if isinstance(val, np_float64): + return float(val) + return val + + class JSDTableModel(QAbstractTableModel): r""" A class representing a table model for JSD data. @@ -198,7 +205,7 @@ def data(self, index: QModelIndex, role: int = Qt.DisplayRole) -> Optional[Any]: """ if role in (Qt.DisplayRole, Qt.EditRole): try: - return self._input_data[index.column()][index.row()] + return convert_to_builtin(self._input_data[index.column()][index.row()]) except IndexError: return None elif role == Qt.BackgroundRole: diff --git a/src/midrc_react/core/numeric_distances.py b/src/midrc_react/core/numeric_distances.py index a345bf4..2edaf04 100644 --- a/src/midrc_react/core/numeric_distances.py +++ b/src/midrc_react/core/numeric_distances.py @@ -29,6 +29,18 @@ from midrc_react.core.aggregate_jsd_calc import calc_jsd_from_counts_dict from midrc_react.core.cucconi import cucconi_test +def remove_nan_from_df(df, column_name): + """ + Remove NaN values from a specific column in a DataFrame. + + Args: + df (pd.DataFrame): The DataFrame to process. + column_name (str): The name of the column to check for NaN values. + + Returns: + pd.DataFrame: A DataFrame with NaN values removed from the specified column. + """ + return df[pd.to_numeric(df[column_name], errors='coerce').notnull()] def calc_numerical_metric_by_feature(df, feature: str, dataset_column: str, metric_function): """ @@ -91,7 +103,7 @@ def calc_cucconi_by_feature(df, feature: str, dataset_column: str = '_dataset_', """ if scaling is not None: logging.warning('Cucconi test is not affected by scaling. Ignoring scaling method.') - calc_df = df # if scaling is None else scale_feature(df, feature, method=scaling) + calc_df = remove_nan_from_df(df, feature) # if scaling is None else scale_feature(df, feature, method=scaling) def cucconi_2samp_test(values1, values2): return cucconi_test(values1, values2, method='permutation') @@ -119,7 +131,7 @@ def calc_ks2_samp_by_feature(df, feature: str, dataset_column: str = '_dataset_' """ if scaling is not None: logging.warning('Kolmogorov-Smirnov test is not affected by scaling. Ignoring scaling method.') - calc_df = df # if scaling is None else scale_feature(df, feature, method=scaling) + calc_df = remove_nan_from_df(df, feature) # if scaling is None else scale_feature(df, feature, method=scaling) return calc_numerical_metric_by_feature(calc_df, feature, dataset_column, ks_2samp) @@ -147,6 +159,7 @@ def calc_wasserstein_by_feature(df, feature: str, dataset_column: str = '_datase # We need to define a function that returns a SimpleNamespace with a 'statistic' attribute def w_d_calc(values1, values2): return SimpleNamespace(statistic=wasserstein_distance(values1, values2)) + df = remove_nan_from_df(df, feature) calc_df = df if scaling is None else scale_feature(df, feature, method=scaling) return calc_numerical_metric_by_feature(calc_df, feature, dataset_column, w_d_calc)