Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions src/midrc_react/core/jsdcontroller.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,11 +324,8 @@ def build_date_list(df1, df2):
str_col = category[:-6]
num_col = data_source_1.numeric_cols[str_col]['raw column']

# remove the 'Not Reported' data before calculation
combined_df = combined_df[pd.to_numeric(combined_df[num_col], errors='coerce').notnull()]

input_data = [float(calc_ks2_samp_by_feature(combined_df[combined_df['date'] <= date],
num_col)['Dataset 0 vs Dataset 1']) for date in date_list]
input_data = [calc_ks2_samp_by_feature(combined_df[combined_df['date'] <= date],
num_col)['Dataset 0 vs Dataset 1'] for date in date_list]

if input_data is not None:
model_input_data.append([pandas_date_to_qdate(calc_date) for calc_date in date_list])
Expand Down
9 changes: 8 additions & 1 deletion src/midrc_react/core/jsdmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,19 @@

from typing import Any, Optional

from numpy import float64 as np_float64
from PySide6.QtCore import QAbstractTableModel, QModelIndex, Qt, Signal
from PySide6.QtGui import QColor

from midrc_react.core.excel_layout import DataSource


def convert_to_builtin(val):
    """
    Return a plain Python ``float`` when given a NumPy ``float64`` scalar.

    Args:
        val: Any value. Only instances of ``numpy.float64`` are converted.

    Returns:
        ``float(val)`` if ``val`` is a ``numpy.float64``; otherwise ``val``
        itself, unchanged.
    """
    # Everything that is not a float64 scalar passes straight through.
    return float(val) if isinstance(val, np_float64) else val


class JSDTableModel(QAbstractTableModel):
r"""
A class representing a table model for JSD data.
Expand Down Expand Up @@ -198,7 +205,7 @@ def data(self, index: QModelIndex, role: int = Qt.DisplayRole) -> Optional[Any]:
"""
if role in (Qt.DisplayRole, Qt.EditRole):
try:
return self._input_data[index.column()][index.row()]
return convert_to_builtin(self._input_data[index.column()][index.row()])
except IndexError:
return None
elif role == Qt.BackgroundRole:
Expand Down
17 changes: 15 additions & 2 deletions src/midrc_react/core/numeric_distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@
from midrc_react.core.aggregate_jsd_calc import calc_jsd_from_counts_dict
from midrc_react.core.cucconi import cucconi_test

def remove_nan_from_df(df, column_name):
    """
    Keep only the rows whose value in ``column_name`` can be read as a number.

    The column is passed through ``pd.to_numeric`` with ``errors='coerce'``,
    so any entry that cannot be parsed as a number becomes NaN and its row is
    dropped together with rows that were already missing. The surviving rows
    keep their original values; the coerced numbers are used only to build
    the row mask.

    Args:
        df (pd.DataFrame): The DataFrame to filter.
        column_name (str): The name of the column to test.

    Returns:
        pd.DataFrame: The rows of ``df`` whose ``column_name`` value is
        numeric-convertible and not null.
    """
    coerced = pd.to_numeric(df[column_name], errors='coerce')
    is_numeric = coerced.notna()
    return df[is_numeric]

def calc_numerical_metric_by_feature(df, feature: str, dataset_column: str, metric_function):
"""
Expand Down Expand Up @@ -91,7 +103,7 @@ def calc_cucconi_by_feature(df, feature: str, dataset_column: str = '_dataset_',
"""
if scaling is not None:
logging.warning('Cucconi test is not affected by scaling. Ignoring scaling method.')
calc_df = df # if scaling is None else scale_feature(df, feature, method=scaling)
calc_df = remove_nan_from_df(df, feature) # if scaling is None else scale_feature(df, feature, method=scaling)

def cucconi_2samp_test(values1, values2):
return cucconi_test(values1, values2, method='permutation')
Expand Down Expand Up @@ -119,7 +131,7 @@ def calc_ks2_samp_by_feature(df, feature: str, dataset_column: str = '_dataset_'
"""
if scaling is not None:
logging.warning('Kolmogorov-Smirnov test is not affected by scaling. Ignoring scaling method.')
calc_df = df # if scaling is None else scale_feature(df, feature, method=scaling)
calc_df = remove_nan_from_df(df, feature) # if scaling is None else scale_feature(df, feature, method=scaling)
return calc_numerical_metric_by_feature(calc_df, feature, dataset_column, ks_2samp)


Expand Down Expand Up @@ -147,6 +159,7 @@ def calc_wasserstein_by_feature(df, feature: str, dataset_column: str = '_datase
# We need to define a function that returns a SimpleNamespace with a 'statistic' attribute
def w_d_calc(values1, values2):
return SimpleNamespace(statistic=wasserstein_distance(values1, values2))
df = remove_nan_from_df(df, feature)
calc_df = df if scaling is None else scale_feature(df, feature, method=scaling)
return calc_numerical_metric_by_feature(calc_df, feature, dataset_column, w_d_calc)

Expand Down