Skip to content
6 changes: 3 additions & 3 deletions docs/source/modules.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
midrc_react
===========
src
===

.. toctree::
:maxdepth: 8
:maxdepth: 7

midrc_react
64 changes: 50 additions & 14 deletions jsdconfig-zipcode.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
data sources:
# The data sources will be loaded in the order they are populated here
- name: MIDRC
description: MIDRC Excel File
description: MIDRC TSV File
data type: file
filename: data/MIDRC Open A1 and R1 - cumulative by batch.xlsx
remove column name text: [(CUSUM)]
filename: data/midrc_data_download-2025-01-29.tsv
columns:
- Age at Index
- Ethnicity
- Race
- Sex
- COVID-19 Positive
- Race and Ethnicity
numeric_cols:
Age at Index:
raw column: age_at_index
bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
plugin: midrc_tsv_loader

- name: CDC
description: CDC Excel File
Expand All @@ -18,47 +30,71 @@ data sources:
date: '2020-01-01' # The census file does not have a date column, so we specify the date here

- name: MIDRC COVID+
description: MIDRC COVID+ Excel File
description: MIDRC COVID+ TSV File
data type: file
filename: data/MIDRC Open A1 and R1 COVIDpos only - cumulative by batch.xlsx
remove column name text: [(CUSUM)]
filename: data/midrc_data_download-2025-01-29_covid_pos.tsv
columns:
- Age at Index
- Ethnicity
- Race
- Sex
- COVID-19 Positive
- Race and Ethnicity
numeric_cols:
Age at Index:
raw column: age_at_index
bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
plugin: midrc_tsv_loader

- name: Zip Code 1
description: Zip Code 1 Excel File
description: Zip Code 1 TSV File
data type: file
filename: data/midrc_data_download-2025-01-29_0.tsv
columns:
- Age at Index
- Ethnicity
- Race
- Sex
- Covid19 Positive
- COVID-19 Positive
- Race and Ethnicity
numeric_cols:
Age at Index:
raw column: age_at_index
bins: [ 0, 17, 50, 65, 1000 ]
labels: ['0-17', "18-49", '50-64', '65+']
bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
plugin: midrc_tsv_loader

- name: Zip Code 2
description: Zip Code 2 Excel File
description: Zip Code 2 TSV File
data type: file
filename: data/midrc_data_download-2025-01-29_1.tsv
columns:
- Age at Index
- Ethnicity
- Race
- Sex
- Covid19 Positive
- COVID-19 Positive
- Race and Ethnicity
numeric_cols:
Age at Index:
raw column: age_at_index
bins: [ 0, 17, 50, 65, 1000 ]
labels: ['0-17', "18-49", '50-64', '65+']
bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
plugin: midrc_tsv_loader

- name: MIDRC COVID+
description: MIDRC COVID+ Excel File
data type: file
filename: data/MIDRC Open A1 and R1 COVIDpos only - cumulative by batch.xlsx
remove column name text: [(CUSUM)]

- name: MIDRC
description: MIDRC Excel File
data type: file
filename: data/MIDRC Open A1 and R1 - cumulative by batch.xlsx
remove column name text: [(CUSUM)]

# TODO: The following should be moved into QSettings for modifications within the GUI
# For custom age columns, please use .inf as the maximum age in the final age group
custom age ranges:
Expand Down
32 changes: 17 additions & 15 deletions src/midrc_react/core/aggregate_jsd_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,22 @@ def calc_jsd_from_counts_dict(counts_dict, dataset_names):

return output_dict

def calc_jsd_by_features_combined(combined_df: pd.DataFrame, cols_to_use: list[str], dataset_column) -> dict[str, float]:
    """
    Calculate JSD values from an already-combined DataFrame.

    Rows are counted for every combination of values in ``cols_to_use``, separately for each
    dataset identified by ``dataset_column``, and the per-dataset count vectors are handed to
    ``calc_jsd_from_counts_dict``.

    Args:
        combined_df (pd.DataFrame): DataFrame holding the rows of all datasets, with a column that
            identifies which dataset each row belongs to.
        cols_to_use (list[str]): Columns whose value combinations define the histogram bins.
        dataset_column: Name of the column in ``combined_df`` that identifies the dataset.

    Returns:
        dict[str, float]: The result of ``calc_jsd_from_counts_dict`` for the computed counts.
    """
    # Dataset identifiers are compared as strings because the pivot columns are cast to str below
    dataset_labels = combined_df[dataset_column].unique().astype(str)

    # One row per feature combination, one count column per dataset
    counts_table = combined_df.pivot_table(
        index=cols_to_use,
        columns=dataset_column,
        aggfunc='size',
        fill_value=0,
    ).reset_index()
    counts_table.columns = counts_table.columns.astype(str)

    # A dataset without a count column contributes an all-zero vector of matching length
    counts_by_dataset = {}
    for label in dataset_labels:
        if label in counts_table:
            counts_by_dataset[label] = counts_table[label].values
        else:
            counts_by_dataset[label] = np.zeros(len(counts_table))

    return calc_jsd_from_counts_dict(counts_by_dataset, dataset_labels)


def calc_jsd_by_features(df_list: list[pd.DataFrame], cols_to_use: list[str]) -> dict[str, float]:
"""
Expand All @@ -76,21 +92,7 @@ def calc_jsd_by_features(df_list: list[pd.DataFrame], cols_to_use: list[str]) ->
"""
dataset_column = '_dataset_' # Temporary column name to store dataset information
combined_df = combine_datasets_from_list(df_list, dataset_column)

# Pivot table to get counts for each combination
pivot_table = combined_df.pivot_table(index=cols_to_use, columns=dataset_column, aggfunc='size', fill_value=0)
pivot_table = pivot_table.reset_index()

# Convert dataset columns to string in case they are integers
pivot_table.columns = pivot_table.columns.astype(str)

labels = combined_df[dataset_column].unique()

# Create a dictionary to hold counts for each dataset
counts_dict = {dataset: pivot_table[dataset].values if dataset in pivot_table else np.zeros(len(pivot_table)) for
dataset in labels}

return calc_jsd_from_counts_dict(counts_dict, labels)
return calc_jsd_by_features_combined(combined_df, cols_to_use, dataset_column)


def calc_jsd_by_features_2df(df1: pd.DataFrame, df2: pd.DataFrame, cols_to_use: list[str]) -> float:
Expand Down
8 changes: 6 additions & 2 deletions src/midrc_react/core/data_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,12 @@ def _adjust_outliers(df: pd.DataFrame, cut_column_name: str, column_name: str, b
low_text = "Outlier_Low"
high_text = "Outlier_High"
print(f"WARNING: There are values outside the bins specified for the '{column_name}' column.")
df.loc[df[cut_column_name].isna() & (df[column_name] < bins[0]), cut_column_name] = low_text
df.loc[df[cut_column_name].isna() & (df[column_name] >= bins[-1]), cut_column_name] = high_text

# Only compare numeric values, ignore strings or other types
col_numeric = pd.to_numeric(df[column_name], errors='coerce')

df.loc[df[cut_column_name].isna() & (col_numeric < bins[0]), cut_column_name] = low_text
df.loc[df[cut_column_name].isna() & (col_numeric >= bins[-1]), cut_column_name] = high_text
df.loc[df[cut_column_name].isna(), cut_column_name] = new_text
if (df[cut_column_name] == low_text).sum() > 0:
print(f" {(df[cut_column_name] == low_text).sum()} values are below the min bin value.\n"
Expand Down
5 changes: 5 additions & 0 deletions src/midrc_react/core/excel_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,11 @@ def build_data_frames_from_csv(self, filename: str):
# Apply numeric column adjustments
df = self.apply_numeric_column_adjustments(df)

# Convert all non-numeric columns to string
for col in self._columns:
if col in df.columns:
df[col] = df[col].astype(str)

self.raw_data = df
self.create_sheets_from_df(df)

Expand Down
17 changes: 10 additions & 7 deletions src/midrc_react/core/famd_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,15 +165,20 @@ def calc_famd_distances(df, cols_to_use, numeric_cols, dataset_column='_dataset_
dict: Dictionary of distance values specified in distance_metrics for each dataset combination.

"""
return calc_distances_via_df(calc_famd_df(df, cols_to_use, numeric_cols, print_outliers=print_outliers),
return calc_distances_via_df(calc_famd_df(df,
cols_to_use,
numeric_cols,
dataset_column,
print_outliers=print_outliers
),
'famd_x_coordinates',
dataset_column,
dataset_column=dataset_column,
distance_metrics=distance_metrics,
jsd_scaled_bin_width=jsd_scaled_bin_width,
)


def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date, dataset_column='_dataset_'):
"""
Calculate the KS2 distance between two datasets at a specific date.

Expand All @@ -190,7 +195,6 @@ def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
df1_at_date = df1[df1['date'] <= calc_date]
df2_at_date = df2[df2['date'] <= calc_date]

dataset_column = '_dataset_'
combined_df = combine_datasets_from_list([df1_at_date, df2_at_date], dataset_column=dataset_column)

distance_metrics = ['ks2']
Expand All @@ -199,7 +203,7 @@ def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
return distance_dict['ks2']['Dataset 0 vs Dataset 1']


def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list):
def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list, dataset_column='_dataset_'):
"""
Calculate the KS2 distance between two datasets at multiple dates.

Expand All @@ -213,10 +217,9 @@ def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list):
Returns:
list(float): list of KS2 distances at each date
"""
dataset_column = '_dataset_'
combined_df = combine_datasets_from_list([df1, df2], dataset_column=dataset_column)

famd_df = calc_famd_df(combined_df, cols_to_use, numeric_cols)
famd_df = calc_famd_df(combined_df, cols_to_use, numeric_cols, dataset_column=dataset_column)

# Add date column to the DataFrame after FAMD fitting
famd_df['date'] = combined_df['date']
Expand Down
10 changes: 6 additions & 4 deletions src/midrc_react/core/jsdcontroller.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,9 @@ def connect_signals(self):
dataselectiongroupbox_class_name = type(jsd_view.dataselectiongroupbox).__name__

if dataselectiongroupbox_class_name == 'JsdDataSelectionGroupBox':
for f_c in jsd_view.dataselectiongroupbox.file_comboboxes:
f_c.currentIndexChanged.connect(self.file_changed)
jsd_view.dataselectiongroupbox.num_data_items_changed.connect(self.file_changed)
jsd_view.dataselectiongroupbox.file_checkbox_state_changed.connect(self.file_changed)
jsd_view.dataselectiongroupbox.file_combobox_changed.connect(self.file_changed)
jsd_view.dataselectiongroupbox.category_combobox.currentIndexChanged.connect(self.category_changed)

elif dataselectiongroupbox_class_name == 'DataSelectionGroupBox':
Expand Down Expand Up @@ -399,12 +398,15 @@ def update_file_based_charts(self):
sheet_dict[i] = self.get_file_sheets_from_index(i)

spider_plot_values = self.get_spider_plot_values(spider_plot_date)
self.jsd_view.update_spider_chart(spider_plot_values)
try:
self.jsd_view.update_spider_chart(spider_plot_values)
except (ValueError, KeyError, TypeError):
print('An error occurred during the update of the spider chart.')

try:
self.jsd_view.update_pie_chart_dock(sheet_dict)
except (ValueError, KeyError, TypeError):
return False
print('An error occurred during the update of file-based charts.')

return True

Expand Down
22 changes: 21 additions & 1 deletion src/midrc_react/gui/pyside6/copyabletableview.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from PySide6.QtCore import QDate, QEvent, QObject, Qt
from PySide6.QtGui import QGuiApplication, QKeySequence
from PySide6.QtWidgets import QTableView
from PySide6.QtWidgets import QTableView, QMenu


class CopyableTableView(QTableView):
Expand Down Expand Up @@ -78,3 +78,23 @@ def copy_selection(self) -> None:
stream = io.StringIO()
csv.writer(stream, delimiter='\t').writerows(table)
QGuiApplication.clipboard().setText(stream.getvalue())

def contextMenuEvent(self, event) -> None:
    """
    Show a right-click menu offering 'Select All' and 'Copy'.

    The menu is shown at the global position reported by the event and blocks until the
    user picks an entry or dismisses it; dismissing the menu does nothing.

    Args:
        event (QContextMenuEvent): The context menu event.

    Returns:
        None
    """
    context_menu = QMenu(self)
    select_all_entry = context_menu.addAction("Select All")
    copy_entry = context_menu.addAction("Copy")

    chosen_entry = context_menu.exec(event.globalPos())

    if chosen_entry == select_all_entry:
        self.selectAll()
        return
    if chosen_entry == copy_entry:
        self.copy_selection()
2 changes: 2 additions & 0 deletions src/midrc_react/gui/pyside6/dataselectiongroupbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class JsdDataSelectionGroupBox(QGroupBox, GroupBoxData):
"""
num_data_items_changed: Signal = Signal(int)
file_checkbox_state_changed: Signal = Signal(bool)
file_combobox_changed: Signal = Signal(int)
NUM_DEFAULT_DATA_ITEMS: int = 2

def __init__(self, data_sources):
Expand Down Expand Up @@ -145,6 +146,7 @@ def add_file_combobox_to_layout(self, auto_populate: bool = True):
self.form_layout.insertRow(index - 1, new_label, new_hbox)

self.file_comboboxes.append(new_combobox)
new_combobox.currentIndexChanged.connect(self.file_combobox_changed.emit)
self.file_checkboxes.append(new_checkbox)
new_checkbox.toggled.connect(self.file_checkbox_state_changed.emit)

Expand Down
Loading