MIDRC · rtomek · May 23, 2025 · May 1, 2025 · May 1, 2025 · May 6, 2025
diff --git a/docs/source/modules.rst b/docs/source/modules.rst
@@ -1,7 +1,7 @@
-midrc_react
-===========
+src
+===
 
 .. toctree::
-   :maxdepth: 8
+   :maxdepth: 7
 
    midrc_react
diff --git a/jsdconfig-zipcode.yaml b/jsdconfig-zipcode.yaml
@@ -1,10 +1,22 @@
 data sources:
   # The data sources will be loaded in the order they are populated here
   - name: MIDRC
-    description: MIDRC Excel File
+    description: MIDRC TSV File
     data type: file
-    filename: data/MIDRC Open A1 and R1 - cumulative by batch.xlsx
-    remove column name text: [(CUSUM)]
+    filename: data/midrc_data_download-2025-01-29.tsv
+    columns:
+      - Age at Index
+      - Ethnicity
+      - Race
+      - Sex
+      - COVID-19 Positive
+      - Race and Ethnicity
+    numeric_cols:
+      Age at Index:
+        raw column: age_at_index
+        bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
+        labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
+    plugin: midrc_tsv_loader
 
   - name: CDC
     description: CDC Excel File
@@ -18,47 +30,71 @@ data sources:
     date: '2020-01-01' # The census file does not have a date column, so we specify the date here
 
   - name: MIDRC COVID+
-    description: MIDRC COVID+ Excel File
+    description: MIDRC COVID+ TSV File
     data type: file
-    filename: data/MIDRC Open A1 and R1 COVIDpos only - cumulative by batch.xlsx
-    remove column name text: [(CUSUM)]
+    filename: data/midrc_data_download-2025-01-29_covid_pos.tsv
+    columns:
+      - Age at Index
+      - Ethnicity
+      - Race
+      - Sex
+      - COVID-19 Positive
+      - Race and Ethnicity
+    numeric_cols:
+      Age at Index:
+        raw column: age_at_index
+        bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
+        labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
+    plugin: midrc_tsv_loader
 
   - name: Zip Code 1
-    description: Zip Code 1 Excel File
+    description: Zip Code 1 TSV File
     data type: file
     filename: data/midrc_data_download-2025-01-29_0.tsv
     columns:
       - Age at Index
       - Ethnicity
       - Race
       - Sex
-      - Covid19 Positive
+      - COVID-19 Positive
       - Race and Ethnicity
     numeric_cols:
       Age at Index:
         raw column: age_at_index
-        bins: [ 0, 17, 50, 65, 1000 ]
-        labels: ['0-17', "18-49", '50-64', '65+']
+        bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
+        labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
     plugin: midrc_tsv_loader
 
   - name: Zip Code 2
-    description: Zip Code 2 Excel File
+    description: Zip Code 2 TSV File
     data type: file
     filename: data/midrc_data_download-2025-01-29_1.tsv
     columns:
       - Age at Index
       - Ethnicity
       - Race
       - Sex
-      - Covid19 Positive
+      - COVID-19 Positive
       - Race and Ethnicity
     numeric_cols:
       Age at Index:
         raw column: age_at_index
-        bins: [ 0, 17, 50, 65, 1000 ]
-        labels: ['0-17', "18-49", '50-64', '65+']
+        bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
+        labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
     plugin: midrc_tsv_loader
 
+  - name: MIDRC COVID+
+    description: MIDRC COVID+ Excel File
+    data type: file
+    filename: data/MIDRC Open A1 and R1 COVIDpos only - cumulative by batch.xlsx
+    remove column name text: [(CUSUM)]
+
+  - name: MIDRC
+    description: MIDRC Excel File
+    data type: file
+    filename: data/MIDRC Open A1 and R1 - cumulative by batch.xlsx
+    remove column name text: [(CUSUM)]
+
 # TODO: The following should be moved into QSettings for modifications within the GUI
 # For custom age columns, please use .inf as the maximum age in the final age group
 custom age ranges:

diff --git a/src/midrc_react/core/aggregate_jsd_calc.py b/src/midrc_react/core/aggregate_jsd_calc.py
@@ -62,6 +62,22 @@ def calc_jsd_from_counts_dict(counts_dict, dataset_names):
 
     return output_dict
 
+def calc_jsd_by_features_combined(combined_df: pd.DataFrame, cols_to_use: list[str], dataset_column) -> dict[str, float]:
+    """Calculate JSD (via calc_jsd_from_counts_dict) between the datasets labelled by `dataset_column` in `combined_df`."""
+    # Count rows per combination of cols_to_use values, one column per dataset label (absent combinations -> 0)
+    pivot_table = combined_df.pivot_table(index=cols_to_use, columns=dataset_column, aggfunc='size', fill_value=0)
+    pivot_table = pivot_table.reset_index()
+
+    # Convert dataset columns to string in case they are integers (labels below are cast the same way)
+    pivot_table.columns = pivot_table.columns.astype(str)
+
+    labels = combined_df[dataset_column].unique().astype(str)
+    # One count vector per dataset label; a label with no column in the pivot table gets an all-zero vector
+    counts_dict = {dataset: pivot_table[dataset].values if dataset in pivot_table else np.zeros(len(pivot_table)) for
+                   dataset in labels}
+
+    return calc_jsd_from_counts_dict(counts_dict, labels)
+
 
 def calc_jsd_by_features(df_list: list[pd.DataFrame], cols_to_use: list[str]) -> dict[str, float]:
     """
@@ -76,21 +92,7 @@ def calc_jsd_by_features(df_list: list[pd.DataFrame], cols_to_use: list[str]) ->
     """
     dataset_column = '_dataset_'  # Temporary column name to store dataset information
     combined_df = combine_datasets_from_list(df_list, dataset_column)
-
-    # Pivot table to get counts for each combination
-    pivot_table = combined_df.pivot_table(index=cols_to_use, columns=dataset_column, aggfunc='size', fill_value=0)
-    pivot_table = pivot_table.reset_index()
-
-    # Convert dataset columns to string in case they are integers
-    pivot_table.columns = pivot_table.columns.astype(str)
-
-    labels = combined_df[dataset_column].unique()
-
-    # Create a dictionary to hold counts for each dataset
-    counts_dict = {dataset: pivot_table[dataset].values if dataset in pivot_table else np.zeros(len(pivot_table)) for
-                   dataset in labels}
-
-    return calc_jsd_from_counts_dict(counts_dict, labels)
+    return calc_jsd_by_features_combined(combined_df, cols_to_use, dataset_column)
 
 
 def calc_jsd_by_features_2df(df1: pd.DataFrame, df2: pd.DataFrame, cols_to_use: list[str]) -> float:

diff --git a/src/midrc_react/core/data_preprocessing.py b/src/midrc_react/core/data_preprocessing.py
@@ -60,8 +60,12 @@ def _adjust_outliers(df: pd.DataFrame, cut_column_name: str, column_name: str, b
     low_text = "Outlier_Low"
     high_text = "Outlier_High"
     print(f"WARNING: There are values outside the bins specified for the '{column_name}' column.")
-    df.loc[df[cut_column_name].isna() & (df[column_name] < bins[0]), cut_column_name] = low_text
-    df.loc[df[cut_column_name].isna() & (df[column_name] >= bins[-1]), cut_column_name] = high_text
+
+    # Coerce to numeric so strings and other non-numeric values become NaN and never match the comparisons below
+    col_numeric = pd.to_numeric(df[column_name], errors='coerce')
+
+    df.loc[df[cut_column_name].isna() & (col_numeric < bins[0]), cut_column_name] = low_text
+    df.loc[df[cut_column_name].isna() & (col_numeric >= bins[-1]), cut_column_name] = high_text
     df.loc[df[cut_column_name].isna(), cut_column_name] = new_text
     if (df[cut_column_name] == low_text).sum() > 0:
         print(f"         {(df[cut_column_name] == low_text).sum()} values are below the min bin value.\n"

diff --git a/src/midrc_react/core/excel_layout.py b/src/midrc_react/core/excel_layout.py
@@ -162,6 +162,11 @@ def build_data_frames_from_csv(self, filename: str):
         # Apply numeric column adjustments
         df = self.apply_numeric_column_adjustments(df)
 
+        # Convert every column listed in self._columns (when present in df) to string
+        for col in self._columns:
+            if col in df.columns:
+                df[col] = df[col].astype(str)
+
         self.raw_data = df
         self.create_sheets_from_df(df)
 

diff --git a/src/midrc_react/core/famd_calc.py b/src/midrc_react/core/famd_calc.py
@@ -165,15 +165,20 @@ def calc_famd_distances(df, cols_to_use, numeric_cols, dataset_column='_dataset_
         dict: Dictionary of distance values specified in distance_metrics for each dataset combination.
 
     """
-    return calc_distances_via_df(calc_famd_df(df, cols_to_use, numeric_cols, print_outliers=print_outliers),
+    return calc_distances_via_df(calc_famd_df(df,
+                                              cols_to_use,
+                                              numeric_cols,
+                                              dataset_column,
+                                              print_outliers=print_outliers
+                                              ),
                                  'famd_x_coordinates',
-                                 dataset_column,
+                                 dataset_column=dataset_column,
                                  distance_metrics=distance_metrics,
                                  jsd_scaled_bin_width=jsd_scaled_bin_width,
                                  )
 
 
-def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
+def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date, dataset_column='_dataset_'):
     """
     Calculate the KS2 distance between two datasets at a specific date.
 
@@ -190,7 +195,6 @@ def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
     df1_at_date = df1[df1['date'] <= calc_date]
     df2_at_date = df2[df2['date'] <= calc_date]
 
-    dataset_column = '_dataset_'
     combined_df = combine_datasets_from_list([df1_at_date, df2_at_date], dataset_column=dataset_column)
 
     distance_metrics = ['ks2']
@@ -199,7 +203,7 @@ def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
     return distance_dict['ks2']['Dataset 0 vs Dataset 1']
 
 
-def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list):
+def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list, dataset_column='_dataset_'):
     """
     Calculate the KS2 distance between two datasets at multiple dates.
 
@@ -213,10 +217,9 @@ def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list):
     Returns:
         list(float): list of KS2 distances at each date
     """
-    dataset_column = '_dataset_'
     combined_df = combine_datasets_from_list([df1, df2], dataset_column=dataset_column)
 
-    famd_df = calc_famd_df(combined_df, cols_to_use, numeric_cols)
+    famd_df = calc_famd_df(combined_df, cols_to_use, numeric_cols, dataset_column=dataset_column)
 
     # Add date column to the DataFrame after FAMD fitting
     famd_df['date'] = combined_df['date']

diff --git a/src/midrc_react/core/jsdcontroller.py b/src/midrc_react/core/jsdcontroller.py
@@ -96,10 +96,9 @@ def connect_signals(self):
         dataselectiongroupbox_class_name = type(jsd_view.dataselectiongroupbox).__name__
 
         if dataselectiongroupbox_class_name == 'JsdDataSelectionGroupBox':
-            for f_c in jsd_view.dataselectiongroupbox.file_comboboxes:
-                f_c.currentIndexChanged.connect(self.file_changed)
             jsd_view.dataselectiongroupbox.num_data_items_changed.connect(self.file_changed)
             jsd_view.dataselectiongroupbox.file_checkbox_state_changed.connect(self.file_changed)
+            jsd_view.dataselectiongroupbox.file_combobox_changed.connect(self.file_changed)
             jsd_view.dataselectiongroupbox.category_combobox.currentIndexChanged.connect(self.category_changed)
 
         elif dataselectiongroupbox_class_name == 'DataSelectionGroupBox':
@@ -399,12 +398,15 @@ def update_file_based_charts(self):
                 sheet_dict[i] = self.get_file_sheets_from_index(i)
 
         spider_plot_values = self.get_spider_plot_values(spider_plot_date)
-        self.jsd_view.update_spider_chart(spider_plot_values)
+        try:
+            self.jsd_view.update_spider_chart(spider_plot_values)
+        except (ValueError, KeyError, TypeError):
+            print('An error occurred during the update of the spider chart.')
 
         try:
             self.jsd_view.update_pie_chart_dock(sheet_dict)
         except (ValueError, KeyError, TypeError):
-            return False
+            print('An error occurred during the update of file-based charts.')
 
         return True
 

diff --git a/src/midrc_react/gui/pyside6/copyabletableview.py b/src/midrc_react/gui/pyside6/copyabletableview.py
@@ -23,7 +23,7 @@
 
 from PySide6.QtCore import QDate, QEvent, QObject, Qt
 from PySide6.QtGui import QGuiApplication, QKeySequence
-from PySide6.QtWidgets import QTableView
+from PySide6.QtWidgets import QTableView, QMenu
 
 
 class CopyableTableView(QTableView):
@@ -78,3 +78,23 @@ def copy_selection(self) -> None:
             stream = io.StringIO()
             csv.writer(stream, delimiter='\t').writerows(table)
             QGuiApplication.clipboard().setText(stream.getvalue())
+
+    def contextMenuEvent(self, event) -> None:
+        """
+        Show a right-click menu with 'Select All' and 'Copy' entries and run the chosen one.
+
+        Args:
+            event (QContextMenuEvent): The context menu event; its globalPos() is where the menu is shown.
+
+        Returns:
+            None
+        """
+        menu = QMenu(self)
+        select_all_action = menu.addAction("Select All")
+        copy_action = menu.addAction("Copy")
+        # exec() returns the triggered action; if neither entry was chosen, no branch below runs
+        action = menu.exec(event.globalPos())
+        if action == select_all_action:
+            self.selectAll()
+        elif action == copy_action:
+            self.copy_selection()
diff --git a/src/midrc_react/gui/pyside6/dataselectiongroupbox.py b/src/midrc_react/gui/pyside6/dataselectiongroupbox.py
@@ -38,6 +38,7 @@ class JsdDataSelectionGroupBox(QGroupBox, GroupBoxData):
     """
     num_data_items_changed: Signal = Signal(int)
     file_checkbox_state_changed: Signal = Signal(bool)
+    file_combobox_changed: Signal = Signal(int)
     NUM_DEFAULT_DATA_ITEMS: int = 2
 
     def __init__(self, data_sources):
@@ -145,6 +146,7 @@ def add_file_combobox_to_layout(self, auto_populate: bool = True):
         self.form_layout.insertRow(index - 1, new_label, new_hbox)
 
         self.file_comboboxes.append(new_combobox)
+        new_combobox.currentIndexChanged.connect(self.file_combobox_changed.emit)
         self.file_checkboxes.append(new_checkbox)
         new_checkbox.toggled.connect(self.file_checkbox_state_changed.emit)