From 07b9d27824d90c99226be5619a660eb1564f99ed Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 18 Jun 2026 15:20:02 -0400 Subject: [PATCH 1/2] Check for unique columns --- scallops/cli/pooled_if_sbs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index 6f45daf..49d7947 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -614,6 +614,8 @@ def merge_sbs_phenotype_pipeline( df_phenotype = pd.concat(df_phenotypes, axis=1, join=join_phenotype) elif len(df_phenotypes) == 1: df_phenotype = df_phenotypes[0] + if df_phenotype is not None: + assert not df_phenotype.columns.has_duplicates, "Duplicate columns" if df_labels is not None and df_phenotype is not None and df_barcode is not None: merged_df = merge_sbs_phenotype( df_labels=df_labels, From ba6bfca513ec65bdc4ab54c7108247627ff96e3c Mon Sep 17 00:00:00 2001 From: Joshua Gould Date: Thu, 18 Jun 2026 15:23:10 -0400 Subject: [PATCH 2/2] Check for unique columns --- scallops/cli/pooled_if_sbs.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py index 49d7947..69696e5 100644 --- a/scallops/cli/pooled_if_sbs.py +++ b/scallops/cli/pooled_if_sbs.py @@ -481,6 +481,7 @@ def _rename_unique(columns, unique_values, prefix): replace_chars = " |-" for value in columns: new_value = value + new_value = re.sub(replace_chars, "_", new_value) if value in unique_values: new_value = f"{value}_{prefix}" if new_value in unique_values: @@ -489,7 +490,7 @@ def _rename_unique(columns, unique_values, prefix): while new_value in unique_values: counter += 1 new_value = f"{value}_{prefix}_{counter}" - new_value = re.sub(replace_chars, "_", new_value) + if value != new_value: rename[value] = new_value @@ -615,7 +616,9 @@ def merge_sbs_phenotype_pipeline( elif len(df_phenotypes) == 1: df_phenotype = df_phenotypes[0] if df_phenotype is not None: - assert not df_phenotype.columns.has_duplicates, "Duplicate columns" + assert not df_phenotype.columns.has_duplicates, ( + f"Duplicate columns: {', '.join(df_phenotype.columns[df_phenotype.columns.duplicated()].to_list())}" + ) if df_labels is not None and df_phenotype is not None and df_barcode is not None: merged_df = merge_sbs_phenotype( df_labels=df_labels,