From 07b9d27824d90c99226be5619a660eb1564f99ed Mon Sep 17 00:00:00 2001
From: Joshua Gould <joshua-gould@users.noreply.github.com>
Date: Thu, 18 Jun 2026 15:20:02 -0400
Subject: [PATCH 1/2] Check for unique columns

---
 scallops/cli/pooled_if_sbs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py
index 6f45daf..49d7947 100644
--- a/scallops/cli/pooled_if_sbs.py
+++ b/scallops/cli/pooled_if_sbs.py
@@ -614,6 +614,8 @@ def merge_sbs_phenotype_pipeline(
             df_phenotype = pd.concat(df_phenotypes, axis=1, join=join_phenotype)
     elif len(df_phenotypes) == 1:
         df_phenotype = df_phenotypes[0]
+    if df_phenotype is not None:
+        assert not df_phenotype.columns.has_duplicates, "Duplicate columns"
     if df_labels is not None and df_phenotype is not None and df_barcode is not None:
         merged_df = merge_sbs_phenotype(
             df_labels=df_labels,

From ba6bfca513ec65bdc4ab54c7108247627ff96e3c Mon Sep 17 00:00:00 2001
From: Joshua Gould <joshua-gould@users.noreply.github.com>
Date: Thu, 18 Jun 2026 15:23:10 -0400
Subject: [PATCH 2/2] Replace chars before rename check; list duplicate columns

---
 scallops/cli/pooled_if_sbs.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scallops/cli/pooled_if_sbs.py b/scallops/cli/pooled_if_sbs.py
index 49d7947..69696e5 100644
--- a/scallops/cli/pooled_if_sbs.py
+++ b/scallops/cli/pooled_if_sbs.py
@@ -481,6 +481,7 @@ def _rename_unique(columns, unique_values, prefix):
     replace_chars = " |-"
     for value in columns:
         new_value = value
+        new_value = re.sub(replace_chars, "_", new_value)
         if value in unique_values:
             new_value = f"{value}_{prefix}"
             if new_value in unique_values:
@@ -489,7 +490,7 @@ def _rename_unique(columns, unique_values, prefix):
                 while new_value in unique_values:
                     counter += 1
                     new_value = f"{value}_{prefix}_{counter}"
-        new_value = re.sub(replace_chars, "_", new_value)
+
         if value != new_value:
             rename[value] = new_value
 
@@ -615,7 +616,9 @@ def merge_sbs_phenotype_pipeline(
     elif len(df_phenotypes) == 1:
         df_phenotype = df_phenotypes[0]
     if df_phenotype is not None:
-        assert not df_phenotype.columns.has_duplicates, "Duplicate columns"
+        assert not df_phenotype.columns.has_duplicates, (
+            f"Duplicate columns: {', '.join(df_phenotype.columns[df_phenotype.columns.duplicated()].to_list())}"
+        )
     if df_labels is not None and df_phenotype is not None and df_barcode is not None:
         merged_df = merge_sbs_phenotype(
             df_labels=df_labels,