diff --git a/CHANGELOG.md b/CHANGELOG.md index d78d079..68acf21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] +## [1.1.3] - 2026-01-23 +### Fixed +- Fixed Python 3.12 compatibility issues with pandas StringArray in `set_index()` calls: + - Replaced `set_index()` with direct index assignment using `pd.Index()` to avoid StringArray unhashable type errors + - Fixed index restoration from `devopts` dictionary in both `denovo_refit_option` and `decompose_fit_option` paths + - Fixed index assignment in `make_final_solution()` for process matrices, exposure matrices, and signature statistics + ## [1.1.2] - 2026-01-23 ### Fixed - Fixed Python 3.12 compatibility issues with pandas DataFrame/Series indexing and method calls: diff --git a/SigProfilerAssignment/decompose_subroutines.py b/SigProfilerAssignment/decompose_subroutines.py index 005738d..fab3866 100644 --- a/SigProfilerAssignment/decompose_subroutines.py +++ b/SigProfilerAssignment/decompose_subroutines.py @@ -1125,7 +1125,19 @@ def make_final_solution( pass processAvg = pd.DataFrame(processAvg.astype(float)) - processes = processAvg.set_index(index) + # Convert index to pandas Index to handle StringArray compatibility in Python 3.12+ + # StringArray is unhashable and cannot be used directly with set_index + if isinstance(index, str): + processes = processAvg.set_index(index) + else: + # Convert to list first, then create pandas Index and assign directly + if hasattr(index, 'tolist'): + index_list = index.tolist() + else: + index_list = list(index) + # Assign index directly instead of using set_index to avoid column lookup + processes = processAvg.copy() + processes.index = pd.Index(index_list) processes.columns = allsigids processes = processes.rename_axis("MutationType", axis="columns") processes.to_csv( @@ -1140,8 +1152,17 @@ def make_final_solution( index_label=[processes.columns.name], ) exposureAvg = pd.DataFrame(exposureAvg.astype(int)) - allsigids = np.array(allsigids) - exposures = exposureAvg.set_index(allsigids) + # Convert allsigids to list to handle StringArray compatibility in Python 3.12+ + if isinstance(allsigids, str): + allsigids_list = allsigids + elif hasattr(allsigids, 'tolist'): + allsigids_list = allsigids.tolist() + else: + allsigids_list = list(allsigids) + allsigids = np.array(allsigids_list) + # Assign index directly instead of using set_index to avoid column lookup + exposures = exposureAvg.copy() + exposures.index = pd.Index(allsigids_list) exposures.columns = allcolnames exposures = exposures.T exposures = exposures.rename_axis("Samples", axis="columns") @@ -1258,7 +1279,18 @@ def make_final_solution( if refit_denovo_signatures: try: process_std_error = pd.DataFrame(process_std_error) - processSTE = process_std_error.set_index(index) + # Convert index to pandas Index to handle StringArray compatibility in Python 3.12+ + if isinstance(index, str): + processSTE = process_std_error.set_index(index) + else: + # Convert to list first, then create pandas Index and assign directly + if hasattr(index, 'tolist'): + index_list = index.tolist() + else: + index_list = list(index) + # Assign index directly instead of using set_index to avoid column lookup + processSTE = process_std_error.copy() + processSTE.index = pd.Index(index_list) processSTE.columns = allsigids processSTE = processSTE.rename_axis("MutationType", axis="columns") processSTE.to_csv( @@ -1276,7 +1308,16 @@ def make_final_solution( pass if refit_denovo_signatures: try: - signature_stats = signature_stats.set_index(allsigids) + # Convert allsigids to list to handle StringArray compatibility in Python 3.12+ + if isinstance(allsigids, str): + allsigids_list = allsigids + elif hasattr(allsigids, 'tolist'): + allsigids_list = allsigids.tolist() + else: + allsigids_list = list(allsigids) + # Assign index directly instead of using set_index to avoid column lookup + signature_stats = signature_stats.copy() + signature_stats.index = pd.Index(allsigids_list) signature_stats = signature_stats.rename_axis("Signatures", axis="columns") signature_stats.to_csv( layer_directory diff --git a/SigProfilerAssignment/decomposition.py b/SigProfilerAssignment/decomposition.py index 119839d..1748b34 100644 --- a/SigProfilerAssignment/decomposition.py +++ b/SigProfilerAssignment/decomposition.py @@ -577,7 +577,19 @@ def spa_analyze( listOfSignatures = devopts["listOfSignatures"] index = devopts["index"] colnames = devopts["colnames"] - genomes = genomes.set_index(index) + # Convert index to pandas Index to handle StringArray compatibility in Python 3.12+ + # StringArray is unhashable and cannot be used directly with set_index + if isinstance(index, str): + # If it's a column name, use set_index normally + genomes = genomes.set_index(index) + else: + # Convert to list first, then create pandas Index and assign directly + if hasattr(index, 'tolist'): + index_list = index.tolist() + else: + index_list = list(index) + # Assign index directly instead of using set_index to avoid column lookup + genomes.index = pd.Index(index_list) genomes.columns = colnames # genomes = genomes.rename_axis("Mutation Types", axis="columns") @@ -766,7 +778,19 @@ def spa_analyze( listOfSignatures = devopts["listOfSignatures"] index = devopts["index"] colnames = devopts["colnames"] - genomes = genomes.set_index(index) + # Convert index to pandas Index to handle StringArray compatibility in Python 3.12+ + # StringArray is unhashable and cannot be used directly with set_index + if isinstance(index, str): + # If it's a column name, use set_index normally + genomes = genomes.set_index(index) + else: + # Convert to list first, then create pandas Index and assign directly + if hasattr(index, 'tolist'): + index_list = index.tolist() + else: + index_list = list(index) + # Assign index directly instead of using set_index to avoid column lookup + genomes.index = pd.Index(index_list) genomes.columns = colnames make_decomposition_plots = devopts["make_decomposition_plots"] # genomes = genomes.rename_axis("Mutation Types", axis="columns") diff --git a/setup.py b/setup.py index 7150bc3..6c360e2 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if os.path.exists("dist"): shutil.rmtree("dist") -VERSION = "1.1.2" +VERSION = "1.1.3" def write_version_py(filename="SigProfilerAssignment/version.py"): @@ -15,7 +15,7 @@ def write_version_py(filename="SigProfilerAssignment/version.py"): # THIS FILE IS GENERATED FROM SigProfilerAssignment SETUP.PY short_version = '%(version)s' version = '%(version)s' -Update = 'v1.1.2: Fixed to_csv() calls for Python 3.12 compatibility' +Update = 'v1.1.3: Fixed set_index() calls for Python 3.12 StringArray compatibility' """ fh = open(filename, "w")