From ade2c17e95b438155915c309e165b13ae9416b5c Mon Sep 17 00:00:00 2001 From: Mark Barnes Date: Fri, 23 Jan 2026 16:01:06 -0800 Subject: [PATCH 1/5] Switch from Travis to Github Actions --- .github/workflows/ci.yml | 94 ++++++++++++++++++++++++++++++++++++++++ .travis.yml | 69 ----------------------------- 2 files changed, 94 insertions(+), 69 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..d0f320b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,94 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +permissions: + contents: read + packages: write + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.9'] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y poppler-utils + + - name: Upgrade pip, setuptools, and packaging + run: | + python -m pip install --upgrade pip setuptools packaging + + - name: Cache src directory + uses: actions/cache@v4 + with: + path: ./src/ + key: ${{ runner.os }}-src-grch37 + restore-keys: | + ${{ runner.os }}-src- + + - name: Download GRCh37.tar.gz if not present + run: | + if [ ! -f ./src/GRCh37.tar.gz ]; then + wget --connect-timeout=10 --tries=20 ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/ + fi + + - name: Install package with tests + run: | + pip install .[tests] + + - name: Install genome + run: | + python install_genome.py ${{ github.workspace }}/src/ + + - name: Run unit tests + run: | + pytest tests + + - name: Run integration test + run: | + python3 test.py + + - name: Build and push Docker image + if: github.ref == 'refs/heads/main' && github.event_name == 'push' && matrix.python-version == '3.12' + run: | + echo "Starting Docker deployment to GHCR for sigprofilersuite..." + + VERSION_TAG=$(grep "VERSION = " setup.py | cut -d'"' -f2) + + # Get the repository name and convert it to lowercase + REPO_NAME=$(basename ${{ github.repository }} | tr '[:upper:]' '[:lower:]') + IMAGE_NAME="ghcr.io/sigprofilersuite/$REPO_NAME" + + echo "Building version: $VERSION_TAG for image: $IMAGE_NAME" + + echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io \ + --username "${{ github.actor }}" \ + --password-stdin + + docker build \ + --build-arg COMMIT_SHA=${{ github.sha }} \ + -t $IMAGE_NAME:$VERSION_TAG \ + -t $IMAGE_NAME:latest . + + docker push $IMAGE_NAME:$VERSION_TAG + docker push $IMAGE_NAME:latest + + echo "Docker deployment to GHCR successful" diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8c273d2..0000000 --- a/.travis.yml +++ /dev/null @@ -1,69 +0,0 @@ -dist: focal -language: python - -branches: - only: - - main - -python: - - '3.9' - - '3.12' - -services: - - docker - -addons: - apt: - packages: - - poppler-utils - -before_install: - - pip install --upgrade pip setuptools packaging - - if ! [ -f ./src/GRCh37.tar.gz ]; then wget --connect-timeout=10 --tries=20 ftp://alexandrovlab-ftp.ucsd.edu/pub/tools/SigProfilerMatrixGenerator/GRCh37.tar.gz -P ./src/; fi - -install: - - pip install .[tests] - -cache: - directories: - - $TRAVIS_BUILD_DIR/src/ - -before_script: - - python3 install_genome.py $TRAVIS_BUILD_DIR/src/ - -script: - - pytest tests - - python3 test.py - -after_success: - - | - if [ "$TRAVIS_BRANCH" == "main" ] && [ "$TRAVIS_PULL_REQUEST" == "false" ] && [ "$TRAVIS_PYTHON_VERSION" == "3.12" ]; then - echo "Starting Docker deployment to GHCR for alexandrovlab..." - - VERSION_TAG=$(grep "VERSION = " setup.py | cut -d'"' -f2) - - # Get the repository name and convert it to lowercase - REPO_NAME=$(basename $TRAVIS_REPO_SLUG | tr '[:upper:]' '[:lower:]') - IMAGE_NAME="ghcr.io/alexandrovlab/$REPO_NAME" - - echo "Checking if $IMAGE_NAME:$VERSION_TAG already exists on GHCR..." - echo "$GHCR_PASSWORD" | docker login ghcr.io -u "$GHCR_USERNAME" --password-stdin - - if docker manifest inspect $IMAGE_NAME:$VERSION_TAG > /dev/null 2>&1; then - echo "Tag $IMAGE_NAME:$VERSION_TAG already exists. Skipping Docker push." - else - echo "Building version: $VERSION_TAG for image: $IMAGE_NAME" - - docker build \ - --build-arg COMMIT_SHA=$TRAVIS_COMMIT \ - -t $IMAGE_NAME:$VERSION_TAG \ - -t $IMAGE_NAME:latest . - - docker push $IMAGE_NAME:$VERSION_TAG - docker push $IMAGE_NAME:latest - - echo "Docker deployment to GHCR successful" - fi - else - echo "Skipping Docker deployment" - fi From 01e436cbb73e6aeb9110587c41ab1d6b6bfdf0b7 Mon Sep 17 00:00:00 2001 From: Mark Barnes Date: Fri, 23 Jan 2026 16:05:14 -0800 Subject: [PATCH 2/5] fix: use keyword arguments in to_csv() calls for Python 3.12 compatibility --- SigProfilerAssignment/decompose_subroutines.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/SigProfilerAssignment/decompose_subroutines.py b/SigProfilerAssignment/decompose_subroutines.py index b806572..b6211a4 100644 --- a/SigProfilerAssignment/decompose_subroutines.py +++ b/SigProfilerAssignment/decompose_subroutines.py @@ -1135,7 +1135,7 @@ def make_final_solution( + solution_prefix + "_" + "Signatures.txt", - "\t", + sep="\t", float_format="%.8f", index_label=[processes.columns.name], ) @@ -1153,7 +1153,7 @@ def make_final_solution( + solution_prefix + "_" + "Activities_refit.txt", - "\t", + sep="\t", index_label=[exposures.columns.name], ) else: @@ -1164,7 +1164,7 @@ def make_final_solution( + solution_prefix + "_" + "Activities.txt", - "\t", + sep="\t", index_label=[exposures.columns.name], ) @@ -1268,7 +1268,7 @@ def make_final_solution( + solution_prefix + "_" + "Signatures_SEM_Error.txt", - "\t", + sep="\t", float_format="%.2E", index_label=[processes.columns.name], ) @@ -1285,7 +1285,7 @@ def make_final_solution( + solution_prefix + "_" + "Signatures_Stats.txt", - "\t", + sep="\t", index_label=[exposures.columns.name], ) signature_total_mutations = np.sum(exposureAvg, axis=1).astype(int) @@ -1464,7 +1464,7 @@ def make_final_solution( + "/Activities" + "/" + "De_Novo_MutationType_Probabilities_refit.txt", - "\t", + sep="\t", ) else: probability.to_csv( @@ -1472,7 +1472,7 @@ def make_final_solution( + "/Activities" + "/" + "De_Novo_MutationType_Probabilities.txt", - "\t", + sep="\t", ) if not denovo_refit_option: probability.to_csv( @@ -1480,7 +1480,7 @@ def make_final_solution( + "/Activities" + "/" + "Decomposed_MutationType_Probabilities.txt", - "\t", + sep="\t", ) if export_probabilities_per_mutation: @@ -1526,7 +1526,7 @@ def make_final_solution( + "_" + sample + ".txt", - "\t", + sep="\t", ) else: print( From 73d32a033d3a65c086da353047a8a50450e35fb5 Mon Sep 17 00:00:00 2001 From: Mark Barnes Date: Fri, 23 Jan 2026 16:09:07 -0800 Subject: [PATCH 3/5] v1.1.2: Fixed to_csv() calls for Python 3.12 compatibility --- CHANGELOG.md | 7 +++++++ README.md | 2 +- setup.py | 4 ++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e6758b..8831961 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] +## [1.1.2] - 2026-01-23 +### Fixed +- Fixed `to_csv()` calls to use keyword arguments (`sep="\t"`) instead of positional arguments for Python 3.12 compatibility. This ensures compatibility with newer pandas versions that enforce keyword-only arguments. + +### Changed +- Switched CI/CD from Travis CI to GitHub Actions. Updated README badge to reflect the new CI/CD platform. + ## [1.1.1] - 2026-01-06 ### Added - Added missing COSMIC v3.5 signature files: exome versions for all supported genome builds, mm39 genome build signatures, and rn7 genome build signatures. diff --git a/README.md b/README.md index 2c2fb29..9522c7f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![Docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://osf.io/mz79v/wiki/home/) [![License](https://img.shields.io/badge/License-BSD\%202--Clause-orange.svg)](https://opensource.org/licenses/BSD-2-Clause) -[![Build Status](https://api.travis-ci.com/AlexandrovLab/SigProfilerAssignment.svg?branch=main)](https://app.travis-ci.com/AlexandrovLab/SigProfilerAssignment) +[![CI](https://github.com/SigProfilerSuite/SigProfilerAssignment/actions/workflows/ci.yml/badge.svg)](https://github.com/SigProfilerSuite/SigProfilerAssignment/actions/workflows/ci.yml) drawing diff --git a/setup.py b/setup.py index dd5c9f1..7150bc3 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ if os.path.exists("dist"): shutil.rmtree("dist") -VERSION = "1.1.1" +VERSION = "1.1.2" def write_version_py(filename="SigProfilerAssignment/version.py"): @@ -15,7 +15,7 @@ def write_version_py(filename="SigProfilerAssignment/version.py"): # THIS FILE IS GENERATED FROM SigProfilerAssignment SETUP.PY short_version = '%(version)s' version = '%(version)s' -Update = 'v1.1.1: Added missing COSMIC v3.5 signature files (exome versions, mm39, rn7)' +Update = 'v1.1.2: Fixed to_csv() calls for Python 3.12 compatibility' """ fh = open(filename, "w") From d0723a6dfff4bc9f6182412ede873ce3ac6a1bc6 Mon Sep 17 00:00:00 2001 From: Mark Barnes Date: Fri, 23 Jan 2026 16:13:17 -0800 Subject: [PATCH 4/5] Add 3.12 to CI --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d0f320b..94660c4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.9'] + python-version: ['3.9', '3.12'] steps: - uses: actions/checkout@v4 From caa3bbb264d9ea9423f48145d5ee319d43fabcf2 Mon Sep 17 00:00:00 2001 From: Mark Barnes Date: Fri, 23 Jan 2026 16:39:38 -0800 Subject: [PATCH 5/5] Fix Python 3.12 compatibility: update pandas indexing and to_csv() calls --- CHANGELOG.md | 5 +- .../PlotDecomposition_CNV48.py | 10 +- .../PlotDecomposition_DBS78.py | 10 +- .../PlotDecomposition_ID83.py | 14 +- .../PlotDecomposition_SBS1536.py | 10 +- .../PlotDecomposition_SBS288.py | 10 +- .../PlotDecomposition_SBS96.py | 10 +- .../PlotDecomposition_SV32.py | 10 +- .../SigProfilerPlottingMatrix.py | 126 +++++++++--------- .../decompose_subroutines.py | 6 +- 10 files changed, 107 insertions(+), 104 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8831961..d78d079 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [1.1.2] - 2026-01-23 ### Fixed -- Fixed `to_csv()` calls to use keyword arguments (`sep="\t"`) instead of positional arguments for Python 3.12 compatibility. This ensures compatibility with newer pandas versions that enforce keyword-only arguments. +- Fixed Python 3.12 compatibility issues with pandas DataFrame/Series indexing and method calls: + - Updated `to_csv()` calls to use keyword arguments (`sep="\t"`) instead of positional arguments + - Changed DataFrame/Series index access from `[0]` to `.iloc[0]` throughout the codebase + - Fixed dtype assignment when converting columns to strings with "%" suffix ### Changed - Switched CI/CD from Travis CI to GitHub Actions. Updated README badge to reflect the new CI/CD platform. diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_CNV48.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_CNV48.py index 6c620c7..aa6e047 100644 --- a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_CNV48.py +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_CNV48.py @@ -254,11 +254,11 @@ def plot_7_plus(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): # c_draw - (Canvas) The canvas to draw the graph decomposition on. # statistics - (Pandas Dataframe) Dataframe w/ calculations def draw_statistics(c_draw, statistics, sig_version, custom_text): - cos_sim = statistics["Cosine Similarity"][0] - cor_coeff = statistics["Correlation Coefficient"][0] - l1_norm_percent = statistics["L1 Norm %"][0] - l2_norm_percent = statistics["L2 Norm %"][0] - kl_divergence = statistics["KL Divergence"][0] + cos_sim = statistics["Cosine Similarity"].iloc[0] + cor_coeff = statistics["Correlation Coefficient"].iloc[0] + l1_norm_percent = statistics["L1 Norm %"].iloc[0] + l2_norm_percent = statistics["L2 Norm %"].iloc[0] + kl_divergence = statistics["KL Divergence"].iloc[0] c_draw.drawString( WIDTH_GAP + 15, diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_DBS78.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_DBS78.py index c64c9b8..014aa0e 100644 --- a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_DBS78.py +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_DBS78.py @@ -251,11 +251,11 @@ def plot_6_plus(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): # c_draw - (Canvas) The canvas to draw the graph decomposition on. # statistics - (Pandas Dataframe) Dataframe w/ calculations def draw_statistics(c_draw, statistics, sig_version, custom_text): - cos_sim = statistics["Cosine Similarity"][0] - cor_coeff = statistics["Correlation Coefficient"][0] - l1_norm_percent = statistics["L1 Norm %"][0] - l2_norm_percent = statistics["L2 Norm %"][0] - kl_divergence = statistics["KL Divergence"][0] + cos_sim = statistics["Cosine Similarity"].iloc[0] + cor_coeff = statistics["Correlation Coefficient"].iloc[0] + l1_norm_percent = statistics["L1 Norm %"].iloc[0] + l2_norm_percent = statistics["L2 Norm %"].iloc[0] + kl_divergence = statistics["KL Divergence"].iloc[0] c_draw.drawString( WIDTH_GAP + 15, diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_ID83.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_ID83.py index 34e8ee6..1afcd87 100644 --- a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_ID83.py +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_ID83.py @@ -245,13 +245,13 @@ def plot_6_plus(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): # c_draw - (Canvas) The canvas to draw the graph decomposition on. # statistics - (Pandas Dataframe) Dataframe w/ calculations def draw_statistics(c_draw, statistics, sig_version, custom_text): - cos_sim = statistics["Cosine Similarity"][0] - cos_dist = statistics["Cosine Distance"][0] - cor_dist = statistics["Correlation Distance"][0] - cor_coeff = statistics["Correlation Coefficient"][0] - l1_norm_percent = statistics["L1 Norm %"][0] - l2_norm_percent = statistics["L2 Norm %"][0] - kl_divergence = statistics["KL Divergence"][0] + cos_sim = statistics["Cosine Similarity"].iloc[0] + cos_dist = statistics["Cosine Distance"].iloc[0] + cor_dist = statistics["Correlation Distance"].iloc[0] + cor_coeff = statistics["Correlation Coefficient"].iloc[0] + l1_norm_percent = statistics["L1 Norm %"].iloc[0] + l2_norm_percent = statistics["L2 Norm %"].iloc[0] + kl_divergence = statistics["KL Divergence"].iloc[0] c_draw.drawString( WIDTH_GAP + 15, diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS1536.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS1536.py index 1ca7f45..00f694b 100644 --- a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS1536.py +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS1536.py @@ -251,11 +251,11 @@ def plot_6_plus(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): # c_draw - (Canvas) The canvas to draw the graph decomposition on. # statistics - (Pandas Dataframe) Dataframe w/ calculations def draw_statistics(c_draw, statistics, sig_version, custom_text): - cos_sim = statistics["Cosine Similarity"][0] - cor_coeff = statistics["Correlation Coefficient"][0] - l1_norm_percent = statistics["L1 Norm %"][0] - l2_norm_percent = statistics["L2 Norm %"][0] - kl_divergence = statistics["KL Divergence"][0] + cos_sim = statistics["Cosine Similarity"].iloc[0] + cor_coeff = statistics["Correlation Coefficient"].iloc[0] + l1_norm_percent = statistics["L1 Norm %"].iloc[0] + l2_norm_percent = statistics["L2 Norm %"].iloc[0] + kl_divergence = statistics["KL Divergence"].iloc[0] c_draw.drawString( WIDTH_GAP + 15, diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS288.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS288.py index 1382ac1..8528a28 100644 --- a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS288.py +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS288.py @@ -260,11 +260,11 @@ def plot_6_plus(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): # c_draw - (Canvas) The canvas to draw the graph decomposition on. # statistics - (Pandas Dataframe) Dataframe w/ calculations def draw_statistics(c_draw, statistics, sig_version, custom_text): - cos_sim = statistics["Cosine Similarity"][0] - cor_coeff = statistics["Correlation Coefficient"][0] - l1_norm_percent = statistics["L1 Norm %"][0] - l2_norm_percent = statistics["L2 Norm %"][0] - kl_divergence = statistics["KL Divergence"][0] + cos_sim = statistics["Cosine Similarity"].iloc[0] + cor_coeff = statistics["Correlation Coefficient"].iloc[0] + l1_norm_percent = statistics["L1 Norm %"].iloc[0] + l2_norm_percent = statistics["L2 Norm %"].iloc[0] + kl_divergence = statistics["KL Divergence"].iloc[0] c_draw.drawString( WIDTH_GAP + 15, diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS96.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS96.py index 8ee4be5..37a1aa8 100644 --- a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS96.py +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SBS96.py @@ -278,11 +278,11 @@ def plot_6_plus(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): # c_draw - (Canvas) The canvas to draw the graph decomposition on. # statistics - (Pandas Dataframe) Dataframe w/ calculations def draw_statistics(c_draw, statistics, sig_version, custom_text): - cos_sim = statistics["Cosine Similarity"][0] - cor_coeff = statistics["Correlation Coefficient"][0] - l1_norm_percent = statistics["L1 Norm %"][0] - l2_norm_percent = statistics["L2 Norm %"][0] - kl_divergence = statistics["KL Divergence"][0] + cos_sim = statistics["Cosine Similarity"].iloc[0] + cor_coeff = statistics["Correlation Coefficient"].iloc[0] + l1_norm_percent = statistics["L1 Norm %"].iloc[0] + l2_norm_percent = statistics["L2 Norm %"].iloc[0] + kl_divergence = statistics["KL Divergence"].iloc[0] c_draw.drawString( WIDTH_GAP + 15, diff --git a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SV32.py b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SV32.py index b3fe966..dfabd26 100644 --- a/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SV32.py +++ b/SigProfilerAssignment/DecompositionPlots/PlotDecomposition_SV32.py @@ -255,11 +255,11 @@ def plot_7_plus(bases, project, c_draw, denovo_plots_dict, basis_plots_dict): # c_draw - (Canvas) The canvas to draw the graph decomposition on. # statistics - (Pandas Dataframe) Dataframe w/ calculations def draw_statistics(c_draw, statistics, sig_version, custom_text): - cos_sim = statistics["Cosine Similarity"][0] - cor_coeff = statistics["Correlation Coefficient"][0] - l1_norm_percent = statistics["L1 Norm %"][0] - l2_norm_percent = statistics["L2 Norm %"][0] - kl_divergence = statistics["KL Divergence"][0] + cos_sim = statistics["Cosine Similarity"].iloc[0] + cor_coeff = statistics["Correlation Coefficient"].iloc[0] + l1_norm_percent = statistics["L1 Norm %"].iloc[0] + l2_norm_percent = statistics["L2 Norm %"].iloc[0] + kl_divergence = statistics["KL Divergence"].iloc[0] c_draw.drawString( WIDTH_GAP + 15, diff --git a/SigProfilerAssignment/DecompositionPlots/SigProfilerPlottingMatrix.py b/SigProfilerAssignment/DecompositionPlots/SigProfilerPlottingMatrix.py index b26940b..2b97592 100644 --- a/SigProfilerAssignment/DecompositionPlots/SigProfilerPlottingMatrix.py +++ b/SigProfilerAssignment/DecompositionPlots/SigProfilerPlottingMatrix.py @@ -43,9 +43,9 @@ def plotSBS( buff_list = dict() if plot_type == "96": first_line = matrix_path.iloc[0, :] - if first_line[0][1] == ">": + if first_line.iloc[0][1] == ">": pcawg = True - if first_line[0][5] != "]" and first_line[0][1] != ">": + if first_line.iloc[0][5] != "]" and first_line.iloc[0][1] != ">": sys.exit( "The matrix does not match the correct SBS96 format. Please check you formatting and rerun this plotting function." ) @@ -76,23 +76,23 @@ def plotSBS( if pcawg: line = matrix_path.iloc[lines_tmp, :] # line = lines.strip().split(",") - mut_type = line[0] - nuc = line[1][0] + "[" + mut_type + "]" + line[1][2] + mut_type = line.iloc[0] + nuc = line.iloc[1][0] + "[" + mut_type + "]" + line.iloc[1][2] sample_index = 2 else: line = matrix_path.iloc[lines_tmp, :] # line = lines.strip().split() - nuc = line[0] - mut_type = line[0][2:5] + nuc = line.iloc[0] + mut_type = line.iloc[0][2:5] sample_index = 1 for sample in samples: if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) mutations[sample][mut_type][nuc] = mutCount sample_index += 1 @@ -492,12 +492,12 @@ def plotSBS( elif plot_type == "192" or plot_type == "96SB" or plot_type == "384": first_line = matrix_path.iloc[0, :] - if first_line[0][6] == ">" or first_line[0][3] == ">": + if first_line.iloc[0][6] == ">" or first_line.iloc[0][3] == ">": pcawg = True if ( - first_line[0][7] != "]" - and first_line[0][6] != ">" - and first_line[0][3] != ">" + first_line.iloc[0][7] != "]" + and first_line.iloc[0][6] != ">" + and first_line.iloc[0][3] != ">" ): sys.exit( "The matrix does not match the correct SBS192 format. Please check you formatting and rerun this plotting function." @@ -530,25 +530,25 @@ def plotSBS( bias = line[0][0] else: line = matrix_path.iloc[lines_tmp, :] - nuc = line[0][2:] - bias = line[0][0] + nuc = line.iloc[0][2:] + bias = line.iloc[0][0] if bias == "N" or bias == "B": continue else: if pcawg: - mut_type = line[1] + mut_type = line.iloc[1] sample_index = 3 else: - mut_type = line[0][4:7] + mut_type = line.iloc[0][4:7] sample_index = 1 for sample in samples: if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) if nuc not in mutations[sample][mut_type].keys(): mutations[sample][mut_type][nuc] = [0, 0] if bias == "T": @@ -1024,11 +1024,11 @@ def plotSBS( for sample in samples: if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) mutations[sample][mut_type] = mutCount sample_index += 1 @@ -1251,11 +1251,11 @@ def plotSBS( sample_index = 1 for sample in samples: if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) if bias == "T": mutations[sample][nuc][0] = mutCount else: @@ -1463,12 +1463,12 @@ def plotSBS( elif plot_type == "288": first_line = matrix_path.iloc[0, :] - if first_line[0][6] == ">" or first_line[0][3] == ">": + if first_line.iloc[0][6] == ">" or first_line.iloc[0][3] == ">": pcawg = True if ( - first_line[0][7] != "]" - and first_line[0][6] != ">" - and first_line[0][3] != ">" + first_line.iloc[0][7] != "]" + and first_line.iloc[0][6] != ">" + and first_line.iloc[0][3] != ">" ): sys.exit( "The matrix does not match the correct SBS288 format. Please check you formatting and rerun this plotting function." @@ -1508,24 +1508,24 @@ def plotSBS( for lines_tmp in range(0, matrix_path.shape[0]): if pcawg: line = matrix_path.iloc[lines_tmp, :] - mut_type = line[0] - nuc = line[1][0] + "[" + mut_type + "]" + line[1][2] + mut_type = line.iloc[0] + nuc = line.iloc[1][0] + "[" + mut_type + "]" + line.iloc[1][2] sample_index = 2 else: line = matrix_path.iloc[lines_tmp, :] - nuc = line[0] - mut_type = line[0][4:7] + nuc = line.iloc[0] + mut_type = line.iloc[0][4:7] sample_index = 1 tsb = nuc[0] for sample in samples: if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: try: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) except: print( "It appears that the provided matrix does not contain mutation counts.\n\tIf you have provided a signature activity matrix, please change the percentage parameter to True.\n\tOtherwise, ", @@ -2068,9 +2068,9 @@ def plotSBS( ########################################################################################################################### elif plot_type == "1536": first_line = matrix_path.iloc[0, :] - if first_line[0][1] == ">": + if first_line.iloc[0][1] == ">": pcawg = True - if first_line[0][6] != "]" and first_line[0][1] != ">": + if first_line.iloc[0][6] != "]" and first_line.iloc[0][1] != ">": sys.exit( "The matrix does not match the correct SBS1536 format. Please check you formatting and rerun this plotting function." ) @@ -2367,23 +2367,23 @@ def plotSBS( sample_index = 2 else: line = matrix_path.iloc[lines_tmp, :] - nuc = line[0] - mut_type = line[0][3:6] - penta_key = line[0][0] + line[0][-1] - tri_key = line[0][1] + line[0][-2] + nuc = line.iloc[0] + mut_type = line.iloc[0][3:6] + penta_key = line.iloc[0][0] + line.iloc[0][-1] + tri_key = line.iloc[0][1] + line.iloc[0][-2] sample_index = 1 - tri = line[0][1:8] + tri = line.iloc[0][1:8] for sample in samples: if tri not in mutations_96[sample][mut_type]: mutations_96[sample][mut_type][tri] = 0 if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) if pcawg: sample_ref = sample_index - 2 @@ -3362,14 +3362,14 @@ def plotID( or plot_type == "83" ): first_line = matrix_path.iloc[0, :] - if first_line[0][1] == "D" or first_line[0][0] == "D": + if first_line.iloc[0][1] == "D" or first_line.iloc[0][0] == "D": pcawg = True - mutation_type = first_line[0] + mutation_type = first_line.iloc[0] mutation_type_list = mutation_type.split(":") if ( len(mutation_type_list) != 4 - and first_line[0][1] != "D" - and first_line[0][0] != "D" + and first_line.iloc[0][1] != "D" + and first_line.iloc[0][0] != "D" ): sys.exit( "The matrix does not match the correct ID96 format. Please check you formatting and rerun this plotting function." @@ -3522,9 +3522,9 @@ def plotID( sample_index = 4 else: line = matrix_path.iloc[lines_tmp, :] - if line[0] not in indel_types: + if line.iloc[0] not in indel_types: continue - categories = line[0].split(":") + categories = line.iloc[0].split(":") mut_type = categories[0] + categories[1] + categories[2] repeat_size = int(categories[3]) if categories[2] == "M": @@ -3534,11 +3534,11 @@ def plotID( for sample in samples: if mut_type in mutations[sample].keys(): if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) mutations[sample][mut_type][repeat_size] = mutCount else: continue @@ -4601,11 +4601,11 @@ def plotID( for sample in samples: # if mut_type in mutations[sample].keys(): if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) mutations[sample][mut_type][repeat_size] = mutCount # else: @@ -5369,11 +5369,11 @@ def plotID( for sample in samples: if mut_type in mutations[sample].keys(): if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) if bias == "T": mutations[sample][mut_type][repeat_size][0] = mutCount else: @@ -6198,10 +6198,10 @@ def plotDBS( if plot_type == "78" or plot_type == "78DBS" or plot_type == "DBS78": first_line = matrix_path.iloc[0, :] - mutation_type = first_line[0] - if first_line[0][2] != ">": + mutation_type = first_line.iloc[0] + if first_line.iloc[0][2] != ">": pcawg = True - if len(mutation_type) != 5 and first_line[0][2] == ">": + if len(mutation_type) != 5 and first_line.iloc[0][2] == ">": sys.exit( "The matrix does not match the correct DBS96 format. Please check you formatting and rerun this plotting function." ) @@ -6326,9 +6326,9 @@ def plotDBS( sample_index = 2 else: line = matrix_path.iloc[lines_tmp, :] - mut = line[0] - nuc = line[0][3:] - mut_type = line[0][0:2] + mut = line.iloc[0] + nuc = line.iloc[0][3:] + mut_type = line.iloc[0][0:2] if mut not in dinucs: nuc = revcompl(nuc) mut_type = revcompl(mut_type) @@ -6336,11 +6336,11 @@ def plotDBS( for sample in samples: if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) mutations[sample][mut_type][nuc] = mutCount sample_index += 1 @@ -6942,11 +6942,11 @@ def plotDBS( for sample in samples: if percentage: - mutCount = float(line[sample_index]) + mutCount = float(line.iloc[sample_index]) if mutCount < 1 and mutCount > 0: sig_probs = True else: - mutCount = int(line[sample_index]) + mutCount = int(line.iloc[sample_index]) if nuc not in mutations[sample][mut_type]: mutations[sample][mut_type][nuc] = [0, 0] if bias == "T": diff --git a/SigProfilerAssignment/decompose_subroutines.py b/SigProfilerAssignment/decompose_subroutines.py index b6211a4..005738d 100644 --- a/SigProfilerAssignment/decompose_subroutines.py +++ b/SigProfilerAssignment/decompose_subroutines.py @@ -1234,9 +1234,9 @@ def make_final_solution( all_similarities, cosine_similarities = calculate_similarities( allgenomes, est_genomes, allcolnames ) - all_similarities.iloc[:, [3, 5]] = ( - all_similarities.iloc[:, [3, 5]].astype(str) + "%" - ) + # Convert columns to string with "%" suffix, using column names to avoid dtype conflicts in Python 3.12 + all_similarities["L1_Norm_%"] = all_similarities["L1_Norm_%"].astype(str) + "%" + all_similarities["L2_Norm_%"] = all_similarities["L2_Norm_%"].astype(str) + "%" if refit_denovo_signatures: all_similarities.to_csv(