From 83e2dc87cf4d5975920c98c6dc8971d03c0dd6b7 Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Fri, 6 Feb 2026 16:12:06 -0500 Subject: [PATCH 1/3] Fix FASTA header to preserve mutation identity through netMHCpan Prepend identifier_key (e.g. 2CAE_MC) to MUT and WT FASTA headers so that netMHCpan Identity column retains the encoded mutation type needed by generate_input.py for downstream neoantigen input generation. Co-Authored-By: Claude Opus 4.6 --- .../1.2/resources/usr/bin/generateMutFasta.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py index ed576e38..6d309efe 100755 --- a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py +++ b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py @@ -131,9 +131,9 @@ def main(): + " Alt:" + str(mut.maf_row["Tumor_Seq_Allele2"]) ) - out_fa.write(">" + id_string + "\n") + out_fa.write(">" + mut.identifier_key + " " + id_string + "\n") out_fa.write(mut.mt_altered_aa + "\n") - out_WT_fa.write(">" + id_string + "\n") + out_WT_fa.write(">" + mut.identifier_key + " " + id_string + "\n") out_WT_fa.write(mut.wt_altered_aa + "\n") ### write out WT/MT CDS + AA for debugging purposes From 81acf713656392b60b394c9e549389c3fa622f6b Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Fri, 6 Feb 2026 16:24:55 -0500 Subject: [PATCH 2/3] Update generatemutfasta/1.2 test snapshots for identifier_key header Co-Authored-By: Claude Opus 4.6 --- .../generatemutfasta/1.2/tests/main.nf.test.snap | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap index 60c19112..ee0f19e9 100644 --- 
a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap +++ b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap @@ -10,7 +10,7 @@ "id": "test", "single_end": false }, - "test.MUT.sequences.fa:md5,dff338ec438ac75aa674b64ea8e26544" + "test.MUT.sequences.fa:md5,5c86389b9b56eaa6f9aeb9391c7dea9d" ] ], [ @@ -19,16 +19,16 @@ "id": "test", "single_end": false }, - "test.WT.sequences.fa:md5,51415a40a725a16eaa8f5c51fa43799e" + "test.WT.sequences.fa:md5,02e4c68d88d856416b40cef391049211" ] ], "test_generate_mut_fasta.log" ], "meta": { "nf-test": "0.9.2", - "nextflow": "25.10.2" + "nextflow": "24.10.5" }, - "timestamp": "2026-01-28T15:30:02.572804474" + "timestamp": "2026-02-06T16:24:15.454193" }, "generatemutfasta_1.2 - maf - fasta - stub": { "content": [ @@ -96,9 +96,9 @@ } ], "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" + "nf-test": "0.9.2", + "nextflow": "24.10.5" }, - "timestamp": "2025-12-18T16:08:27.448965" + "timestamp": "2026-02-06T16:24:30.641252" } } \ No newline at end of file From 0e81776d00ecd79b6cc5edd8f6b607d1a6b21c91 Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Mon, 9 Feb 2026 15:25:32 -0500 Subject: [PATCH 3/3] Use identifier_key with _M/_W suffix as sole FASTA header netMHCpan concatenates the entire FASTA header (replacing spaces with underscores) and truncates to 15 characters. The previous commit included transcript and variant info after identifier_key, producing truncated Identity values that didn't match generate_input.py's mutation_dict keys. Restore the original format of >identifier_key_M and >identifier_key_W to keep identities within netMHCpan's 15-character limit. 
Co-Authored-By: Claude Opus 4.6 --- .../1.2/resources/usr/bin/generateMutFasta.py | 17 ++--------------- .../1.2/tests/main.nf.test.snap | 8 ++++---- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py index 6d309efe..1afc95ce 100755 --- a/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py +++ b/modules/msk/generatemutfasta/1.2/resources/usr/bin/generateMutFasta.py @@ -118,22 +118,9 @@ def main(): n_missing_tx_id += 1 if len(mut.mt_altered_aa) > 5: - id_string = ( - str(mut.maf_row["Transcript_ID"]) - + " Variant " - + str(mut.maf_row["Chromosome"]) - + ":" - + str(mut.maf_row["Start_Position"]) - + "-" - + str(mut.maf_row["End_Position"]) - + " Ref:" - + str(mut.maf_row["Reference_Allele"]) - + " Alt:" - + str(mut.maf_row["Tumor_Seq_Allele2"]) - ) - out_fa.write(">" + mut.identifier_key + " " + id_string + "\n") + out_fa.write(">" + mut.identifier_key + "_M\n") out_fa.write(mut.mt_altered_aa + "\n") - out_WT_fa.write(">" + mut.identifier_key + " " + id_string + "\n") + out_WT_fa.write(">" + mut.identifier_key + "_W\n") out_WT_fa.write(mut.wt_altered_aa + "\n") ### write out WT/MT CDS + AA for debugging purposes diff --git a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap index ee0f19e9..cc9060bc 100644 --- a/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap +++ b/modules/msk/generatemutfasta/1.2/tests/main.nf.test.snap @@ -10,7 +10,7 @@ "id": "test", "single_end": false }, - "test.MUT.sequences.fa:md5,5c86389b9b56eaa6f9aeb9391c7dea9d" + "test.MUT.sequences.fa:md5,3d2ff66590a4329f9a24e03bdf84e0ab" ] ], [ @@ -19,7 +19,7 @@ "id": "test", "single_end": false }, - "test.WT.sequences.fa:md5,02e4c68d88d856416b40cef391049211" + "test.WT.sequences.fa:md5,4bfcfc4d29d01ddc4108f39350936228" ] ], "test_generate_mut_fasta.log" @@ 
-28,7 +28,7 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2026-02-06T16:24:15.454193" + "timestamp": "2026-02-09T15:03:37.884362" }, "generatemutfasta_1.2 - maf - fasta - stub": { "content": [ @@ -99,6 +99,6 @@ "nf-test": "0.9.2", "nextflow": "24.10.5" }, - "timestamp": "2026-02-06T16:24:30.641252" + "timestamp": "2026-02-09T15:03:51.569621" } } \ No newline at end of file