diff --git a/modules/nf-core/percolator/environment.yml b/modules/nf-core/percolator/environment.yml
new file mode 100644
index 00000000000..966fe62ad10
--- /dev/null
+++ b/modules/nf-core/percolator/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::percolator=3.7.1"
diff --git a/modules/nf-core/percolator/main.nf b/modules/nf-core/percolator/main.nf
new file mode 100644
index 00000000000..cfcc0783f9e
--- /dev/null
+++ b/modules/nf-core/percolator/main.nf
@@ -0,0 +1,69 @@
+// Run Percolator on a peptide identification file and collect its XML, pepXML,
+// feature, weight and target/decoy result files, all named after `prefix`.
+process PERCOLATOR {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    // Docker image is named 'organisation/container:tag' without a registry host (nf-core convention).
+    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
+        ? 'https://depot.galaxyproject.org/singularity/percolator:3.7.1--h6351f2a_0'
+        : 'biocontainers/percolator:3.7.1--h6351f2a_0'}"
+
+    input:
+    tuple val(meta), path(peptide_identification)
+
+    output:
+    // Outputs marked `optional: true` may be absent without failing the task.
+    tuple val(meta), path("${prefix}.pout.xml"), emit: pout_xml
+    tuple val(meta), path("${prefix}.pep.xml"), emit: pout_pepxml
+    tuple val(meta), path("${prefix}.features.pin"), emit: features_pin
+    tuple val(meta), path("${prefix}.weights.tsv"), emit: weights
+    tuple val(meta), path("${prefix}.pep.target.pout"), emit: target_peptides, optional: true
+    tuple val(meta), path("${prefix}.pep.decoy.pout"), emit: decoy_peptides, optional: true
+    tuple val(meta), path("${prefix}.psm.target.pout"), emit: target_psms
+    tuple val(meta), path("${prefix}.psm.decoy.pout"), emit: decoy_psms
+    tuple val(meta), path("${prefix}.protein.target.pout"), emit: target_proteins, optional: true
+    tuple val(meta), path("${prefix}.protein.decoy.pout"), emit: decoy_proteins, optional: true
+    // The version is parsed from the first line of `percolator --help`.
+    tuple val("${task.process}"), val('percolator'), eval('percolator --help 2>&1 | head -1 | sed "s;Percolator version \\([^,]*\\),.*;\\1;"'), topic: versions, emit: versions_percolator
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // `prefix` is assigned without `def`; the output block interpolates ${prefix}.
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    percolator \\
+        ${args} \\
+        --num-threads ${task.cpus} \\
+        --xmloutput ${prefix}.pout.xml \\
+        --pepxml-output ${prefix}.pep.xml \\
+        --tab-out ${prefix}.features.pin \\
+        --weights ${prefix}.weights.tsv \\
+        --results-peptides ${prefix}.pep.target.pout \\
+        --decoy-results-peptides ${prefix}.pep.decoy.pout \\
+        --results-psms ${prefix}.psm.target.pout \\
+        --decoy-results-psms ${prefix}.psm.decoy.pout \\
+        --results-proteins ${prefix}.protein.target.pout \\
+        --decoy-results-proteins ${prefix}.protein.decoy.pout \\
+        ${peptide_identification}
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.pout.xml
+    touch ${prefix}.pep.xml
+    touch ${prefix}.features.pin
+    touch ${prefix}.weights.tsv
+    touch ${prefix}.pep.target.pout
+    touch ${prefix}.pep.decoy.pout
+    touch ${prefix}.psm.target.pout
+    touch ${prefix}.psm.decoy.pout
+    touch ${prefix}.protein.target.pout
+    touch ${prefix}.protein.decoy.pout
+    """
+}
diff --git a/modules/nf-core/percolator/meta.yml b/modules/nf-core/percolator/meta.yml
new file mode 100644
index 00000000000..b057e827b94
--- /dev/null
+++ b/modules/nf-core/percolator/meta.yml
@@ -0,0 +1,151 @@
+name: "percolator"
+description: Rescore peptide-spectrum matches and estimate false discovery rates
+  using the Percolator semi-supervised learning algorithm.
+keywords:
+  - proteomics
+  - spectrum identification
+  - psm
+  - peptide
+  - protein
+  - rescoring
+  - false discovery rate
+  - features
+tools:
+  - "percolator":
+      description: "Semi-supervised learning for peptide identification from shotgun
+        proteomics datasets."
+      homepage: "http://percolator.ms"
+      documentation: "http://percolator.ms"
+      tool_dev_url: "https://github.com/percolator/percolator"
+      doi: "10.1038/nmeth1113"
+      licence: ["Apache-2.0"]
+      identifier: biotools:percolator
+
+input:
+  - - meta:
+        type: map
+        description: Groovy Map containing sample information. e.g. `[
+          id:'sample1' ]`
+    - peptide_identification:
+        type: file
+        description: peptide identifications as PIN (Percolator input) file
+        pattern: "*.pin"
+        ontologies: []
+output:
+  pout_xml:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.pout.xml:
+          type: file
+          description: Percolator output in XML format containing all PSM-level results
+          pattern: "*.pout.xml"
+          ontologies:
+            - edam: "http://edamontology.org/format_2332" # XML
+  pout_pepxml:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.pep.xml:
+          type: file
+          description: Percolator output in pepXML format
+          pattern: "*.pep.xml"
+          ontologies:
+            - edam: "http://edamontology.org/format_3655" # pepXML
+  features_pin:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.features.pin:
+          type: file
+          description: Tab-separated file with rescored features (PIN format)
+          pattern: "*.features.pin"
+          ontologies: []
+  weights:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.weights.tsv:
+          type: file
+          description: TSV file containing the final feature weights
+          pattern: "*.weights.tsv"
+          ontologies: []
+  target_peptides:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.pep.target.pout:
+          type: file
+          description: Target peptide-level results in tab separated format (pout)
+          pattern: "*.pep.target.pout"
+          ontologies: []
+  decoy_peptides:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.pep.decoy.pout:
+          type: file
+          description: Decoy peptide-level results in tab separated format (pout)
+          pattern: "*.pep.decoy.pout"
+          ontologies: []
+  target_psms:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.psm.target.pout:
+          type: file
+          description: Target PSM-level results in tab separated format (pout)
+          pattern: "*.psm.target.pout"
+          ontologies: []
+  decoy_psms:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.psm.decoy.pout:
+          type: file
+          description: Decoy PSM-level results in tab separated format (pout)
+          pattern: "*.psm.decoy.pout"
+          ontologies: []
+  target_proteins:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.protein.target.pout:
+          type: file
+          description: Target protein-level results in tab separated format (pout)
+          pattern: "*.protein.target.pout"
+          ontologies: []
+  decoy_proteins:
+    - - meta:
+          type: map
+          description: Groovy Map containing sample information. e.g. `[ id:'sample1' ]`
+      - ${prefix}.protein.decoy.pout:
+          type: file
+          description: Decoy protein-level results in tab separated format (pout)
+          pattern: "*.protein.decoy.pout"
+          ontologies: []
+  versions_percolator:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - percolator:
+          type: string
+          description: The name of the tool
+      - percolator --help 2>&1 | head -1 | sed "s;Percolator version \([^,]*\),.*;\1;":
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - percolator:
+          type: string
+          description: The name of the tool
+      - percolator --help 2>&1 | head -1 | sed "s;Percolator version \([^,]*\),.*;\1;":
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@julianu"
+maintainers:
+  - "@julianu"
diff --git a/modules/nf-core/percolator/tests/main.nf.test b/modules/nf-core/percolator/tests/main.nf.test
new file mode 100644
index 00000000000..f32c0193bd5
--- /dev/null
+++ b/modules/nf-core/percolator/tests/main.nf.test
@@ -0,0 +1,99 @@
+nextflow_process {
+
+    name "Test Process PERCOLATOR"
+    script "../main.nf"
+    process "PERCOLATOR"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "percolator"
+    tag "comet"
+    tag "openms/decoydatabase"
+
+    // The PIN input is produced on the fly: OPENMS_DECOYDATABASE output feeds COMET, whose `pin` output feeds the tests.
+    setup {
+        run("OPENMS_DECOYDATABASE") {
+            script "../../openms/decoydatabase/main.nf"
+            process {
+                """
+                input[0] = channel.of([
+                    [ id:'test_db' ],
+                    file(params.modules_testdata_base_path + 'proteomics/database/yeast_UPS.fasta', checkIfExists: true),
+                ])
+                """
+            }
+        }
+
+        // COMET gets a patched copy of the parameter file: output_percolatorfile = 1, txt and mzIdentML output = 0.
+        run("COMET") {
+            script "../../comet/main.nf"
+
+            process {
+                """
+                def originalParams = file(params.modules_testdata_base_path + 'proteomics/parameter/OVEMB150205.comet.params', checkIfExists: true)
+                def modifiedParams = new File('./modified.comet.params')
+                modifiedParams.text = originalParams.text
+                    .replaceAll('(?m)^output_percolatorfile =.*', 'output_percolatorfile = 1')
+                    .replaceAll('(?m)^output_txtfile =.*', 'output_txtfile = 0')
+                    .replaceAll('(?m)^output_mzidentmlfile =.*', 'output_mzidentmlfile = 0')
+
+                input[0] = OPENMS_DECOYDATABASE.out.decoy_fasta.map { meta, fasta ->
+                    [
+                        [ id:'test'],
+                        file(params.modules_testdata_base_path + 'proteomics/msspectra/OVEMB150205_12.mzML', checkIfExists: true),
+                        fasta,
+                        file(modifiedParams),
+                    ]
+                }
+                """
+            }
+        }
+    }
+
+    test("percolator - OVEMB150205_12") {
+        when {
+            process {
+                """
+                input[0] = COMET.out.pin
+                """
+            }
+        }
+
+        then {
+            // Restrict the snapshot to the output channels listed here.
+            def outputs = sanitizeOutput(process.out).findAll { key, val ->
+                ['pout_xml', 'features_pin', 'weights', 'target_psms', 'decoy_psms'].contains(key)
+            }
+
+            assertAll(
+                { assert process.success },
+                // the selected outputs are matched as whole files; pepXML has a timestamp and must be treated separately
+                { assert snapshot(outputs).match() },
+                // in pepXML: check that the root element of the format is present
+                { assert path(process.out.pout_pepxml.get(0).get(1)).readLines().any { it.contains('<msms_pipeline_analysis') }}
+
+            )
+
+        }
+    }
+
+    test("percolator - OVEMB150205_12 - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = COMET.out.pin
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(sanitizeOutput(process.out)).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/percolator/tests/main.nf.test.snap b/modules/nf-core/percolator/tests/main.nf.test.snap
new file mode 100644
index 00000000000..ff41981f14a
--- /dev/null
+++ b/modules/nf-core/percolator/tests/main.nf.test.snap
@@ -0,0 +1,151 @@
+{
+    "percolator - OVEMB150205_12": {
+        "content": [
+            {
+                "decoy_psms": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.psm.decoy.pout:md5,7c9909537ccb675680e02504f07f2a74"
+                    ]
+                ],
+                "features_pin": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.pin:md5,4f49bdcb40d891c8bfb61f794465d1be"
+                    ]
+                ],
+                "pout_xml": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pout.xml:md5,6a6be470f5961d54f2ee6c9c8596a476"
+                    ]
+                ],
+                "target_psms": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.psm.target.pout:md5,61d48fdb8597aa5b5dde5c41b0cf1721"
+                    ]
+                ],
+                "weights": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weights.tsv:md5,359b4a49c53892bef1738ae693520f69"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-23T18:13:39.425266156",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    },
+    "percolator - OVEMB150205_12 - stub": {
+        "content": [
+            {
+                "decoy_peptides": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pep.decoy.pout:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "decoy_proteins": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.protein.decoy.pout:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "decoy_psms": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.psm.decoy.pout:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "features_pin": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.features.pin:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "pout_pepxml": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pep.xml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "pout_xml": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pout.xml:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "target_peptides": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.pep.target.pout:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "target_proteins": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.protein.target.pout:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "target_psms": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.psm.target.pout:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_percolator": [
+                    [
+                        "PERCOLATOR",
+                        "percolator",
+                        "3.07.1"
+                    ]
+                ],
+                "weights": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.weights.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-23T18:13:45.966539564",
+        "meta": {
+            "nf-test": "0.9.4",
+            "nextflow": "25.10.4"
+        }
+    }
+}
\ No newline at end of file