From 1412c1210fa52b32faf6d9307b2124213cd90831 Mon Sep 17 00:00:00 2001 From: Jessica Rowell Date: Tue, 13 May 2025 16:44:06 -0400 Subject: [PATCH 1/7] initial commit to add blastp annotator --- modules.json | 10 ++ modules/nf-core/blast/blastp/environment.yml | 7 + modules/nf-core/blast/blastp/main.nf | 82 ++++++++++ modules/nf-core/blast/blastp/meta.yml | 82 ++++++++++ .../nf-core/blast/blastp/tests/main.nf.test | 149 ++++++++++++++++++ .../blast/blastp/tests/main.nf.test.snap | 147 +++++++++++++++++ .../blast/blastp/tests/nextflow.config | 5 + .../nf-core/blast/makeblastdb/environment.yml | 7 + modules/nf-core/blast/makeblastdb/main.nf | 64 ++++++++ modules/nf-core/blast/makeblastdb/meta.yml | 49 ++++++ .../blast/makeblastdb/tests/main.nf.test | 90 +++++++++++ .../blast/makeblastdb/tests/main.nf.test.snap | 58 +++++++ .../blast/makeblastdb/tests/nextflow.config | 5 + subworkflows/local/blastp/main.nf | 36 +++++ subworkflows/local/blastp/meta.yml | 51 ++++++ subworkflows/local/blastp/tests/main.nf.test | 45 ++++++ 16 files changed, 887 insertions(+) create mode 100644 modules/nf-core/blast/blastp/environment.yml create mode 100644 modules/nf-core/blast/blastp/main.nf create mode 100644 modules/nf-core/blast/blastp/meta.yml create mode 100644 modules/nf-core/blast/blastp/tests/main.nf.test create mode 100644 modules/nf-core/blast/blastp/tests/main.nf.test.snap create mode 100644 modules/nf-core/blast/blastp/tests/nextflow.config create mode 100644 modules/nf-core/blast/makeblastdb/environment.yml create mode 100644 modules/nf-core/blast/makeblastdb/main.nf create mode 100644 modules/nf-core/blast/makeblastdb/meta.yml create mode 100644 modules/nf-core/blast/makeblastdb/tests/main.nf.test create mode 100644 modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap create mode 100644 modules/nf-core/blast/makeblastdb/tests/nextflow.config create mode 100644 subworkflows/local/blastp/main.nf create mode 100644 subworkflows/local/blastp/meta.yml create mode 100644 
subworkflows/local/blastp/tests/main.nf.test diff --git a/modules.json b/modules.json index 58e1d30..3c98776 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,16 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "blast/blastp": { + "branch": "master", + "git_sha": "be58de79943289acb561a6246d1da5f85555a224", + "installed_by": ["modules"] + }, + "blast/makeblastdb": { + "branch": "master", + "git_sha": "c7a7f06819adcf6f922e11b47f308b7c74484d67", + "installed_by": ["modules"] + }, "multiqc": { "branch": "master", "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", diff --git a/modules/nf-core/blast/blastp/environment.yml b/modules/nf-core/blast/blastp/environment.yml new file mode 100644 index 0000000..8fb1f8a --- /dev/null +++ b/modules/nf-core/blast/blastp/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::blast=2.16.0 diff --git a/modules/nf-core/blast/blastp/main.nf b/modules/nf-core/blast/blastp/main.nf new file mode 100644 index 0000000..2947ae9 --- /dev/null +++ b/modules/nf-core/blast/blastp/main.nf @@ -0,0 +1,82 @@ +process BLAST_BLASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/5222a42b366a0468a4c795f5057c2b8cfe39489548f8bd807e8ac0f80069bad5/data': + 'community.wave.seqera.io/library/blast:2.16.0--540f4b669b0a0ddd' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(db) + val out_ext + + output: + tuple val(meta), path("*.xml"), optional: true, emit: xml + tuple val(meta), path("*.tsv"), optional: true, emit: tsv + tuple val(meta), path("*.csv"), optional: true, emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + switch ( out_ext ) { + case "xml": outfmt = 5; break + case "tsv": outfmt = 6; break + case "csv": outfmt = 10; break + default: + outfmt = '6'; + out_ext = 'tsv'; + log.warn("Unknown output file format provided (${out_ext}): selecting BLAST default of tabular BLAST output (tsv)"); + break + } + + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + DB=`find -L ./ -name "*.phr" | sed 's/\\.phr\$//'` + blastp \\ + -query ${fasta_name} \\ + -out ${prefix}.${out_ext} \\ + -db \$DB \\ + -num_threads ${task.cpus} \\ + -outfmt ${outfmt} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastp -version 2>&1 | sed 's/^.*blastp: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + switch ( out_ext ) { + case "xml": outfmt = 5; break + case "tsv": outfmt = 6; break + case "csv": outfmt = 10; break + default: + outfmt = '6'; + out_ext = 'tsv'; + log.warn("Unknown output file format provided (${out_ext}): selecting BLAST default of tabular BLAST output (tsv)"); + break + } + + """ + touch ${prefix}.${out_ext} + + cat <<-END_VERSIONS > 
versions.yml + "${task.process}": + blast: \$(blastp -version 2>&1 | sed 's/^.*blastp: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/blast/blastp/meta.yml b/modules/nf-core/blast/blastp/meta.yml new file mode 100644 index 0000000..27052a2 --- /dev/null +++ b/modules/nf-core/blast/blastp/meta.yml @@ -0,0 +1,82 @@ +name: blast_blastp +description: BLASTP (Basic Local Alignment Search Tool- Protein) compares an amino + acid (protein) query sequence against a protein database +keywords: + - fasta + - blast + - blastp + - protein +tools: + - blast: + description: | + BLAST+ is a new suite of BLAST tools that utilizes the NCBI C++ Toolkit. + homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi + documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs + doi: 10.1016/S0022-2836(05)80360-2 + licence: ["US-Government-Work"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input fasta file containing queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: directory + description: Directory containing the blast database + pattern: "*" + - - out_ext: + type: string + description: | + Specify the type of output file to be generated. `xml` corresponds to BLAST xml format. + `tsv` corresponds to BLAST tabular format. `csv` corresponds to BLAST comma separated format. + pattern: "xml|tsv|csv" +output: + - xml: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.xml": + type: file + description: File containing blastp hits in XML format + pattern: "*.{xml}" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "*.tsv": + type: file + description: File containing blastp hits in tabular format + pattern: "*.{tsv}" + - csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.csv": + type: file + description: File containing blastp hits in comma separated format + pattern: "*.csv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@vagkaratzas" +maintainers: + - "@vagkaratzas" diff --git a/modules/nf-core/blast/blastp/tests/main.nf.test b/modules/nf-core/blast/blastp/tests/main.nf.test new file mode 100644 index 0000000..eb69fb8 --- /dev/null +++ b/modules/nf-core/blast/blastp/tests/main.nf.test @@ -0,0 +1,149 @@ +nextflow_process { + + name "Test Process BLAST_BLASTP" + script "../main.nf" + process "BLAST_BLASTP" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "blast" + tag "blast/blastp" + tag "blast/makeblastdb" + + setup { + run("BLAST_MAKEBLASTDB") { + script "../../makeblastdb/main.nf" + process { + """ + input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + """ + } + } + } + + test("Should search for proteins against a blast db and return the default (tsv) file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv).match("tsv_default") }, + { assert snapshot(process.out.versions).match("versions_tsv_default") } + ) + } + + } + + test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") { + + when { + 
params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv).match("gz_tsv_default") }, + { assert snapshot(process.out.versions).match("versions_gz_tsv_default") } + ) + } + + } + + test("Should search for proteins against a blast db and return an xml file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = 'xml' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.xml).match("xml") }, + { assert snapshot(process.out.versions).match("versions_xml") } + ) + } + + } + + test("Should search for proteins against a blast db and return a tsv file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = 'tsv' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv).match("tsv") }, + { assert snapshot(process.out.versions).match("versions_tsv") } + ) + } + + } + + test("Should search for proteins against a blast db and return a csv file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = 'csv' + """ + } + } + + then { + assertAll( + { assert process.success }, + 
{ assert snapshot(process.out.csv).match("csv") }, + { assert snapshot(process.out.versions).match("versions_csv") } + ) + } + + } + +} diff --git a/modules/nf-core/blast/blastp/tests/main.nf.test.snap b/modules/nf-core/blast/blastp/tests/main.nf.test.snap new file mode 100644 index 0000000..732c191 --- /dev/null +++ b/modules/nf-core/blast/blastp/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "tsv_default": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2023-11-07T11:45:50.969016761" + }, + "versions_tsv_default": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:32:56.182516991" + }, + "xml": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.xml:md5,14b191547a4990bfda1fe836e8e61f5c" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:06.103607353" + }, + "tsv": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2023-11-07T11:45:50.969016761" + }, + "versions_csv": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:18.404632639" + }, + "versions_xml": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:06.107483927" + }, + "versions_tsv": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:10.656751678" + }, + "csv": { + 
"content": [ + [ + [ + { + "id": "test" + }, + "test.csv:md5,19214c68e1a208705dd4d6f0c5dea78b" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2023-11-07T11:45:57.39156832" + }, + "gz_tsv_default": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2023-11-07T11:45:57.39156832" + }, + "versions_gz_tsv_default": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:01.261812684" + } +} \ No newline at end of file diff --git a/modules/nf-core/blast/blastp/tests/nextflow.config b/modules/nf-core/blast/blastp/tests/nextflow.config new file mode 100644 index 0000000..e993c72 --- /dev/null +++ b/modules/nf-core/blast/blastp/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype prot' + } +} diff --git a/modules/nf-core/blast/makeblastdb/environment.yml b/modules/nf-core/blast/makeblastdb/environment.yml new file mode 100644 index 0000000..8fb1f8a --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::blast=2.16.0 diff --git a/modules/nf-core/blast/makeblastdb/main.nf b/modules/nf-core/blast/makeblastdb/main.nf new file mode 100644 index 0000000..796c7be --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/main.nf @@ -0,0 +1,64 @@ +process BLAST_MAKEBLASTDB { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/blast:2.16.0--h66d330f_5': + 'biocontainers/blast:2.16.0--h66d330f_5' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${prefix}"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + makeblastdb \\ + -in ${fasta_name} \\ + -out ${prefix}/${fasta_name} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(makeblastdb -version 2>&1 | sed 's/^.*makeblastdb: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? 
fasta.getBaseName() : fasta + """ + touch ${fasta_name}.fasta + touch ${fasta_name}.fasta.ndb + touch ${fasta_name}.fasta.nhr + touch ${fasta_name}.fasta.nin + touch ${fasta_name}.fasta.njs + touch ${fasta_name}.fasta.not + touch ${fasta_name}.fasta.nsq + touch ${fasta_name}.fasta.ntf + touch ${fasta_name}.fasta.nto + mkdir ${prefix} + mv ${fasta_name}* ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(makeblastdb -version 2>&1 | sed 's/^.*makeblastdb: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/blast/makeblastdb/meta.yml b/modules/nf-core/blast/makeblastdb/meta.yml new file mode 100644 index 0000000..3b50654 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/meta.yml @@ -0,0 +1,49 @@ +name: blast_makeblastdb +description: Builds a BLAST database +keywords: + - fasta + - blast + - database +tools: + - blast: + description: | + BLAST finds regions of similarity between biological sequences. + homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi + documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs + doi: 10.1016/S0022-2836(05)80360-2 + licence: ["US-Government-Work"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}: + type: directory + description: Output directory containing blast database files + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@vagkaratzas" + - "@DLBPointon" diff --git a/modules/nf-core/blast/makeblastdb/tests/main.nf.test b/modules/nf-core/blast/makeblastdb/tests/main.nf.test new file mode 100644 index 0000000..b822689 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process BLAST_MAKEBLASTDB" + script "../main.nf" + process "BLAST_MAKEBLASTDB" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "blast" + tag "blast/makeblastdb" + + test("Should build a blast db folder from a fasta file") { + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert process.out.db.size() == 1 + + def all_files = ( new File(process.out.db[0][1]) ).listFiles() + def stable_file_names = [ + 'genome.fasta.ndb', + 'genome.fasta.nhr', + 'genome.fasta.not', + 'genome.fasta.nsq', + 'genome.fasta.ntf', + 'genome.fasta.nto' + ] + + def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted() + + assert snapshot( + all_files.collect { it.name }.toSorted(), + stable_files, + process.out.versions[0] + ).match() + } + ) + } + + } + + test("Should build a blast db folder from a zipped fasta file") { + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert process.out.db.size() == 
1 + + def all_files = ( new File(process.out.db[0][1]) ).listFiles() + def stable_file_names = [ + 'genome.fasta.ndb', + 'genome.fasta.nhr', + 'genome.fasta.not', + 'genome.fasta.nsq', + 'genome.fasta.ntf', + 'genome.fasta.nto' + ] + + def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted() + + assert snapshot( + all_files.collect { it.name }.toSorted(), + stable_files, + process.out.versions[0] + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap b/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap new file mode 100644 index 0000000..8154acb --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap @@ -0,0 +1,58 @@ +{ + "Should build a blast db folder from a fasta file": { + "content": [ + [ + "genome.fasta.ndb", + "genome.fasta.nhr", + "genome.fasta.nin", + "genome.fasta.njs", + "genome.fasta.not", + "genome.fasta.nsq", + "genome.fasta.ntf", + "genome.fasta.nto" + ], + [ + "genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d", + "genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e", + "genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da", + "genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb" + ], + "versions.yml:md5,91a8afa89354bef8a3c127cafaf1f46d" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-12T09:03:14.830721389" + }, + "Should build a blast db folder from a zipped fasta file": { + "content": [ + [ + "genome.fasta.ndb", + "genome.fasta.nhr", + "genome.fasta.nin", + "genome.fasta.njs", + "genome.fasta.not", + "genome.fasta.nsq", + "genome.fasta.ntf", + "genome.fasta.nto" + ], + [ + "genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d", + "genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e", + "genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + 
"genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da", + "genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb" + ], + "versions.yml:md5,91a8afa89354bef8a3c127cafaf1f46d" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-12T09:03:23.653118873" + } +} \ No newline at end of file diff --git a/modules/nf-core/blast/makeblastdb/tests/nextflow.config b/modules/nf-core/blast/makeblastdb/tests/nextflow.config new file mode 100644 index 0000000..0899289 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype nucl' + } +} diff --git a/subworkflows/local/blastp/main.nf b/subworkflows/local/blastp/main.nf new file mode 100644 index 0000000..0e348a5 --- /dev/null +++ b/subworkflows/local/blastp/main.nf @@ -0,0 +1,36 @@ +// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! 
:) +// https://github.com/nf-core/modules/tree/master/subworkflows +// You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace: +// https://nf-co.re/join +// TODO nf-core: A subworkflow SHOULD import at least two modules + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' + +workflow BLAST { + + take: + // TODO nf-core: edit input (take) channels + ch_bam // channel: [ val(meta), [ bam ] ] + + main: + + ch_versions = Channel.empty() + + // TODO nf-core: substitute modules here for the modules of your subworkflow + + SAMTOOLS_SORT ( ch_bam ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + emit: + // TODO nf-core: edit emitted channels + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/blastp/meta.yml b/subworkflows/local/blastp/meta.yml new file mode 100644 index 0000000..f7f593f --- /dev/null +++ b/subworkflows/local/blastp/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "blast" +## TODO nf-core: Add a description of the subworkflow and list keywords +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow +components: + - samtools/sort + - samtools/index +## TODO nf-core: List all of the channels used as input with a description and their structure +input: + - ch_bam: + type: file + description: | + The input channel 
containing the BAM/CRAM/SAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam/cram/sam}" +## TODO nf-core: List all of the channels used as output with a descriptions and their structure +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - csi: + type: file + description: | + Channel containing CSI files + Structure: [ val(meta), path(csi) ] + pattern: "*.csi" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jessicarowell" +maintainers: + - "@jessicarowell" diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test new file mode 100644 index 0000000..01bdc3f --- /dev/null +++ b/subworkflows/local/blastp/tests/main.nf.test @@ -0,0 +1,45 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core subworkflows test blast +nextflow_workflow { + + name "Test Subworkflow BLAST" + script "../main.nf" + workflow "BLAST" + + tag "subworkflows" + tag "subworkflows_" + tag "subworkflows/blast" + // TODO nf-core: Add tags for all modules used within this subworkflow. Example: + tag "samtools" + tag "samtools/sort" + tag "samtools/index" + + + // TODO nf-core: Change the test name preferably indicating the test-data and file-format used + test("sarscov2 - bam - single_end") { + + when { + workflow { + """ + // TODO nf-core: define inputs of the workflow here. 
Example: + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + input[1] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match()} + //TODO nf-core: Add all required assertions to verify the test output. + ) + } + } +} From 9b096d389174adf3e233ac6d780fd0e4200e0d95 Mon Sep 17 00:00:00 2001 From: Jessica Rowell Date: Tue, 13 May 2025 16:52:16 -0400 Subject: [PATCH 2/7] trailing whitespace: functional_annotation subworkflow --- subworkflows/local/functional_annotation/main.nf | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/functional_annotation/main.nf b/subworkflows/local/functional_annotation/main.nf index 6ba000a..83068e7 100644 --- a/subworkflows/local/functional_annotation/main.nf +++ b/subworkflows/local/functional_annotation/main.nf @@ -1,4 +1,6 @@ -// Import Annotator Subworfklows +// Import Annotator Subworfklows +include { BLASTP } from '../blastp/main' + workflow FUNCTIONAL_ANNOTATION { @@ -25,8 +27,13 @@ workflow FUNCTIONAL_ANNOTATION { .set { ch_multifasta } // - // SUBWORKFLOW: Annotator Name + // SUBWORKFLOW: BLASTP // + BLASTP ( + ch_multifasta + ) + ch_versions = ch_versions.mix(BLASTP.out.versions.first()) + emit: // TODO nf-core: edit emitted channels From 511e5ef85c0e4984fed90e7b7f19bcd62f253330 Mon Sep 17 00:00:00 2001 From: Jessica Rowell Date: Wed, 14 May 2025 12:22:42 -0400 Subject: [PATCH 3/7] adding nf-tests and fixing subworkflow --- conf/test.config | 3 +- nextflow.config | 5 + subworkflows/local/blastp/main.nf | 27 ++-- subworkflows/local/blastp/tests/main.nf.test | 145 ++++++++++++++++--- 4 files changed, 143 insertions(+), 37 deletions(-) diff --git 
a/conf/test.config b/conf/test.config index 9de8668..d87ff22 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,5 +27,6 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed // From: https://github.com/nf-core/proteinfold/blob/1.1.1/conf/test.config // Example: https://github.com/nf-core/test-datasets/blob/proteinfold/testdata/samplesheet/v1.2/samplesheet.csv - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' } diff --git a/nextflow.config b/nextflow.config index 9b72f6d..01e0ac8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,10 @@ params { max_multiqc_email_size = '25.MB' multiqc_methods_description = null + // Blastp options + blast_ref_fasta = null + blastp_outfmt = 'tsv' // tsv, csv, xml + // Boilerplate options outdir = null publish_dir_mode = 'copy' @@ -33,6 +37,7 @@ params { show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + modules_testdata_base_path = null trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options config_profile_name = null config_profile_description = null diff --git a/subworkflows/local/blastp/main.nf b/subworkflows/local/blastp/main.nf index 0e348a5..0336837 100644 --- a/subworkflows/local/blastp/main.nf +++ b/subworkflows/local/blastp/main.nf @@ -4,33 +4,32 @@ // https://nf-co.re/join // TODO nf-core: A subworkflow SHOULD import at least two modules -include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { MAKEBLASTDB } from '../../../modules/nf-core/blast/makeblastdb/main' +include { 
BLASTP } from '../../../modules/nf-core/blast/blastp/main' workflow BLAST { take: - // TODO nf-core: edit input (take) channels - ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), [ fasta ] ] + blastp_outfmt main: ch_versions = Channel.empty() // TODO nf-core: substitute modules here for the modules of your subworkflow + makeblastdb_input = file("${params.blast_ref_fasta}") + MAKEBLASTDB ( [ [ id: makeblastdb_input.getSimpleName() ] , makeblastdb_input] ) + ch_versions = ch_versions.mix(MAKEBLASTDB.out.versions.first()) - SAMTOOLS_SORT ( ch_bam ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) - - SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + BLASTP ( ch_fasta, MAKEBLASTDB.out.db, blastp_outfmt) + ch_versions = ch_versions.mix(BLASTP.out.versions.first()) emit: - // TODO nf-core: edit emitted channels - bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + xml = BLASTP.out.xml // channel: [ val(meta), [ xml ] ] + csv = BLASTP.out.csv // channel: [ val(meta), [ csv ] ] + tsv = BLASTP.out.tsv // channel: [ val(meta), [ tsv ] ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test index 01bdc3f..29bcff7 100644 --- a/subworkflows/local/blastp/tests/main.nf.test +++ b/subworkflows/local/blastp/tests/main.nf.test @@ -1,44 +1,145 @@ // TODO nf-core: Once you have added the required tests, please run the following command to build this file: -// nf-core subworkflows test blast +// nf-core subworkflows test blastp nextflow_workflow { - name "Test Subworkflow BLAST" + name "Test Subworkflow BLASTP" script "../main.nf" - workflow "BLAST" + workflow "BLASTP" tag 
"subworkflows" tag "subworkflows_" tag "subworkflows/blast" - // TODO nf-core: Add tags for all modules used within this subworkflow. Example: - tag "samtools" - tag "samtools/sort" - tag "samtools/index" + // all modules used within this subworkflow. + tag "blast" + tag "blast/makeblastdb" + tag "blast/blastp" - // TODO nf-core: Change the test name preferably indicating the test-data and file-format used - test("sarscov2 - bam - single_end") { + test("Should search for proteins against a blast db and return the default (tsv) file with hits") { when { - workflow { + params { + blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.tsv, + process.out.versions + ).match() } + ) + } + + } + + test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") { + + when { + params { + blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] + input[1] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.tsv, + process.out.versions + ).match() } + ) + } + + } + + test("Should search for proteins against a blast db and return an xml file with hits") { + + when { + params { + blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = 'xml' + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot( + process.out.xml, + process.out.versions + ).match() } + ) + } + + } + + test("Should search for proteins against a blast db and return a tsv file with hits") { + + when { + params { + blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = 'tsv' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.tsv, + process.out.versions + ).match() } + ) + } + + } + + test("Should search for proteins against a blast db and return a csv file with hits") { + + when { + params { + blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + } + process { """ - // TODO nf-core: define inputs of the workflow here. Example: - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - ] - input[1] = [ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), - ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = 'csv' """ } } then { assertAll( - { assert workflow.success}, - { assert snapshot(workflow.out).match()} - //TODO nf-core: Add all required assertions to verify the test output. 
+ { assert process.success }, + { assert snapshot( + process.out.csv, + process.out.versions + ).match() } ) } } From ae0d27679921baf60a78abb178efc81856bbc029 Mon Sep 17 00:00:00 2001 From: Jessica Rowell Date: Wed, 14 May 2025 16:30:58 -0400 Subject: [PATCH 4/7] modifications to get nf-test working for blast annotator --- subworkflows/local/blastp/main.nf | 21 +- subworkflows/local/blastp/tests/main.nf.test | 218 +++++++++--------- .../local/functional_annotation/main.nf | 2 +- 3 files changed, 122 insertions(+), 119 deletions(-) diff --git a/subworkflows/local/blastp/main.nf b/subworkflows/local/blastp/main.nf index 0336837..72a1455 100644 --- a/subworkflows/local/blastp/main.nf +++ b/subworkflows/local/blastp/main.nf @@ -4,10 +4,10 @@ // https://nf-co.re/join // TODO nf-core: A subworkflow SHOULD import at least two modules -include { MAKEBLASTDB } from '../../../modules/nf-core/blast/makeblastdb/main' -include { BLASTP } from '../../../modules/nf-core/blast/blastp/main' +include { BLAST_MAKEBLASTDB } from '../../../modules/nf-core/blast/makeblastdb/main' +include { BLAST_BLASTP } from '../../../modules/nf-core/blast/blastp/main' -workflow BLAST { +workflow BLASTP { take: ch_fasta // channel: [ val(meta), [ fasta ] ] @@ -19,16 +19,17 @@ workflow BLAST { // TODO nf-core: substitute modules here for the modules of your subworkflow makeblastdb_input = file("${params.blast_ref_fasta}") - MAKEBLASTDB ( [ [ id: makeblastdb_input.getSimpleName() ] , makeblastdb_input] ) - ch_versions = ch_versions.mix(MAKEBLASTDB.out.versions.first()) + BLAST_MAKEBLASTDB ( [ [ id: makeblastdb_input.getSimpleName() ] , makeblastdb_input] ) + ch_versions = ch_versions.mix(BLAST_MAKEBLASTDB.out.versions) + + BLAST_BLASTP ( ch_fasta, BLAST_MAKEBLASTDB.out.db, blastp_outfmt) + ch_versions = ch_versions.mix(BLAST_BLASTP.out.versions) - BLASTP ( ch_fasta, MAKEBLASTDB.out.db, blastp_outfmt) - ch_versions = ch_versions.mix(BLASTP.out.versions.first()) emit: - xml = BLASTP.out.xml // channel: [ 
val(meta), [ xml ] ] - csv = BLASTP.out.csv // channel: [ val(meta), [ csv ] ] - tsv = BLASTP.out.tsv // channel: [ val(meta), [ tsv ] ] + xml = BLAST_BLASTP.out.xml // channel: [ val(meta), [ xml ] ] + csv = BLAST_BLASTP.out.csv // channel: [ val(meta), [ csv ] ] + tsv = BLAST_BLASTP.out.tsv // channel: [ val(meta), [ tsv ] ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test index 29bcff7..9743538 100644 --- a/subworkflows/local/blastp/tests/main.nf.test +++ b/subworkflows/local/blastp/tests/main.nf.test @@ -19,11 +19,13 @@ nextflow_workflow { when { params { - blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + //blast_ref_fasta = 'proteome.fasta' + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + //blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' } - process { + workflow { """ - input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] input[1] = '' """ } @@ -31,116 +33,116 @@ nextflow_workflow { then { assertAll( - { assert process.success }, + { assert workflow.success }, { assert snapshot( - process.out.tsv, - process.out.versions + workflow.out.tsv, + workflow.out.versions ).match() } ) } } - test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") { - - when { - params { - blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' - } - process { - """ - input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] - input[1] = '' - """ - } - } - - then { - assertAll( - 
{ assert process.success }, - { assert snapshot( - process.out.tsv, - process.out.versions - ).match() } - ) - } - - } - - test("Should search for proteins against a blast db and return an xml file with hits") { - - when { - params { - blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' - } - process { - """ - input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] - input[1] = 'xml' - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.xml, - process.out.versions - ).match() } - ) - } - - } - - test("Should search for proteins against a blast db and return a tsv file with hits") { - - when { - params { - blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' - } - process { - """ - input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] - input[1] = 'tsv' - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.tsv, - process.out.versions - ).match() } - ) - } - - } - - test("Should search for proteins against a blast db and return a csv file with hits") { - - when { - params { - blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' - } - process { - """ - input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] - input[1] = 'csv' - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.csv, - process.out.versions - ).match() } - ) - } - } + // test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") { + + // when { + // params { + // blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' 
+ // } + // workflow { + // """ + // input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + // input[1] = '' + // """ + // } + // } + + // then { + // assertAll( + // { assert workflow.success }, + // { assert snapshot( + // workflow.out.tsv, + // workflow.out.versions + // ).match() } + // ) + // } + + // } + + // test("Should search for proteins against a blast db and return an xml file with hits") { + + // when { + // params { + // blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + // } + // workflow { + // """ + // input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + // input[1] = 'xml' + // """ + // } + // } + + // then { + // assertAll( + // { assert workflow.success }, + // { assert snapshot( + // workflow.out.xml, + // workflow.out.versions + // ).match() } + // ) + // } + + // } + + // test("Should search for proteins against a blast db and return a tsv file with hits") { + + // when { + // params { + // blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + // } + // workflow { + // """ + // input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + // input[1] = 'tsv' + // """ + // } + // } + + // then { + // assertAll( + // { assert workflow.success }, + // { assert snapshot( + // workflow.out.tsv, + // workflow.out.versions + // ).match() } + // ) + // } + + // } + + // test("Should search for proteins against a blast db and return a csv file with hits") { + + // when { + // params { + // blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + // } + // workflow { + // """ + // input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + // input[1] = 'csv' + // """ + // } + // } + + // then { + // assertAll( + // { assert workflow.success }, + // { assert snapshot( + // workflow.out.csv, + // workflow.out.versions + // ).match() } 
+ // ) + // } + // } } diff --git a/subworkflows/local/functional_annotation/main.nf b/subworkflows/local/functional_annotation/main.nf index 83068e7..d9ba434 100644 --- a/subworkflows/local/functional_annotation/main.nf +++ b/subworkflows/local/functional_annotation/main.nf @@ -30,7 +30,7 @@ workflow FUNCTIONAL_ANNOTATION { // SUBWORKFLOW: BLASTP // BLASTP ( - ch_multifasta + ch_multifasta, params.blastp_outfmt ) ch_versions = ch_versions.mix(BLASTP.out.versions.first()) From e836af0700490cb4ac056a496317ebd3e68940db Mon Sep 17 00:00:00 2001 From: Jessica Rowell Date: Wed, 14 May 2025 16:32:53 -0400 Subject: [PATCH 5/7] add default dbtype for makeblastdb --- conf/modules.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 94a2a32..0e9623c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -30,4 +30,8 @@ process { withName: SEQKIT_STATS { ext.args = ' ' // turn off --all default argument } + + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype prot' + } } From e59fdf7011f060b69d3c9b6e02bb6bd3345fe1ef Mon Sep 17 00:00:00 2001 From: Jessica Rowell Date: Wed, 14 May 2025 17:14:06 -0400 Subject: [PATCH 6/7] correct nf tests --- subworkflows/local/blastp/tests/main.nf.test | 205 +++++++++---------- 1 file changed, 102 insertions(+), 103 deletions(-) diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test index 9743538..2e9bb8b 100644 --- a/subworkflows/local/blastp/tests/main.nf.test +++ b/subworkflows/local/blastp/tests/main.nf.test @@ -19,7 +19,6 @@ nextflow_workflow { when { params { - //blast_ref_fasta = 'proteome.fasta' blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' //blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' } @@ -43,106 +42,106 @@ nextflow_workflow { } - // test("Should search for zipped proteins against a blast db 
and return the default (tsv) file with hits") { - - // when { - // params { - // blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' - // } - // workflow { - // """ - // input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] - // input[1] = '' - // """ - // } - // } - - // then { - // assertAll( - // { assert workflow.success }, - // { assert snapshot( - // workflow.out.tsv, - // workflow.out.versions - // ).match() } - // ) - // } - - // } - - // test("Should search for proteins against a blast db and return an xml file with hits") { - - // when { - // params { - // blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' - // } - // workflow { - // """ - // input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] - // input[1] = 'xml' - // """ - // } - // } - - // then { - // assertAll( - // { assert workflow.success }, - // { assert snapshot( - // workflow.out.xml, - // workflow.out.versions - // ).match() } - // ) - // } - - // } - - // test("Should search for proteins against a blast db and return a tsv file with hits") { - - // when { - // params { - // blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' - // } - // workflow { - // """ - // input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] - // input[1] = 'tsv' - // """ - // } - // } - - // then { - // assertAll( - // { assert workflow.success }, - // { assert snapshot( - // workflow.out.tsv, - // workflow.out.versions - // ).match() } - // ) - // } - - // } - - // test("Should search for proteins against a blast db and return a csv file with hits") { - - // when { - // params { - // blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' - // } - // workflow { - // """ - // input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] - // input[1] = 'csv' - 
// """ - // } - // } - - // then { - // assertAll( - // { assert workflow.success }, - // { assert snapshot( - // workflow.out.csv, - // workflow.out.versions - // ).match() } - // ) - // } - // } + test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") { + + when { + params { + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + //blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file(params.blast_ref_fasta + '.gz', checkIfExists: true) ] + input[1] = '' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.tsv, + workflow.out.versions + ).match() } + ) + } + } + + test("Should search for proteins against a blast db and return an xml file with hits") { + + when { + params { + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + input[1] = 'xml' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.xml, + workflow.out.versions + ).match() } + ) + } + + } + + test("Should search for proteins against a blast db and return a tsv file with hits") { + + when { + params { + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + input[1] = 'tsv' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.tsv, + workflow.out.versions + ).match() } + ) + } + + } + + test("Should search for proteins against a blast db and return a csv file with hits") { + + when {
+ params { + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + input[1] = 'csv' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.csv, + workflow.out.versions + ).match() } + ) + } + } } From 691eb36580d7381bd4ae5b0fd4a663bdcf1e4448 Mon Sep 17 00:00:00 2001 From: Jessica Rowell Date: Wed, 14 May 2025 17:54:58 -0400 Subject: [PATCH 7/7] add blast params to nextflow schema --- nextflow_schema.json | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 9185010..a4c2b31 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -199,5 +199,20 @@ { "$ref": "#/$defs/generic_options" } - ] + ], + "properties": { + "blast_ref_fasta": { + "type": "string", + "help_text": "Reference protein database to BLAST against" + }, + "blastp_outfmt": { + "type": "string", + "default": "tsv", + "pattern": "^(xml|tsv|csv)$", + "help_text": "Desired format for BLAST output (tsv, csv, xml)" + }, + "modules_testdata_base_path": { + "type": "string" + } + } }