diff --git a/conf/modules.config b/conf/modules.config index 94a2a32..0e9623c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -30,4 +30,8 @@ process { withName: SEQKIT_STATS { ext.args = ' ' // turn off --all default argument } + + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype prot' + } } diff --git a/conf/test.config b/conf/test.config index 9de8668..d87ff22 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,5 +27,6 @@ params { // TODO nf-core: Give any required params for the test so that command line flags are not needed // From: https://github.com/nf-core/proteinfold/blob/1.1.1/conf/test.config // Example: https://github.com/nf-core/test-datasets/blob/proteinfold/testdata/samplesheet/v1.2/samplesheet.csv - input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv' + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' } diff --git a/modules.json b/modules.json index a6a464d..d42ef63 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,16 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "blast/blastp": { + "branch": "master", + "git_sha": "be58de79943289acb561a6246d1da5f85555a224", + "installed_by": ["modules"] + }, + "blast/makeblastdb": { + "branch": "master", + "git_sha": "c7a7f06819adcf6f922e11b47f308b7c74484d67", + "installed_by": ["modules"] + }, "mmseqs/search": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", diff --git a/modules/nf-core/blast/blastp/environment.yml b/modules/nf-core/blast/blastp/environment.yml new file mode 100644 index 0000000..8fb1f8a --- /dev/null +++ b/modules/nf-core/blast/blastp/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda 
+dependencies: + - bioconda::blast=2.16.0 diff --git a/modules/nf-core/blast/blastp/main.nf b/modules/nf-core/blast/blastp/main.nf new file mode 100644 index 0000000..2947ae9 --- /dev/null +++ b/modules/nf-core/blast/blastp/main.nf @@ -0,0 +1,82 @@ +process BLAST_BLASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/5222a42b366a0468a4c795f5057c2b8cfe39489548f8bd807e8ac0f80069bad5/data': + 'community.wave.seqera.io/library/blast:2.16.0--540f4b669b0a0ddd' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(db) + val out_ext + + output: + tuple val(meta), path("*.xml"), optional: true, emit: xml + tuple val(meta), path("*.tsv"), optional: true, emit: tsv + tuple val(meta), path("*.csv"), optional: true, emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? 
fasta.getBaseName() : fasta + switch ( out_ext ) { + case "xml": outfmt = 5; break + case "tsv": outfmt = 6; break + case "csv": outfmt = 10; break + default: + outfmt = '6'; + out_ext = 'tsv'; + log.warn("Unknown output file format provided (${out_ext}): selecting BLAST default of tabular BLAST output (tsv)"); + break + } + + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + DB=`find -L ./ -name "*.phr" | sed 's/\\.phr\$//'` + blastp \\ + -query ${fasta_name} \\ + -out ${prefix}.${out_ext} \\ + -db \$DB \\ + -num_threads ${task.cpus} \\ + -outfmt ${outfmt} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastp -version 2>&1 | sed 's/^.*blastp: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + switch ( out_ext ) { + case "xml": outfmt = 5; break + case "tsv": outfmt = 6; break + case "csv": outfmt = 10; break + default: + outfmt = '6'; + out_ext = 'tsv'; + log.warn("Unknown output file format provided (${out_ext}): selecting BLAST default of tabular BLAST output (tsv)"); + break + } + + """ + touch ${prefix}.${out_ext} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastp -version 2>&1 | sed 's/^.*blastp: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/blast/blastp/meta.yml b/modules/nf-core/blast/blastp/meta.yml new file mode 100644 index 0000000..27052a2 --- /dev/null +++ b/modules/nf-core/blast/blastp/meta.yml @@ -0,0 +1,82 @@ +name: blast_blastp +description: BLASTP (Basic Local Alignment Search Tool- Protein) compares an amino + acid (protein) query sequence against a protein database +keywords: + - fasta + - blast + - blastp + - protein +tools: + - blast: + description: | + BLAST+ is a new suite of BLAST tools that utilizes the NCBI C++ Toolkit. 
+ homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi + documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs + doi: 10.1016/S0022-2836(05)80360-2 + licence: ["US-Government-Work"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input fasta file containing queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: directory + description: Directory containing the blast database + pattern: "*" + - - out_ext: + type: string + description: | + Specify the type of output file to be generated. `xml` corresponds to BLAST xml format. + `tsv` corresponds to BLAST tabular format. `csv` corresponds to BLAST comma separated format. + pattern: "xml|tsv|csv" +output: + - xml: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.xml": + type: file + description: File containing blastp hits in XML format + pattern: "*.{xml}" + - tsv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.tsv": + type: file + description: File containing blastp hits in tabular format + pattern: "*.{tsv}" + - csv: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'test', single_end:false ]` + - "*.csv": + type: file + description: File containing blastp hits in comma separated format + pattern: "*.csv" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@vagkaratzas" +maintainers: + - "@vagkaratzas" diff --git a/modules/nf-core/blast/blastp/tests/main.nf.test b/modules/nf-core/blast/blastp/tests/main.nf.test new file mode 100644 index 0000000..eb69fb8 --- /dev/null +++ b/modules/nf-core/blast/blastp/tests/main.nf.test @@ -0,0 +1,149 @@ +nextflow_process { + + name "Test Process BLAST_BLASTP" + script "../main.nf" + process "BLAST_BLASTP" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "blast" + tag "blast/blastp" + tag "blast/makeblastdb" + + setup { + run("BLAST_MAKEBLASTDB") { + script "../../makeblastdb/main.nf" + process { + """ + input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + """ + } + } + } + + test("Should search for proteins against a blast db and return the default (tsv) file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = '' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv).match("tsv_default") }, + { assert snapshot(process.out.versions).match("versions_tsv_default") } + ) + } + + } + + test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = 
'' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv).match("gz_tsv_default") }, + { assert snapshot(process.out.versions).match("versions_gz_tsv_default") } + ) + } + + } + + test("Should search for proteins against a blast db and return an xml file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = 'xml' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.xml).match("xml") }, + { assert snapshot(process.out.versions).match("versions_xml") } + ) + } + + } + + test("Should search for proteins against a blast db and return a tsv file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = 'tsv' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.tsv).match("tsv") }, + { assert snapshot(process.out.versions).match("versions_tsv") } + ) + } + + } + + test("Should search for proteins against a blast db and return a csv file with hits") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + input[2] = 'csv' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.csv).match("csv") }, + { assert snapshot(process.out.versions).match("versions_csv") } + ) + } + + } + +} diff --git a/modules/nf-core/blast/blastp/tests/main.nf.test.snap 
b/modules/nf-core/blast/blastp/tests/main.nf.test.snap new file mode 100644 index 0000000..732c191 --- /dev/null +++ b/modules/nf-core/blast/blastp/tests/main.nf.test.snap @@ -0,0 +1,147 @@ +{ + "tsv_default": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2023-11-07T11:45:50.969016761" + }, + "versions_tsv_default": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:32:56.182516991" + }, + "xml": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.xml:md5,14b191547a4990bfda1fe836e8e61f5c" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:06.103607353" + }, + "tsv": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2023-11-07T11:45:50.969016761" + }, + "versions_csv": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:18.404632639" + }, + "versions_xml": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:06.107483927" + }, + "versions_tsv": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:10.656751678" + }, + "csv": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.csv:md5,19214c68e1a208705dd4d6f0c5dea78b" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": 
"2023-11-07T11:45:57.39156832" + }, + "gz_tsv_default": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b" + ] + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2023-11-07T11:45:57.39156832" + }, + "versions_gz_tsv_default": { + "content": [ + [ + "versions.yml:md5,e128101d410ec4eb0205f170435dbef9" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-01T11:33:01.261812684" + } +} \ No newline at end of file diff --git a/modules/nf-core/blast/blastp/tests/nextflow.config b/modules/nf-core/blast/blastp/tests/nextflow.config new file mode 100644 index 0000000..e993c72 --- /dev/null +++ b/modules/nf-core/blast/blastp/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype prot' + } +} diff --git a/modules/nf-core/blast/makeblastdb/environment.yml b/modules/nf-core/blast/makeblastdb/environment.yml new file mode 100644 index 0000000..8fb1f8a --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::blast=2.16.0 diff --git a/modules/nf-core/blast/makeblastdb/main.nf b/modules/nf-core/blast/makeblastdb/main.nf new file mode 100644 index 0000000..796c7be --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/main.nf @@ -0,0 +1,64 @@ +process BLAST_MAKEBLASTDB { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/blast:2.16.0--h66d330f_5': + 'biocontainers/blast:2.16.0--h66d330f_5' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${prefix}"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + makeblastdb \\ + -in ${fasta_name} \\ + -out ${prefix}/${fasta_name} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(makeblastdb -version 2>&1 | sed 's/^.*makeblastdb: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? 
fasta.getBaseName() : fasta + """ + touch ${fasta_name}.fasta + touch ${fasta_name}.fasta.ndb + touch ${fasta_name}.fasta.nhr + touch ${fasta_name}.fasta.nin + touch ${fasta_name}.fasta.njs + touch ${fasta_name}.fasta.not + touch ${fasta_name}.fasta.nsq + touch ${fasta_name}.fasta.ntf + touch ${fasta_name}.fasta.nto + mkdir ${prefix} + mv ${fasta_name}* ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(makeblastdb -version 2>&1 | sed 's/^.*makeblastdb: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/blast/makeblastdb/meta.yml b/modules/nf-core/blast/makeblastdb/meta.yml new file mode 100644 index 0000000..3b50654 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/meta.yml @@ -0,0 +1,49 @@ +name: blast_makeblastdb +description: Builds a BLAST database +keywords: + - fasta + - blast + - database +tools: + - blast: + description: | + BLAST finds regions of similarity between biological sequences. + homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi + documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs + doi: 10.1016/S0022-2836(05)80360-2 + licence: ["US-Government-Work"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" +output: + - db: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}: + type: directory + description: Output directory containing blast database files + pattern: "*" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@vagkaratzas" + - "@DLBPointon" diff --git a/modules/nf-core/blast/makeblastdb/tests/main.nf.test b/modules/nf-core/blast/makeblastdb/tests/main.nf.test new file mode 100644 index 0000000..b822689 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process BLAST_MAKEBLASTDB" + script "../main.nf" + process "BLAST_MAKEBLASTDB" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "blast" + tag "blast/makeblastdb" + + test("Should build a blast db folder from a fasta file") { + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert process.out.db.size() == 1 + + def all_files = ( new File(process.out.db[0][1]) ).listFiles() + def stable_file_names = [ + 'genome.fasta.ndb', + 'genome.fasta.nhr', + 'genome.fasta.not', + 'genome.fasta.nsq', + 'genome.fasta.ntf', + 'genome.fasta.nto' + ] + + def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted() + + assert snapshot( + all_files.collect { it.name }.toSorted(), + stable_files, + process.out.versions[0] + ).match() + } + ) + } + + } + + test("Should build a blast db folder from a zipped fasta file") { + + when { + process { + """ + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert process.out.db.size() == 
1 + + def all_files = ( new File(process.out.db[0][1]) ).listFiles() + def stable_file_names = [ + 'genome.fasta.ndb', + 'genome.fasta.nhr', + 'genome.fasta.not', + 'genome.fasta.nsq', + 'genome.fasta.ntf', + 'genome.fasta.nto' + ] + + def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted() + + assert snapshot( + all_files.collect { it.name }.toSorted(), + stable_files, + process.out.versions[0] + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap b/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap new file mode 100644 index 0000000..8154acb --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap @@ -0,0 +1,58 @@ +{ + "Should build a blast db folder from a fasta file": { + "content": [ + [ + "genome.fasta.ndb", + "genome.fasta.nhr", + "genome.fasta.nin", + "genome.fasta.njs", + "genome.fasta.not", + "genome.fasta.nsq", + "genome.fasta.ntf", + "genome.fasta.nto" + ], + [ + "genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d", + "genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e", + "genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + "genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da", + "genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb" + ], + "versions.yml:md5,91a8afa89354bef8a3c127cafaf1f46d" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-12T09:03:14.830721389" + }, + "Should build a blast db folder from a zipped fasta file": { + "content": [ + [ + "genome.fasta.ndb", + "genome.fasta.nhr", + "genome.fasta.nin", + "genome.fasta.njs", + "genome.fasta.not", + "genome.fasta.nsq", + "genome.fasta.ntf", + "genome.fasta.nto" + ], + [ + "genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d", + "genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e", + "genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb", + 
"genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da", + "genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb", + "genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb" + ], + "versions.yml:md5,91a8afa89354bef8a3c127cafaf1f46d" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-12T09:03:23.653118873" + } +} \ No newline at end of file diff --git a/modules/nf-core/blast/makeblastdb/tests/nextflow.config b/modules/nf-core/blast/makeblastdb/tests/nextflow.config new file mode 100644 index 0000000..0899289 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype nucl' + } +} diff --git a/nextflow.config b/nextflow.config index 9b72f6d..01e0ac8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,6 +20,10 @@ params { max_multiqc_email_size = '25.MB' multiqc_methods_description = null + // Blastp options + blast_ref_fasta = null + blastp_outfmt = 'tsv' // tsv, csv, xml + // Boilerplate options outdir = null publish_dir_mode = 'copy' @@ -33,6 +37,7 @@ params { show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + modules_testdata_base_path = null trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options config_profile_name = null config_profile_description = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 9185010..a4c2b31 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -199,5 +199,20 @@ { "$ref": "#/$defs/generic_options" } - ] + ], + "properties": { + "blast_ref_fasta": { + "type": "string", + "help_text": "Reference protein database to BLAST against" + }, + "blastp_outfmt": { + "type": "string", + "default": "tsv", + "enum": ["tsv", "csv", "xml"], + "help_text": "Desired format for BLAST output (tsv, csv, xml)" + }, + "modules_testdata_base_path": { + "type": "string" + } + } 
} diff --git a/subworkflows/local/blastp/main.nf b/subworkflows/local/blastp/main.nf new file mode 100644 index 0000000..72a1455 --- /dev/null +++ b/subworkflows/local/blastp/main.nf @@ -0,0 +1,36 @@ +// Subworkflow: build a protein BLAST database from params.blast_ref_fasta and +// search the input protein sequences against it with blastp. +// blastp_outfmt selects the BLAST_BLASTP report format: 'xml', 'tsv' or 'csv' +// (any other value makes BLAST_BLASTP fall back to 'tsv'). +// The makeblastdb -dbtype is supplied via ext.args in conf/modules.config. + +include { BLAST_MAKEBLASTDB } from '../../../modules/nf-core/blast/makeblastdb/main' +include { BLAST_BLASTP } from '../../../modules/nf-core/blast/blastp/main' + +workflow BLASTP { + + take: + ch_fasta // channel: [ val(meta), [ fasta ] ] + blastp_outfmt + + main: + + ch_versions = Channel.empty() + + // Build the BLAST database from the reference FASTA; fail fast if it is unset or missing + makeblastdb_input = file(params.blast_ref_fasta, checkIfExists: true) + BLAST_MAKEBLASTDB ( [ [ id: makeblastdb_input.getSimpleName() ] , makeblastdb_input] ) + ch_versions = ch_versions.mix(BLAST_MAKEBLASTDB.out.versions) + + BLAST_BLASTP ( ch_fasta, BLAST_MAKEBLASTDB.out.db, blastp_outfmt) + ch_versions = ch_versions.mix(BLAST_BLASTP.out.versions) + + + emit: + xml = BLAST_BLASTP.out.xml // channel: [ val(meta), [ xml ] ] + csv = BLAST_BLASTP.out.csv // channel: [ val(meta), [ csv ] ] + tsv = BLAST_BLASTP.out.tsv // channel: [ val(meta), [ tsv ] ] + + versions = ch_versions // channel: [ versions.yml ] +} + 
diff --git a/subworkflows/local/blastp/meta.yml b/subworkflows/local/blastp/meta.yml new file mode 100644 index 0000000..f7f593f --- /dev/null +++ b/subworkflows/local/blastp/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "blastp" +description: Build a protein BLAST database from a reference FASTA and search protein sequences against it with blastp +keywords: + - blast + - blastp + - protein + - fasta +components: + - blast/makeblastdb + - blast/blastp +input: + - ch_fasta: + type: file + description: | + The input channel containing the query protein FASTA files + Structure: [ val(meta), path(fasta) ] + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - blastp_outfmt: + type: string + description: | + BLAST output format passed to BLAST_BLASTP: xml, tsv or csv +output: + - xml: + type: file + description: | + Channel containing blastp hits in XML format + Structure: [ val(meta), path(xml) ] + pattern: "*.xml" + - csv: + type: file + description: | + Channel containing blastp hits in comma separated format + Structure: [ val(meta), path(csv) ] + pattern: "*.csv" + - tsv: + type: file + description: | + Channel containing blastp hits in tabular format + Structure: [ val(meta), path(tsv) ] + pattern: "*.tsv" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jessicarowell" +maintainers: + - "@jessicarowell" diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test new file mode 100644 index 0000000..2e9bb8b --- /dev/null +++ b/subworkflows/local/blastp/tests/main.nf.test @@ -0,0 +1,147 @@ +// TODO nf-core: Once you have added the required tests, please run the following command to build this file: +// nf-core subworkflows test blastp +nextflow_workflow { + + name "Test Subworkflow BLASTP" + script "../main.nf" + workflow "BLASTP" + + tag "subworkflows" + tag "subworkflows_" + tag "subworkflows/blastp" + // all modules used within this subworkflow. 
+ tag "blast" + tag "blast/makeblastdb" + tag "blast/blastp" + + + test("Should search for proteins against a blast db and return the default (tsv) file with hits") { + + when { + params { + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + //blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + input[1] = '' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.tsv, + workflow.out.versions + ).match() } + ) + } + + } + + test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") { + + when { + params { + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + //blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] + input[1] = '' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.tsv, + workflow.out.versions + ).match() } + ) + } + } + + test("Should search for proteins against a blast db and return an xml file with hits") { + + when { + params { + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + input[1] = 'xml' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.xml, + workflow.out.versions + ).match() } + ) + } + + } + + test("Should search for proteins against a blast db and return a tsv file with hits") { + + when { + params { + 
blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + input[1] = 'tsv' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.tsv, + workflow.out.versions + ).match() } + ) + } + + } + + test("Should search for proteins against a blast db and return a csv file with hits") { + + when { + params { + blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta' + } + workflow { + """ + input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ] + input[1] = 'csv' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.csv, + workflow.out.versions + ).match() } + ) + } + } +} diff --git a/subworkflows/local/functional_annotation/main.nf b/subworkflows/local/functional_annotation/main.nf index abde53c..d9ba434 100644 --- a/subworkflows/local/functional_annotation/main.nf +++ b/subworkflows/local/functional_annotation/main.nf @@ -1,4 +1,6 @@ // Import Annotator Subworfklows +include { BLASTP } from '../blastp/main' + workflow FUNCTIONAL_ANNOTATION { @@ -25,8 +27,13 @@ workflow FUNCTIONAL_ANNOTATION { .set { ch_multifasta } // - // SUBWORKFLOW: Annotator Name + // SUBWORKFLOW: BLASTP // + BLASTP ( + ch_multifasta, params.blastp_outfmt + ) + ch_versions = ch_versions.mix(BLASTP.out.versions.first()) + emit: // TODO nf-core: edit emitted channels