From 1412c1210fa52b32faf6d9307b2124213cd90831 Mon Sep 17 00:00:00 2001
From: Jessica Rowell <JRowell@cdc.gov>
Date: Tue, 13 May 2025 16:44:06 -0400
Subject: [PATCH 1/7] initial commit to add blastp annotator

---
 modules.json                                  |  10 ++
 modules/nf-core/blast/blastp/environment.yml  |   7 +
 modules/nf-core/blast/blastp/main.nf          |  82 ++++++++++
 modules/nf-core/blast/blastp/meta.yml         |  82 ++++++++++
 .../nf-core/blast/blastp/tests/main.nf.test   | 149 ++++++++++++++++++
 .../blast/blastp/tests/main.nf.test.snap      | 147 +++++++++++++++++
 .../blast/blastp/tests/nextflow.config        |   5 +
 .../nf-core/blast/makeblastdb/environment.yml |   7 +
 modules/nf-core/blast/makeblastdb/main.nf     |  64 ++++++++
 modules/nf-core/blast/makeblastdb/meta.yml    |  49 ++++++
 .../blast/makeblastdb/tests/main.nf.test      |  90 +++++++++++
 .../blast/makeblastdb/tests/main.nf.test.snap |  58 +++++++
 .../blast/makeblastdb/tests/nextflow.config   |   5 +
 subworkflows/local/blastp/main.nf             |  36 +++++
 subworkflows/local/blastp/meta.yml            |  51 ++++++
 subworkflows/local/blastp/tests/main.nf.test  |  45 ++++++
 16 files changed, 887 insertions(+)
 create mode 100644 modules/nf-core/blast/blastp/environment.yml
 create mode 100644 modules/nf-core/blast/blastp/main.nf
 create mode 100644 modules/nf-core/blast/blastp/meta.yml
 create mode 100644 modules/nf-core/blast/blastp/tests/main.nf.test
 create mode 100644 modules/nf-core/blast/blastp/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/blast/blastp/tests/nextflow.config
 create mode 100644 modules/nf-core/blast/makeblastdb/environment.yml
 create mode 100644 modules/nf-core/blast/makeblastdb/main.nf
 create mode 100644 modules/nf-core/blast/makeblastdb/meta.yml
 create mode 100644 modules/nf-core/blast/makeblastdb/tests/main.nf.test
 create mode 100644 modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/blast/makeblastdb/tests/nextflow.config
 create mode 100644 subworkflows/local/blastp/main.nf
 create mode 100644 subworkflows/local/blastp/meta.yml
 create mode 100644 subworkflows/local/blastp/tests/main.nf.test

diff --git a/modules.json b/modules.json
index 58e1d30..3c98776 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,16 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "blast/blastp": {
+                        "branch": "master",
+                        "git_sha": "be58de79943289acb561a6246d1da5f85555a224",
+                        "installed_by": ["modules"]
+                    },
+                    "blast/makeblastdb": {
+                        "branch": "master",
+                        "git_sha": "c7a7f06819adcf6f922e11b47f308b7c74484d67",
+                        "installed_by": ["modules"]
+                    },
                     "multiqc": {
                         "branch": "master",
                         "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad",
diff --git a/modules/nf-core/blast/blastp/environment.yml b/modules/nf-core/blast/blastp/environment.yml
new file mode 100644
index 0000000..8fb1f8a
--- /dev/null
+++ b/modules/nf-core/blast/blastp/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::blast=2.16.0
diff --git a/modules/nf-core/blast/blastp/main.nf b/modules/nf-core/blast/blastp/main.nf
new file mode 100644
index 0000000..2947ae9
--- /dev/null
+++ b/modules/nf-core/blast/blastp/main.nf
@@ -0,0 +1,82 @@
+process BLAST_BLASTP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/5222a42b366a0468a4c795f5057c2b8cfe39489548f8bd807e8ac0f80069bad5/data':
+        'community.wave.seqera.io/library/blast:2.16.0--540f4b669b0a0ddd' }"
+
+    input:
+    tuple val(meta) , path(fasta) // query sequences; a file ending in .gz is decompressed before the search
+    tuple val(meta2), path(db)    // staged BLAST database; its name is derived below from the *.phr file found
+    val out_ext                   // 'xml', 'tsv' or 'csv'; any other value falls back to 'tsv' with a warning
+
+    output:
+    tuple val(meta), path("*.xml"), optional: true, emit: xml
+    tuple val(meta), path("*.tsv"), optional: true, emit: tsv
+    tuple val(meta), path("*.csv"), optional: true, emit: csv
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def is_compressed = fasta.getExtension() == "gz" ? true : false
+    def fasta_name = is_compressed ? fasta.getBaseName() : fasta
+    switch ( out_ext ) {
+        case "xml": outfmt = 5; break
+        case "tsv": outfmt = 6; break
+        case "csv": outfmt = 10; break
+        default:
+            // Warn before out_ext is overwritten, so the message reports the value that was actually supplied.
+            log.warn("Unknown output file format provided (${out_ext}): selecting BLAST default of tabular BLAST output (tsv)");
+            outfmt = 6; out_ext = 'tsv'
+            break
+    }
+
+    """
+    if [ "${is_compressed}" == "true" ]; then
+        gzip -c -d ${fasta} > ${fasta_name}
+    fi
+
+    DB=`find -L ./ -name "*.phr" | sed 's/\\.phr\$//'`
+    blastp \\
+        -query ${fasta_name} \\
+        -out ${prefix}.${out_ext} \\
+        -db \$DB \\
+        -num_threads ${task.cpus} \\
+        -outfmt ${outfmt} \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        blast: \$(blastp -version 2>&1 | sed 's/^.*blastp: //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    // Same extension fallback as the script block, so the stub touches the file name a real run would write.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    switch ( out_ext ) {
+        case "xml": outfmt = 5; break
+        case "tsv": outfmt = 6; break
+        case "csv": outfmt = 10; break
+        default:
+            // Warn before out_ext is overwritten, so the message reports the value that was actually supplied.
+            log.warn("Unknown output file format provided (${out_ext}): selecting BLAST default of tabular BLAST output (tsv)");
+            outfmt = 6; out_ext = 'tsv'
+            break
+    }
+
+    """
+    touch ${prefix}.${out_ext}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        blast: \$(blastp -version 2>&1 | sed 's/^.*blastp: //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/blast/blastp/meta.yml b/modules/nf-core/blast/blastp/meta.yml
new file mode 100644
index 0000000..27052a2
--- /dev/null
+++ b/modules/nf-core/blast/blastp/meta.yml
@@ -0,0 +1,82 @@
+name: blast_blastp
+description: BLASTP (Basic Local Alignment Search Tool- Protein) compares an amino
+  acid (protein) query sequence against a protein database
+keywords:
+  - fasta
+  - blast
+  - blastp
+  - protein
+tools:
+  - blast:
+      description: |
+        BLAST+ is a new suite of BLAST tools that utilizes the NCBI C++ Toolkit.
+      homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi
+      documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs
+      doi: 10.1016/S0022-2836(05)80360-2
+      licence: ["US-Government-Work"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'test', single_end:false ]`
+    - fasta:
+        type: file
+        description: Input fasta file containing query sequences
+        pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing db information
+          e.g. [ id:'test2', single_end:false ]
+    - db:
+        type: directory
+        description: Directory containing the blast database
+        pattern: "*"
+  - - out_ext:
+        type: string
+        description: |
+          Specify the type of output file to be generated. `xml` corresponds to BLAST xml format.
+          `tsv` corresponds to BLAST tabular format. `csv` corresponds to BLAST comma separated format.
+        pattern: "xml|tsv|csv"
+output:
+  - xml:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test', single_end:false ]`
+      - "*.xml":
+          type: file
+          description: File containing blastp hits in XML format
+          pattern: "*.{xml}"
+  - tsv:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test', single_end:false ]`
+      - "*.tsv":
+          type: file
+          description: File containing blastp hits in tabular format
+          pattern: "*.{tsv}"
+  - csv:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test', single_end:false ]`
+      - "*.csv":
+          type: file
+          description: File containing blastp hits in comma separated format
+          pattern: "*.csv"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@vagkaratzas"
+maintainers:
+  - "@vagkaratzas"
diff --git a/modules/nf-core/blast/blastp/tests/main.nf.test b/modules/nf-core/blast/blastp/tests/main.nf.test
new file mode 100644
index 0000000..eb69fb8
--- /dev/null
+++ b/modules/nf-core/blast/blastp/tests/main.nf.test
@@ -0,0 +1,149 @@
+nextflow_process {
+
+    name "Test Process BLAST_BLASTP"
+    script "../main.nf"
+    process "BLAST_BLASTP"
+    config "./nextflow.config"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "blast"
+    tag "blast/blastp"
+    tag "blast/makeblastdb"
+
+    setup {
+        run("BLAST_MAKEBLASTDB") {
+            script "../../makeblastdb/main.nf"
+            process {
+                """
+                input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                """
+            }
+        }
+    }
+
+    test("Should search for proteins against a blast db and return the default (tsv) file with hits") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = BLAST_MAKEBLASTDB.out.db
+                input[2] = ''
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.tsv).match("tsv_default") },
+                { assert snapshot(process.out.versions).match("versions_tsv_default") }
+            )
+        }
+
+    }
+
+    test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ]
+                input[1] = BLAST_MAKEBLASTDB.out.db
+                input[2] = ''
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.tsv).match("gz_tsv_default") },
+                { assert snapshot(process.out.versions).match("versions_gz_tsv_default") }
+            )
+        }
+
+    }
+
+    test("Should search for proteins against a blast db and return an xml file with hits") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = BLAST_MAKEBLASTDB.out.db
+                input[2] = 'xml'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.xml).match("xml") },
+                { assert snapshot(process.out.versions).match("versions_xml") }
+            )
+        }
+
+    }
+
+    test("Should search for proteins against a blast db and return a tsv file with hits") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = BLAST_MAKEBLASTDB.out.db
+                input[2] = 'tsv'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.tsv).match("tsv") },
+                { assert snapshot(process.out.versions).match("versions_tsv") }
+            )
+        }
+
+    }
+
+    test("Should search for proteins against a blast db and return a csv file with hits") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = BLAST_MAKEBLASTDB.out.db
+                input[2] = 'csv'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.csv).match("csv") },
+                { assert snapshot(process.out.versions).match("versions_csv") }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/blast/blastp/tests/main.nf.test.snap b/modules/nf-core/blast/blastp/tests/main.nf.test.snap
new file mode 100644
index 0000000..732c191
--- /dev/null
+++ b/modules/nf-core/blast/blastp/tests/main.nf.test.snap
@@ -0,0 +1,147 @@
+{
+    "tsv_default": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2023-11-07T11:45:50.969016761"
+    },
+    "versions_tsv_default": {
+        "content": [
+            [
+                "versions.yml:md5,e128101d410ec4eb0205f170435dbef9"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-01T11:32:56.182516991"
+    },
+    "xml": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.xml:md5,14b191547a4990bfda1fe836e8e61f5c"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-01T11:33:06.103607353"
+    },
+    "tsv": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2023-11-07T11:45:50.969016761"
+    },
+    "versions_csv": {
+        "content": [
+            [
+                "versions.yml:md5,e128101d410ec4eb0205f170435dbef9"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-01T11:33:18.404632639"
+    },
+    "versions_xml": {
+        "content": [
+            [
+                "versions.yml:md5,e128101d410ec4eb0205f170435dbef9"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-01T11:33:06.107483927"
+    },
+    "versions_tsv": {
+        "content": [
+            [
+                "versions.yml:md5,e128101d410ec4eb0205f170435dbef9"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-01T11:33:10.656751678"
+    },
+    "csv": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.csv:md5,19214c68e1a208705dd4d6f0c5dea78b"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2023-11-07T11:45:57.39156832"
+    },
+    "gz_tsv_default": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.tsv:md5,dd58eb09f38e6f951aa00c133a18ef5b"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2023-11-07T11:45:57.39156832"
+    },
+    "versions_gz_tsv_default": {
+        "content": [
+            [
+                "versions.yml:md5,e128101d410ec4eb0205f170435dbef9"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-01T11:33:01.261812684"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/blast/blastp/tests/nextflow.config b/modules/nf-core/blast/blastp/tests/nextflow.config
new file mode 100644
index 0000000..e993c72
--- /dev/null
+++ b/modules/nf-core/blast/blastp/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: BLAST_MAKEBLASTDB {
+        ext.args = '-dbtype prot'
+    }
+}
diff --git a/modules/nf-core/blast/makeblastdb/environment.yml b/modules/nf-core/blast/makeblastdb/environment.yml
new file mode 100644
index 0000000..8fb1f8a
--- /dev/null
+++ b/modules/nf-core/blast/makeblastdb/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::blast=2.16.0
diff --git a/modules/nf-core/blast/makeblastdb/main.nf b/modules/nf-core/blast/makeblastdb/main.nf
new file mode 100644
index 0000000..796c7be
--- /dev/null
+++ b/modules/nf-core/blast/makeblastdb/main.nf
@@ -0,0 +1,64 @@
+process BLAST_MAKEBLASTDB {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/blast:2.16.0--h66d330f_5':
+        'biocontainers/blast:2.16.0--h66d330f_5' }"
+
+    input:
+    tuple val(meta), path(fasta) // sequences to index; a file ending in .gz is decompressed first
+
+    output:
+    tuple val(meta), path("${prefix}"), emit: db // directory the database files are written into
+    path "versions.yml"               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args           = task.ext.args ?: ''
+    prefix             = task.ext.prefix ?: "${meta.id}" // no `def`: the output declaration interpolates ${prefix}
+    def is_compressed  = fasta.getExtension() == "gz" ? true : false
+    def fasta_name     = is_compressed ? fasta.getBaseName() : fasta
+    """
+    if [ "${is_compressed}" == "true" ]; then
+        gzip -c -d ${fasta} > ${fasta_name}
+    fi
+
+    makeblastdb \\
+        -in ${fasta_name} \\
+        -out ${prefix}/${fasta_name} \\
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        blast: \$(makeblastdb -version 2>&1 | sed 's/^.*makeblastdb: //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    // Create the placeholder files directly inside the output directory, under the names the script
+    // block's `-out` path yields, and leave the staged input where it is. Nucleotide-style extensions
+    // are used regardless of any -dbtype passed through ext.args.
+    prefix             = task.ext.prefix ?: "${meta.id}"
+    def is_compressed  = fasta.getExtension() == "gz" ? true : false
+    def fasta_name     = is_compressed ? fasta.getBaseName() : fasta
+    """
+    mkdir -p ${prefix}
+    touch ${prefix}/${fasta_name}.ndb
+    touch ${prefix}/${fasta_name}.nhr
+    touch ${prefix}/${fasta_name}.nin
+    touch ${prefix}/${fasta_name}.njs
+    touch ${prefix}/${fasta_name}.not
+    touch ${prefix}/${fasta_name}.nsq
+    touch ${prefix}/${fasta_name}.ntf
+    touch ${prefix}/${fasta_name}.nto
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        blast: \$(makeblastdb -version 2>&1 | sed 's/^.*makeblastdb: //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/blast/makeblastdb/meta.yml b/modules/nf-core/blast/makeblastdb/meta.yml
new file mode 100644
index 0000000..3b50654
--- /dev/null
+++ b/modules/nf-core/blast/makeblastdb/meta.yml
@@ -0,0 +1,49 @@
+name: blast_makeblastdb
+description: Builds a BLAST database
+keywords:
+  - fasta
+  - blast
+  - database
+tools:
+  - blast:
+      description: |
+        BLAST finds regions of similarity between biological sequences.
+      homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi
+      documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs
+      doi: 10.1016/S0022-2836(05)80360-2
+      licence: ["US-Government-Work"]
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - fasta:
+        type: file
+        description: Input fasta file
+        pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
+output:
+  - db:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. [ id:'test', single_end:false ]
+      - ${prefix}:
+          type: directory
+          description: Output directory containing blast database files
+          pattern: "*"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@joseespinosa"
+  - "@drpatelh"
+maintainers:
+  - "@joseespinosa"
+  - "@drpatelh"
+  - "@vagkaratzas"
+  - "@DLBPointon"
diff --git a/modules/nf-core/blast/makeblastdb/tests/main.nf.test b/modules/nf-core/blast/makeblastdb/tests/main.nf.test
new file mode 100644
index 0000000..b822689
--- /dev/null
+++ b/modules/nf-core/blast/makeblastdb/tests/main.nf.test
@@ -0,0 +1,90 @@
+nextflow_process {
+
+    name "Test Process BLAST_MAKEBLASTDB"
+    script "../main.nf"
+    process "BLAST_MAKEBLASTDB"
+    config "./nextflow.config"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "blast"
+    tag "blast/makeblastdb"
+
+    test("Should build a blast db folder from a fasta file") {
+
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert process.out.db.size() == 1
+
+                    def all_files = ( new File(process.out.db[0][1]) ).listFiles()
+                    def stable_file_names = [
+                        'genome.fasta.ndb',
+                        'genome.fasta.nhr',
+                        'genome.fasta.not',
+                        'genome.fasta.nsq',
+                        'genome.fasta.ntf',
+                        'genome.fasta.nto'
+                    ]
+
+                    def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted()
+
+                    assert snapshot(
+                        all_files.collect { it.name }.toSorted(),
+                        stable_files,
+                        process.out.versions[0]
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+    test("Should build a blast db folder from a zipped fasta file") {
+
+        when {
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                {
+                    assert process.out.db.size() == 1
+
+                    def all_files = ( new File(process.out.db[0][1]) ).listFiles()
+                    def stable_file_names = [
+                        'genome.fasta.ndb',
+                        'genome.fasta.nhr',
+                        'genome.fasta.not',
+                        'genome.fasta.nsq',
+                        'genome.fasta.ntf',
+                        'genome.fasta.nto'
+                    ]
+
+                    def stable_files = all_files.findAll { it.name in stable_file_names }.toSorted()
+
+                    assert snapshot(
+                        all_files.collect { it.name }.toSorted(),
+                        stable_files,
+                        process.out.versions[0]
+                    ).match()
+                }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap b/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap
new file mode 100644
index 0000000..8154acb
--- /dev/null
+++ b/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap
@@ -0,0 +1,58 @@
+{
+    "Should build a blast db folder from a fasta file": {
+        "content": [
+            [
+                "genome.fasta.ndb",
+                "genome.fasta.nhr",
+                "genome.fasta.nin",
+                "genome.fasta.njs",
+                "genome.fasta.not",
+                "genome.fasta.nsq",
+                "genome.fasta.ntf",
+                "genome.fasta.nto"
+            ],
+            [
+                "genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d",
+                "genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e",
+                "genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb",
+                "genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da",
+                "genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb",
+                "genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb"
+            ],
+            "versions.yml:md5,91a8afa89354bef8a3c127cafaf1f46d"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-12T09:03:14.830721389"
+    },
+    "Should build a blast db folder from a zipped fasta file": {
+        "content": [
+            [
+                "genome.fasta.ndb",
+                "genome.fasta.nhr",
+                "genome.fasta.nin",
+                "genome.fasta.njs",
+                "genome.fasta.not",
+                "genome.fasta.nsq",
+                "genome.fasta.ntf",
+                "genome.fasta.nto"
+            ],
+            [
+                "genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d",
+                "genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e",
+                "genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb",
+                "genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da",
+                "genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb",
+                "genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb"
+            ],
+            "versions.yml:md5,91a8afa89354bef8a3c127cafaf1f46d"
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.5"
+        },
+        "timestamp": "2025-04-12T09:03:23.653118873"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/blast/makeblastdb/tests/nextflow.config b/modules/nf-core/blast/makeblastdb/tests/nextflow.config
new file mode 100644
index 0000000..0899289
--- /dev/null
+++ b/modules/nf-core/blast/makeblastdb/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: BLAST_MAKEBLASTDB {
+        ext.args = '-dbtype nucl'
+    }
+}
diff --git a/subworkflows/local/blastp/main.nf b/subworkflows/local/blastp/main.nf
new file mode 100644
index 0000000..0e348a5
--- /dev/null
+++ b/subworkflows/local/blastp/main.nf
@@ -0,0 +1,36 @@
+// TODO nf-core: If in doubt look at other nf-core/subworkflows to see how we are doing things! :)
+//               https://github.com/nf-core/modules/tree/master/subworkflows
+//               You can also ask for help via your pull request or on the #subworkflows channel on the nf-core Slack workspace:
+//               https://nf-co.re/join
+// TODO nf-core: A subworkflow SHOULD import at least two modules
+
+include { SAMTOOLS_SORT      } from '../../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
+
+workflow BLAST {
+
+    take:
+    // TODO nf-core: edit input (take) channels
+    ch_bam // channel: [ val(meta), [ bam ] ]
+
+    main:
+
+    ch_versions = Channel.empty()
+
+    // TODO nf-core: substitute modules here for the modules of your subworkflow
+
+    SAMTOOLS_SORT ( ch_bam )
+    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
+
+    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
+    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+
+    emit:
+    // TODO nf-core: edit emitted channels
+    bam      = SAMTOOLS_SORT.out.bam           // channel: [ val(meta), [ bam ] ]
+    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
+    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
+
+    versions = ch_versions                     // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/local/blastp/meta.yml b/subworkflows/local/blastp/meta.yml
new file mode 100644
index 0000000..f7f593f
--- /dev/null
+++ b/subworkflows/local/blastp/meta.yml
@@ -0,0 +1,51 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "blast"
+## TODO nf-core: Add a description of the subworkflow and list keywords
+description: Sort SAM/BAM/CRAM file
+keywords:
+  - sort
+  - bam
+  - sam
+  - cram
+## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow
+components:
+  - samtools/sort
+  - samtools/index
+## TODO nf-core: List all of the channels used as input with a description and their structure
+input:
+  - ch_bam:
+      type: file
+      description: |
+        The input channel containing the BAM/CRAM/SAM files
+        Structure: [ val(meta), path(bam) ]
+      pattern: "*.{bam/cram/sam}"
+## TODO nf-core: List all of the channels used as output with a descriptions and their structure
+output:
+  - bam:
+      type: file
+      description: |
+        Channel containing BAM files
+        Structure: [ val(meta), path(bam) ]
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: |
+        Channel containing indexed BAM (BAI) files
+        Structure: [ val(meta), path(bai) ]
+      pattern: "*.bai"
+  - csi:
+      type: file
+      description: |
+        Channel containing CSI files
+        Structure: [ val(meta), path(csi) ]
+      pattern: "*.csi"
+  - versions:
+      type: file
+      description: |
+        File containing software versions
+        Structure: [ path(versions.yml) ]
+      pattern: "versions.yml"
+authors:
+  - "@jessicarowell"
+maintainers:
+  - "@jessicarowell"
diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test
new file mode 100644
index 0000000..01bdc3f
--- /dev/null
+++ b/subworkflows/local/blastp/tests/main.nf.test
@@ -0,0 +1,45 @@
+// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
+// nf-core subworkflows test blast
+nextflow_workflow {
+
+    name "Test Subworkflow BLAST"
+    script "../main.nf"
+    workflow "BLAST"
+
+    tag "subworkflows"
+    tag "subworkflows_"
+    tag "subworkflows/blast"
+    // TODO nf-core: Add tags for all modules used within this subworkflow. Example:
+    tag "samtools"
+    tag "samtools/sort"
+    tag "samtools/index"
+
+
+    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
+    test("sarscov2 - bam - single_end") {
+
+        when {
+            workflow {
+                """
+                // TODO nf-core: define inputs of the workflow here. Example:
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                ]
+                input[1] = [
+                    [ id:'genome' ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(workflow.out).match()}
+                //TODO nf-core: Add all required assertions to verify the test output.
+            )
+        }
+    }
+}

From 9b096d389174adf3e233ac6d780fd0e4200e0d95 Mon Sep 17 00:00:00 2001
From: Jessica Rowell <JRowell@cdc.gov>
Date: Tue, 13 May 2025 16:52:16 -0400
Subject: [PATCH 2/7] trailing whitespace: functional_annotation subworkflow

---
 subworkflows/local/functional_annotation/main.nf | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/subworkflows/local/functional_annotation/main.nf b/subworkflows/local/functional_annotation/main.nf
index 6ba000a..83068e7 100644
--- a/subworkflows/local/functional_annotation/main.nf
+++ b/subworkflows/local/functional_annotation/main.nf
@@ -1,4 +1,6 @@
-// Import Annotator Subworfklows 
+// Import Annotator Subworkflows
+include { BLASTP                } from '../blastp/main'
+
 
 workflow FUNCTIONAL_ANNOTATION {
 
@@ -25,8 +27,13 @@ workflow FUNCTIONAL_ANNOTATION {
         .set { ch_multifasta }
 
     //
-    // SUBWORKFLOW: Annotator Name
+    // SUBWORKFLOW: BLASTP
     //
+    BLASTP (
+    ch_multifasta
+    )
+    ch_versions = ch_versions.mix(BLASTP.out.versions.first())
+
 
     emit:
     // TODO nf-core: edit emitted channels

From 511e5ef85c0e4984fed90e7b7f19bcd62f253330 Mon Sep 17 00:00:00 2001
From: Jessica Rowell <JRowell@cdc.gov>
Date: Wed, 14 May 2025 12:22:42 -0400
Subject: [PATCH 3/7] adding nf-tests and fixing subworkflow

---
 conf/test.config                             |   3 +-
 nextflow.config                              |   5 +
 subworkflows/local/blastp/main.nf            |  27 ++--
 subworkflows/local/blastp/tests/main.nf.test | 145 ++++++++++++++++---
 4 files changed, 143 insertions(+), 37 deletions(-)

diff --git a/conf/test.config b/conf/test.config
index 9de8668..d87ff22 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -27,5 +27,6 @@ params {
     // TODO nf-core: Give any required params for the test so that command line flags are not needed
     // From: https://github.com/nf-core/proteinfold/blob/1.1.1/conf/test.config
     // Example: https://github.com/nf-core/test-datasets/blob/proteinfold/testdata/samplesheet/v1.2/samplesheet.csv
-    input           = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv'
+    input                       = params.pipelines_testdata_base_path + 'proteinfold/testdata/samplesheet/v1.2/samplesheet.csv'
+    modules_testdata_base_path  = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
 }
diff --git a/nextflow.config b/nextflow.config
index 9b72f6d..01e0ac8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -20,6 +20,10 @@ params {
     max_multiqc_email_size     = '25.MB'
     multiqc_methods_description = null
 
+    // Blastp options
+    blast_ref_fasta              = null
+    blastp_outfmt                = 'tsv' // tsv, csv, xml
+
     // Boilerplate options
     outdir                       = null
     publish_dir_mode             = 'copy'
@@ -33,6 +37,7 @@ params {
     show_hidden                  = false
     version                      = false
     pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'
+    modules_testdata_base_path   = null
     trace_report_suffix          = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options
     config_profile_name        = null
     config_profile_description = null
diff --git a/subworkflows/local/blastp/main.nf b/subworkflows/local/blastp/main.nf
index 0e348a5..0336837 100644
--- a/subworkflows/local/blastp/main.nf
+++ b/subworkflows/local/blastp/main.nf
@@ -4,33 +4,32 @@
 //               https://nf-co.re/join
 // TODO nf-core: A subworkflow SHOULD import at least two modules
 
-include { SAMTOOLS_SORT      } from '../../../modules/nf-core/samtools/sort/main'
-include { SAMTOOLS_INDEX     } from '../../../modules/nf-core/samtools/index/main'
+include { MAKEBLASTDB      } from '../../../modules/nf-core/blast/makeblastdb/main'
+include { BLASTP           } from '../../../modules/nf-core/blast/blastp/main'
 
 workflow BLAST {
 
     take:
-    // TODO nf-core: edit input (take) channels
-    ch_bam // channel: [ val(meta), [ bam ] ]
+    ch_fasta // channel: [ val(meta), [ fasta ] ]
+    blastp_outfmt
 
     main:
 
     ch_versions = Channel.empty()
 
     // TODO nf-core: substitute modules here for the modules of your subworkflow
+    makeblastdb_input = file("${params.blast_ref_fasta}")
+    MAKEBLASTDB ( [ [ id: makeblastdb_input.getSimpleName() ] , makeblastdb_input] )
+    ch_versions = ch_versions.mix(MAKEBLASTDB.out.versions.first())
 
-    SAMTOOLS_SORT ( ch_bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first())
-
-    SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam )
-    ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())
+    BLASTP ( ch_fasta, MAKEBLASTDB.out.db, blastp_outfmt)
+    ch_versions = ch_versions.mix(BLASTP.out.versions.first())
 
     emit:
-    // TODO nf-core: edit emitted channels
-    bam      = SAMTOOLS_SORT.out.bam           // channel: [ val(meta), [ bam ] ]
-    bai      = SAMTOOLS_INDEX.out.bai          // channel: [ val(meta), [ bai ] ]
-    csi      = SAMTOOLS_INDEX.out.csi          // channel: [ val(meta), [ csi ] ]
+    xml      = BLASTP.out.xml           // channel: [ val(meta), [ xml ] ]
+    csv      = BLASTP.out.csv           // channel: [ val(meta), [ csv ] ]
+    tsv      = BLASTP.out.tsv           // channel: [ val(meta), [ tsv ] ]
 
-    versions = ch_versions                     // channel: [ versions.yml ]
+    versions = ch_versions              // channel: [ versions.yml ]
 }
 
diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test
index 01bdc3f..29bcff7 100644
--- a/subworkflows/local/blastp/tests/main.nf.test
+++ b/subworkflows/local/blastp/tests/main.nf.test
@@ -1,44 +1,145 @@
 // TODO nf-core: Once you have added the required tests, please run the following command to build this file:
-// nf-core subworkflows test blast
+// nf-core subworkflows test blastp
 nextflow_workflow {
 
-    name "Test Subworkflow BLAST"
+    name "Test Subworkflow BLASTP"
     script "../main.nf"
-    workflow "BLAST"
+    workflow "BLASTP"
 
     tag "subworkflows"
     tag "subworkflows_"
     tag "subworkflows/blast"
-    // TODO nf-core: Add tags for all modules used within this subworkflow. Example:
-    tag "samtools"
-    tag "samtools/sort"
-    tag "samtools/index"
+    // all modules used within this subworkflow.
+    tag "blast"
+    tag "blast/makeblastdb"
+    tag "blast/blastp"
 
 
-    // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
-    test("sarscov2 - bam - single_end") {
+    test("Should search for proteins against a blast db and return the default (tsv) file with hits") {
 
         when {
-            workflow {
+            params {
+                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = ''
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.tsv,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") {
+
+        when {
+            params {
+                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ]
+                input[1] = ''
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.tsv,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("Should search for proteins against a blast db and return an xml file with hits") {
+
+        when {
+            params {
+                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = 'xml'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.xml,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("Should search for proteins against a blast db and return a tsv file with hits") {
+
+        when {
+            params {
+                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+            }
+            process {
+                """
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = 'tsv'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.tsv,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("Should search for proteins against a blast db and return a csv file with hits") {
+
+        when {
+            params {
+                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+            }
+            process {
                 """
-                // TODO nf-core: define inputs of the workflow here. Example:
-                input[0] = [
-                    [ id:'test', single_end:false ], // meta map
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
-                ]
-                input[1] = [
-                    [ id:'genome' ],
-                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
-                ]
+                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[1] = 'csv'
                 """
             }
         }
 
         then {
             assertAll(
-                { assert workflow.success},
-                { assert snapshot(workflow.out).match()}
-                //TODO nf-core: Add all required assertions to verify the test output.
+                { assert process.success },
+                { assert snapshot(
+                    process.out.csv,
+                    process.out.versions
+                ).match() }
             )
         }
     }

From ae0d27679921baf60a78abb178efc81856bbc029 Mon Sep 17 00:00:00 2001
From: Jessica Rowell <JRowell@cdc.gov>
Date: Wed, 14 May 2025 16:30:58 -0400
Subject: [PATCH 4/7] modifications to get nf-test working for blast annotator

---
 subworkflows/local/blastp/main.nf             |  21 +-
 subworkflows/local/blastp/tests/main.nf.test  | 218 +++++++++---------
 .../local/functional_annotation/main.nf       |   2 +-
 3 files changed, 122 insertions(+), 119 deletions(-)

diff --git a/subworkflows/local/blastp/main.nf b/subworkflows/local/blastp/main.nf
index 0336837..72a1455 100644
--- a/subworkflows/local/blastp/main.nf
+++ b/subworkflows/local/blastp/main.nf
@@ -4,10 +4,10 @@
 //               https://nf-co.re/join
 // TODO nf-core: A subworkflow SHOULD import at least two modules
 
-include { MAKEBLASTDB      } from '../../../modules/nf-core/blast/makeblastdb/main'
-include { BLASTP           } from '../../../modules/nf-core/blast/blastp/main'
+include { BLAST_MAKEBLASTDB       } from '../../../modules/nf-core/blast/makeblastdb/main'
+include { BLAST_BLASTP            } from '../../../modules/nf-core/blast/blastp/main'
 
-workflow BLAST {
+workflow BLASTP {
 
     take:
     ch_fasta // channel: [ val(meta), [ fasta ] ]
@@ -19,16 +19,17 @@ workflow BLAST {
 
     // TODO nf-core: substitute modules here for the modules of your subworkflow
     makeblastdb_input = file("${params.blast_ref_fasta}")
-    MAKEBLASTDB ( [ [ id: makeblastdb_input.getSimpleName() ] , makeblastdb_input] )
-    ch_versions = ch_versions.mix(MAKEBLASTDB.out.versions.first())
+    BLAST_MAKEBLASTDB ( [ [ id: makeblastdb_input.getSimpleName() ] , makeblastdb_input] )
+    ch_versions = ch_versions.mix(BLAST_MAKEBLASTDB.out.versions)
+
+    BLAST_BLASTP ( ch_fasta, BLAST_MAKEBLASTDB.out.db, blastp_outfmt)
+    ch_versions = ch_versions.mix(BLAST_BLASTP.out.versions)
 
-    BLASTP ( ch_fasta, MAKEBLASTDB.out.db, blastp_outfmt)
-    ch_versions = ch_versions.mix(BLASTP.out.versions.first())
 
     emit:
-    xml      = BLASTP.out.xml           // channel: [ val(meta), [ xml ] ]
-    csv      = BLASTP.out.csv           // channel: [ val(meta), [ csv ] ]
-    tsv      = BLASTP.out.tsv           // channel: [ val(meta), [ tsv ] ]
+    xml      = BLAST_BLASTP.out.xml           // channel: [ val(meta), [ xml ] ]
+    csv      = BLAST_BLASTP.out.csv           // channel: [ val(meta), [ csv ] ]
+    tsv      = BLAST_BLASTP.out.tsv           // channel: [ val(meta), [ tsv ] ]
 
     versions = ch_versions              // channel: [ versions.yml ]
 }
diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test
index 29bcff7..9743538 100644
--- a/subworkflows/local/blastp/tests/main.nf.test
+++ b/subworkflows/local/blastp/tests/main.nf.test
@@ -19,11 +19,13 @@ nextflow_workflow {
 
         when {
             params {
-                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+                //blast_ref_fasta = 'proteome.fasta'
+                blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta'
+                //blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
             }
-            process {
+            workflow {
                 """
-                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+                input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
                 input[1] = ''
                 """
             }
@@ -31,116 +33,116 @@ nextflow_workflow {
 
         then {
             assertAll(
-                { assert process.success },
+                { assert workflow.success },
                 { assert snapshot(
-                    process.out.tsv,
-                    process.out.versions
+                    workflow.out.tsv,
+                    workflow.out.versions
                 ).match() }
             )
         }
 
     }
 
-    test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") {
-
-        when {
-            params {
-                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
-            }
-            process {
-                """
-                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ]
-                input[1] = ''
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.tsv,
-                    process.out.versions
-                ).match() }
-            )
-        }
-
-    }
-
-    test("Should search for proteins against a blast db and return an xml file with hits") {
-
-        when {
-            params {
-                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
-            }
-            process {
-                """
-                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
-                input[1] = 'xml'
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.xml,
-                    process.out.versions
-                ).match() }
-            )
-        }
-
-    }
-
-    test("Should search for proteins against a blast db and return a tsv file with hits") {
-
-        when {
-            params {
-                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
-            }
-            process {
-                """
-                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
-                input[1] = 'tsv'
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.tsv,
-                    process.out.versions
-                ).match() }
-            )
-        }
-
-    }
-
-    test("Should search for proteins against a blast db and return a csv file with hits") {
-
-        when {
-            params {
-                blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
-            }
-            process {
-                """
-                input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
-                input[1] = 'csv'
-                """
-            }
-        }
-
-        then {
-            assertAll(
-                { assert process.success },
-                { assert snapshot(
-                    process.out.csv,
-                    process.out.versions
-                ).match() }
-            )
-        }
-    }
+    // test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") {
+
+    //     when {
+    //         params {
+    //             blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+    //         }
+    //         workflow {
+    //             """
+    //             input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
+    //             input[1] = ''
+    //             """
+    //         }
+    //     }
+
+    //     then {
+    //         assertAll(
+    //             { assert workflow.success },
+    //             { assert snapshot(
+    //                 workflow.out.tsv,
+    //                 workflow.out.versions
+    //             ).match() }
+    //         )
+    //     }
+
+    // }
+
+    // test("Should search for proteins against a blast db and return an xml file with hits") {
+
+    //     when {
+    //         params {
+    //             blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+    //         }
+    //         workflow {
+    //             """
+    //             input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
+    //             input[1] = 'xml'
+    //             """
+    //         }
+    //     }
+
+    //     then {
+    //         assertAll(
+    //             { assert workflow.success },
+    //             { assert snapshot(
+    //                 workflow.out.xml,
+    //                 workflow.out.versions
+    //             ).match() }
+    //         )
+    //     }
+
+    // }
+
+    // test("Should search for proteins against a blast db and return a tsv file with hits") {
+
+    //     when {
+    //         params {
+    //             blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+    //         }
+    //         workflow {
+    //             """
+    //             input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
+    //             input[1] = 'tsv'
+    //             """
+    //         }
+    //     }
+
+    //     then {
+    //         assertAll(
+    //             { assert workflow.success },
+    //             { assert snapshot(
+    //                 workflow.out.tsv,
+    //                 workflow.out.versions
+    //             ).match() }
+    //         )
+    //     }
+
+    // }
+
+    // test("Should search for proteins against a blast db and return a csv file with hits") {
+
+    //     when {
+    //         params {
+    //             blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
+    //         }
+    //         workflow {
+    //             """
+    //             input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
+    //             input[1] = 'csv'
+    //             """
+    //         }
+    //     }
+
+    //     then {
+    //         assertAll(
+    //             { assert workflow.success },
+    //             { assert snapshot(
+    //                 workflow.out.csv,
+    //                 workflow.out.versions
+    //             ).match() }
+    //         )
+    //     }
+    // }
 }
diff --git a/subworkflows/local/functional_annotation/main.nf b/subworkflows/local/functional_annotation/main.nf
index 83068e7..d9ba434 100644
--- a/subworkflows/local/functional_annotation/main.nf
+++ b/subworkflows/local/functional_annotation/main.nf
@@ -30,7 +30,7 @@ workflow FUNCTIONAL_ANNOTATION {
     // SUBWORKFLOW: BLASTP
     //
     BLASTP (
-    ch_multifasta
+    ch_multifasta, params.blastp_outfmt
     )
     ch_versions = ch_versions.mix(BLASTP.out.versions.first())
 

From e836af0700490cb4ac056a496317ebd3e68940db Mon Sep 17 00:00:00 2001
From: Jessica Rowell <JRowell@cdc.gov>
Date: Wed, 14 May 2025 16:32:53 -0400
Subject: [PATCH 5/7] add default dbtype for makeblastdb

---
 conf/modules.config | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/conf/modules.config b/conf/modules.config
index 94a2a32..0e9623c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -30,4 +30,8 @@ process {
     withName: SEQKIT_STATS {
         ext.args = ' ' // turn off --all default argument
     }
+
+    withName: BLAST_MAKEBLASTDB {
+        ext.args = '-dbtype prot'
+    }
 }

From e59fdf7011f060b69d3c9b6e02bb6bd3345fe1ef Mon Sep 17 00:00:00 2001
From: Jessica Rowell <JRowell@cdc.gov>
Date: Wed, 14 May 2025 17:14:06 -0400
Subject: [PATCH 6/7] correct nf tests

---
 subworkflows/local/blastp/tests/main.nf.test | 205 +++++++++----------
 1 file changed, 102 insertions(+), 103 deletions(-)

diff --git a/subworkflows/local/blastp/tests/main.nf.test b/subworkflows/local/blastp/tests/main.nf.test
index 9743538..2e9bb8b 100644
--- a/subworkflows/local/blastp/tests/main.nf.test
+++ b/subworkflows/local/blastp/tests/main.nf.test
@@ -19,7 +19,6 @@ nextflow_workflow {
 
         when {
             params {
-                //blast_ref_fasta = 'proteome.fasta'
                 blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta'
                 //blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
             }
@@ -43,106 +42,106 @@ nextflow_workflow {
 
     }
 
-    // test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") {
-
-    //     when {
-    //         params {
-    //             blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
-    //         }
-    //         workflow {
-    //             """
-    //             input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
-    //             input[1] = ''
-    //             """
-    //         }
-    //     }
-
-    //     then {
-    //         assertAll(
-    //             { assert workflow.success },
-    //             { assert snapshot(
-    //                 workflow.out.tsv,
-    //                 workflow.out.versions
-    //             ).match() }
-    //         )
-    //     }
-
-    // }
-
-    // test("Should search for proteins against a blast db and return an xml file with hits") {
-
-    //     when {
-    //         params {
-    //             blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
-    //         }
-    //         workflow {
-    //             """
-    //             input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
-    //             input[1] = 'xml'
-    //             """
-    //         }
-    //     }
-
-    //     then {
-    //         assertAll(
-    //             { assert workflow.success },
-    //             { assert snapshot(
-    //                 workflow.out.xml,
-    //                 workflow.out.versions
-    //             ).match() }
-    //         )
-    //     }
-
-    // }
-
-    // test("Should search for proteins against a blast db and return a tsv file with hits") {
-
-    //     when {
-    //         params {
-    //             blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
-    //         }
-    //         workflow {
-    //             """
-    //             input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
-    //             input[1] = 'tsv'
-    //             """
-    //         }
-    //     }
-
-    //     then {
-    //         assertAll(
-    //             { assert workflow.success },
-    //             { assert snapshot(
-    //                 workflow.out.tsv,
-    //                 workflow.out.versions
-    //             ).match() }
-    //         )
-    //     }
-
-    // }
-
-    // test("Should search for proteins against a blast db and return a csv file with hits") {
-
-    //     when {
-    //         params {
-    //             blast_ref_fasta = params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta'
-    //         }
-    //         workflow {
-    //             """
-    //             input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
-    //             input[1] = 'csv'
-    //             """
-    //         }
-    //     }
-
-    //     then {
-    //         assertAll(
-    //             { assert workflow.success },
-    //             { assert snapshot(
-    //                 workflow.out.csv,
-    //                 workflow.out.versions
-    //             ).match() }
-    //         )
-    //     }
-    // }
+    test("Should search for zipped proteins against a blast db and return the default (tsv) file with hits") {
+
+        when {
+            params {
+                blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta'
+                // The database is built from the plain FASTA above; the query below is its gzipped copy (same URL + '.gz')
+            }
+            workflow {
+                """
+                input[0] = [ [id:'test'], file(params.blast_ref_fasta + '.gz', checkIfExists: true) ]
+                input[1] = ''
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.tsv,
+                    workflow.out.versions
+                ).match() }
+            )
+        }
+    }
+
+    test("Should search for proteins against a blast db and return an xml file with hits") {
+
+        when {
+            params {
+                blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta'
+            }
+            workflow {
+                """
+                input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
+                input[1] = 'xml'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.xml,
+                    workflow.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("Should search for proteins against a blast db and return a tsv file with hits") {
+
+        when {
+            params {
+                blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta'
+            }
+            workflow {
+                """
+                input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
+                input[1] = 'tsv'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.tsv,
+                    workflow.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+    test("Should search for proteins against a blast db and return a csv file with hits") {
+
+        when {
+            params {
+                blast_ref_fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/proteome.fasta'
+            }
+            workflow {
+                """
+                input[0] = [ [id:'test'], file(params.blast_ref_fasta, checkIfExists: true) ]
+                input[1] = 'csv'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    workflow.out.csv,
+                    workflow.out.versions
+                ).match() }
+            )
+        }
+    }
 }

From 691eb36580d7381bd4ae5b0fd4a663bdcf1e4448 Mon Sep 17 00:00:00 2001
From: Jessica Rowell <JRowell@cdc.gov>
Date: Wed, 14 May 2025 17:54:58 -0400
Subject: [PATCH 7/7] add blast params to nextflow schema

---
 nextflow_schema.json | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/nextflow_schema.json b/nextflow_schema.json
index 9185010..a4c2b31 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -199,5 +199,20 @@
         {
             "$ref": "#/$defs/generic_options"
         }
-    ]
+    ],
+    "properties": {
+        "blast_ref_fasta": {
+            "type": "string",
+            "help_text": "Reference protein database to BLAST against"
+        },
+        "blastp_outfmt": {
+            "type": "string",
+            "default": "tsv",
+            "pattern": "^(xml|tsv|csv)$",
+            "help_text": "Desired format for BLAST output (tsv, csv, xml)"
+        },
+        "modules_testdata_base_path": {
+            "type": "string"
+        }
+    }
 }