diff --git a/CHANGELOG.md b/CHANGELOG.md
index a02b6f6..52b0828 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,11 @@ Initial release of nf-core/proteinannotator, created with the [nf-core](https://
### `Added`
+- Add [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) module with automatic database download capability
+ - New `EGGNOG` subworkflow for functional annotation
+ - New `EGGNOG_DOWNLOAD_DB` local module for automatic database downloads
+ - Parameters: `--skip_eggnog`, `--eggnog_data_dir`, `--eggnog_db`, `--eggnog_diamond_db`
+ - Comprehensive test suite for database download and annotation workflows
- [[PR #52](https://github.com/nf-core/proteinannotator/pull/52)] Add option to turn off InterProScan for testing
- [[PR #51](https://github.com/nf-core/proteinannotator/pull/51)] Update to nf-core/tools v3.3.1
- [[PR #47](https://github.com/nf-core/proteinannotator/pull/47)] Update metromap with more tools added from [May 2025 Hackathon](https://nf-co.re/events/2025/hackathon-boston)
diff --git a/CITATIONS.md b/CITATIONS.md
index 55763f9..21c4eee 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -16,7 +16,11 @@
- [InterProScan](https://academic.oup.com/bioinformatics/article/17/9/847/206564)
-> Zdobnov, Evgeni M., and Rolf Apweiler. “InterProScan – an Integration Platform for the Signature-Recognition Methods in InterPro.” Bioinformatics 17, no. 9 (September 1, 2001): 847–48. https://doi.org/10.1093/bioinformatics/17.9.847.
+> Zdobnov, Evgeni M., and Rolf Apweiler. "InterProScan – an Integration Platform for the Signature-Recognition Methods in InterPro." Bioinformatics 17, no. 9 (September 1, 2001): 847–48. https://doi.org/10.1093/bioinformatics/17.9.847.
+
+- [eggNOG-mapper](https://doi.org/10.1093/molbev/msab293)
+
+> Cantalapiedra CP, Hernández-Plaza A, Letunic I, Bork P, Huerta-Cepas J. eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale. Mol Biol Evol. 2021 Dec 9;38(12):5825-5829. doi: 10.1093/molbev/msab293. PubMed PMID: 34597405; PubMed Central PMCID: PMC8662630.
- [MMseqs2](https://www.nature.com/articles/nbt.3988)
diff --git a/conf/modules.config b/conf/modules.config
index 94a2a32..a821328 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -30,4 +30,25 @@ process {
withName: SEQKIT_STATS {
ext.args = ' ' // turn off --all default argument
}
+
+ withName: EGGNOG_DOWNLOAD_DB {
+ publishDir = [
+ [
+ path: { "${params.outdir}/eggnog_databases" },
+ mode: params.publish_dir_mode,
+ enabled: true,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ ]
+ }
+
+ withName: EGGNOGMAPPER {
+ publishDir = [
+ [
+ path: { "${params.outdir}/eggnog" },
+ mode: params.publish_dir_mode,
+ pattern: "*.emapper.*"
+ ]
+ ]
+ }
}
diff --git a/docs/output.md b/docs/output.md
index e529664..7b684b6 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -13,7 +13,8 @@ The directories listed below will be created in the results directory after the
The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
- [Functional Annotation](#functional-annotation) Annotate proteins with functional domains
- - [InterProScan](#Interproscan) - Search the InterPro database for functional domains
+ - [InterProScan](#interproscan) - Search the InterPro database for functional domains
+ - [eggNOG-mapper](#eggnog-mapper) - Fast functional annotation through orthology assignment
- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
- [SeqKit stats](#seqkit_stats) - Simple statistics for protein FASTA files
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
@@ -268,6 +269,64 @@ The XML Schema Definition (XSD) is available [here](http://ftp.ebi.ac.uk/pub/sof
+#### eggNOG-mapper
+
+
+Output files
+
+- `eggnog/`
+ - `*.emapper.annotations`: Tab-separated file with functional annotations
+ - `*.emapper.seed_orthologs`: Tab-separated file with seed ortholog assignments
+ - `*.emapper.hits`: Tab-separated file with search hits from Diamond
+
+
+
+[eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) is a tool for fast functional annotation of novel sequences using orthology assignments. It uses precomputed orthologous groups and phylogenies from the eggNOG database to transfer functional information from the eggNOG orthologous groups to target sequences.
+
+##### Annotations Output
+
+The main output file (`*.emapper.annotations`) is a tab-separated file containing the functional annotations for each query sequence. The columns include:
+
+1. **query**: Query sequence identifier
+2. **seed_ortholog**: Best matching ortholog from eggNOG database
+3. **evalue**: E-value of the best hit
+4. **score**: Bit score of the best hit
+5. **eggNOG_OGs**: Orthologous groups (OGs) assigned to the query
+6. **max_annot_lvl**: Taxonomic level used for annotation
+7. **COG_category**: COG functional category
+8. **Description**: Functional description
+9. **Preferred_name**: Preferred gene name
+10. **GOs**: Gene Ontology terms
+11. **EC**: Enzyme Commission numbers
+12. **KEGG_ko**: KEGG orthology identifiers
+13. **KEGG_Pathway**: KEGG pathway identifiers
+14. **KEGG_Module**: KEGG module identifiers
+15. **KEGG_Reaction**: KEGG reaction identifiers
+16. **KEGG_rclass**: KEGG reaction class
+17. **BRITE**: BRITE hierarchy
+18. **KEGG_TC**: KEGG transporter classification
+19. **CAZy**: CAZy family
+20. **BiGG_Reaction**: BiGG reaction identifiers
+21. **PFAMs**: Pfam domain annotations
+
+
+Example eggNOG-mapper annotations output
+
+```
+#query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs
+ENSSASP00005000002.1 ENSSASP00005000002.1 0.0 14179.0 COG0498@1|root,COG0498@2|Bacteria,1MUWQ@1224|Proteobacteria,2VHR6@28216|Betaproteobacteria,2KUMA@206389|Rhodocyclales 1224|Proteobacteria E threonine synthase - - - - - - - - - - - - -
+```
+
+
+
+##### Seed Orthologs Output
+
+The seed orthologs file (`*.emapper.seed_orthologs`) contains the list of orthologs that were used as seeds for the functional annotation. This file is useful for understanding which reference sequences were used for annotation transfer.
+
+##### Hits Output
+
+The hits file (`*.emapper.hits`) contains the raw search results from the Diamond homology search against the eggNOG database. This includes all significant hits before filtering and orthology assignment.
+
### MultiQC
diff --git a/docs/usage.md b/docs/usage.md
index 78273e5..1ad7594 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -87,6 +87,45 @@ Running [InterProScan](https://interproscan-docs.readthedocs.io/) requires a pre
For reproducibility and explicitness, `--interproscan_database_version` is a required parameter. InterProScan is quite resource-intensive and you can also choose to not run InterProScan with `--skip_interproscan`.
+### eggNOG-mapper
+
+[eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) performs fast genome-wide functional annotation through orthology assignment. The pipeline supports automatic database download or using pre-downloaded databases:
+
+#### Automatic Database Download (Recommended for first run)
+
+By default, if no database paths are provided, the pipeline will automatically download the required eggNOG databases:
+
+```bash
+nextflow run nf-core/proteinannotator --input samplesheet.csv --outdir results -profile docker
+```
+
+Downloaded databases are saved to `results/eggnog_databases/eggnog_data/` for reuse in future runs.
+
+#### Using Pre-downloaded Databases (Recommended for subsequent runs)
+
+To avoid re-downloading databases, provide the path to previously downloaded databases:
+
+```bash
+nextflow run nf-core/proteinannotator \
+ --input samplesheet.csv \
+ --outdir results \
+ --eggnog_data_dir results/eggnog_databases/eggnog_data \
+ -profile docker
+```
+
+You can optionally specify individual database files:
+
+- `--eggnog_db`: Path to the eggNOG database file (\*.db)
+- `--eggnog_diamond_db`: Path to the Diamond protein database (\*.dmnd)
+
+#### Skipping eggNOG-mapper
+
+To skip eggNOG-mapper annotation entirely:
+
+```bash
+nextflow run nf-core/proteinannotator --input samplesheet.csv --outdir results --skip_eggnog -profile docker
+```
+
### Updating the pipeline
When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
diff --git a/modules.json b/modules.json
index 459aef4..4d35abc 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
+ "eggnogmapper": {
+ "branch": "master",
+ "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726",
+ "installed_by": ["modules"]
+ },
"interproscan": {
"branch": "master",
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
diff --git a/modules/local/eggnog_download_db/environment.yml b/modules/local/eggnog_download_db/environment.yml
new file mode 100644
index 0000000..f380325
--- /dev/null
+++ b/modules/local/eggnog_download_db/environment.yml
@@ -0,0 +1,7 @@
+name: eggnog_download_db
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::eggnog-mapper=2.1.12
diff --git a/modules/local/eggnog_download_db/main.nf b/modules/local/eggnog_download_db/main.nf
new file mode 100644
index 0000000..1ae2e06
--- /dev/null
+++ b/modules/local/eggnog_download_db/main.nf
@@ -0,0 +1,52 @@
+process EGGNOG_DOWNLOAD_DB {
+ tag "eggnog_db_download"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/eggnog-mapper:2.1.12--pyhdfd78af_0':
+ 'biocontainers/eggnog-mapper:2.1.12--pyhdfd78af_0' }"
+
+ input:
+ val(download_databases)
+
+ output:
+ path("eggnog_data") , emit: data_dir
+ path("eggnog_data/*.db") , emit: db, optional: true
+ path("eggnog_data/*.dmnd") , emit: diamond_db
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ """
+ mkdir -p eggnog_data
+
+ # Download eggNOG databases
+ # -y: auto-yes to prompts
+ # -F: install novel families (optional, can be controlled via args)
+ download_eggnog_data.py \\
+ --data_dir eggnog_data \\
+ -y \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ mkdir -p eggnog_data
+ touch eggnog_data/eggnog.db
+ touch eggnog_data/eggnog_proteins.dmnd
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/eggnog_download_db/tests/main.nf.test b/modules/local/eggnog_download_db/tests/main.nf.test
new file mode 100644
index 0000000..bd89a75
--- /dev/null
+++ b/modules/local/eggnog_download_db/tests/main.nf.test
@@ -0,0 +1,67 @@
+nextflow_process {
+
+ name "Test Process EGGNOG_DOWNLOAD_DB"
+ script "../main.nf"
+ process "EGGNOG_DOWNLOAD_DB"
+ tag "modules"
+ tag "modules_local"
+ tag "eggnog_download_db"
+
+ test("Should download eggNOG databases - stub") {
+ tag "stub"
+ tag "CI"
+
+ options '-stub'
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = true
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out,
+ path(process.out.versions[0]).yaml
+ ).match() }
+ )
+ }
+ }
+
+ test("Should download eggNOG databases - real download (long running test)") {
+ tag "long_running"
+ tag "download"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = true
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.data_dir },
+ { assert path(process.out.data_dir).exists() },
+ { assert process.out.diamond_db },
+ { assert path(process.out.diamond_db).exists() },
+ { assert path(process.out.diamond_db).name.endsWith('.dmnd') },
+ { assert snapshot(
+ path(process.out.versions[0]).yaml
+ ).match("versions") }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/eggnogmapper/environment.yml b/modules/nf-core/eggnogmapper/environment.yml
new file mode 100644
index 0000000..6e6e069
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::eggnog-mapper=2.1.12
diff --git a/modules/nf-core/eggnogmapper/main.nf b/modules/nf-core/eggnogmapper/main.nf
new file mode 100644
index 0000000..2489b7f
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/main.nf
@@ -0,0 +1,68 @@
+process EGGNOGMAPPER {
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/eggnog-mapper:2.1.12--pyhdfd78af_0':
+ 'biocontainers/eggnog-mapper:2.1.12--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ path(eggnog_db)
+ path(eggnog_data_dir)
+ tuple val(meta2), path(eggnog_diamond_db)
+
+ output:
+ tuple val(meta), path("*.emapper.annotations") , emit: annotations
+ tuple val(meta), path("*.emapper.seed_orthologs"), emit: orthologs
+ tuple val(meta), path("*.emapper.hits") , emit: hits
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def is_compressed = fasta.extension == '.gz' ? true : false
+ def fasta_name = is_compressed ? fasta.baseName : "$fasta"
+ def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : ''
+ def database_arg = eggnog_db ? "--database $eggnog_db" : ''
+ def dmnd_db_arg = eggnog_diamond_db ? "--dmnd_db $eggnog_diamond_db" : ''
+ """
+ if [ "$is_compressed" == "true" ]; then
+ gzip -c -d $fasta > $fasta_name
+ fi
+
+ emapper.py \\
+ --cpu ${task.cpus} \\
+ -i ${fasta_name} \\
+ --data_dir ${eggnog_data_dir} \\
+ -m diamond \\
+ $dmnd_db_arg \\
+ $database_arg \\
+ --output ${prefix} \\
+ ${dbmem} \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.emapper.annotations
+ touch ${prefix}.emapper.seed_orthologs
+ touch ${prefix}.emapper.hits
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/eggnogmapper/meta.yml b/modules/nf-core/eggnogmapper/meta.yml
new file mode 100644
index 0000000..40af3c6
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/meta.yml
@@ -0,0 +1,93 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "eggnogmapper"
+description: Fast genome-wide functional annotation through orthology assignment.
+keywords:
+ - annotation
+ - orthology
+ - genomics
+tools:
+ - "eggnogmapper":
+ description: "Fast genome-wide functional annotation through orthology assignment."
+ homepage: "https://github.com/eggnogdb/eggnog-mapper"
+ documentation: "https://github.com/eggnogdb/eggnog-mapper/wiki"
+ tool_dev_url: "https://github.com/eggnogdb/eggnog-mapper"
+ doi: "10.1093/molbev/msab293"
+ licence: ["AGPL v3"]
+ identifier: ""
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - fasta:
+ type: file
+ description: Database of sequences in FASTA format
+ pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
+ ontologies: []
+ - eggnog_db:
+ type: file
+ description: The eggnog database file (e.g. eggnog-mapper/data/eggnog.db)
+ pattern: "*.db"
+ ontologies: []
+ - eggnog_data_dir:
+ type: directory
+ description: Directory containing eggnog database files (e.g. eggnog-mapper/data)
+ pattern: "*"
+ - - meta2:
+ type: map
+ description: |
+ Groovy Map containing database information
+ e.g. `[ id:'test' ]`
+ - eggnog_diamond_db:
+ type: file
+ description: The eggnog Diamond protein database file (e.g. eggnog-mapper/data/eggnog_proteins.dmnd)
+ pattern: "*.dmnd"
+ ontologies: []
+output:
+ annotations:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - "*.emapper.annotations":
+ type: file
+ description: TSV with the results from the annotation phase
+ pattern: "*.emapper.annotations"
+ ontologies: []
+ orthologs:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - "*.emapper.seed_orthologs":
+ type: file
+ description: TSV with the results from parsing the hits, linking queries with
+ seed orthologs (with commented metadata)
+ pattern: "*.emapper.seed_orthologs"
+ ontologies: []
+ hits:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - "*.emapper.hits":
+ type: file
+ description: TSV with the results from the Diamond search phase
+ pattern: "*.emapper.hits"
+ ontologies: []
+ versions:
+ - versions.yml:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ ontologies:
+ - edam: http://edamontology.org/format_3750 # YAML
+authors:
+ - "@vagkaratzas"
+maintainers:
+ - "@vagkaratzas"
+ - "@gallvp"
diff --git a/modules/nf-core/eggnogmapper/tests/main.nf.test b/modules/nf-core/eggnogmapper/tests/main.nf.test
new file mode 100644
index 0000000..69a804f
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/tests/main.nf.test
@@ -0,0 +1,134 @@
+nextflow_process {
+
+ name "Test Process EGGNOGMAPPER"
+ script "../main.nf"
+ process "EGGNOGMAPPER"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "eggnogmapper"
+ tag "diamond/makedb"
+
+ setup {
+ run("DIAMOND_MAKEDB") {
+ script "../../diamond/makedb/main.nf"
+ process {
+ """
+ input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+ input[1] = []
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+ }
+
+ test("Should search for protein annotations against the eggnogmapper db") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+ eggnog_db = file("https://github.com/nf-core/test-datasets/raw/eddf5b0e3336e0f93c81d4b4843b07257f9efaec/data/delete_me/eggnogmapper/eggnog.db", checkIfExists: true)
+ eggnog_db.copyTo("${workDir}/tmp/eggnog.db")
+ eggnog_data_dir = "${workDir}/tmp/"
+ input[1] = eggnog_db
+ input[2] = eggnog_data_dir
+ input[3] = DIAMOND_MAKEDB.out.db
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.annotations.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\tCOG0498@1|root,COG0498@2|Bacteria,1MUWQ@1224|Proteobacteria,2VHR6@28216|Betaproteobacteria,2KUMA@206389|Rhodocyclales\t1224|Proteobacteria\tE\tthreonine synthase\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-") },
+ { assert path(process.out.orthologs.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\t1\t7096\t1\t7096\t100.0\t100.0\t100.0") },
+ { assert snapshot(path(process.out.hits.get(0).get(1)).readLines().size() >= 12).match("hits") },
+ { assert snapshot(
+ process.out.versions.collect{ path(it).yaml }
+ ).match("versions") }
+ )
+ }
+
+ }
+
+ test("Should search for protein annotations against the eggnogmapper db -- empty-params") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ eggnog_db = file("https://github.com/nf-core/test-datasets/raw/eddf5b0e3336e0f93c81d4b4843b07257f9efaec/data/delete_me/eggnogmapper/eggnog.db", checkIfExists: true)
+ eggnog_db.copyTo("${workDir}/tmp/eggnog.db")
+
+ ch_synced_inputs = DIAMOND_MAKEDB.out.db.map { meta, dmnd ->
+ dmnd.copyTo("${workDir}/tmp/eggnog_proteins.dmnd")
+
+ return true
+ }
+ | combine ( Channel.fromPath( "${workDir}/tmp/" ) )
+ eggnog_data_dir = ch_synced_inputs.map { sync_status, data_dir -> data_dir }
+
+ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+ input[1] = []
+ input[2] = eggnog_data_dir
+ input[3] = [[], []]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.annotations.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\tCOG0498@1|root,COG0498@2|Bacteria,1MUWQ@1224|Proteobacteria,2VHR6@28216|Betaproteobacteria,2KUMA@206389|Rhodocyclales\t1224|Proteobacteria\tE\tthreonine synthase\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-") },
+ { assert path(process.out.orthologs.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\t1\t7096\t1\t7096\t100.0\t100.0\t100.0") },
+ { assert snapshot(path(process.out.hits.get(0).get(1)).readLines().size() >= 12).match("hits--emptyparams") },
+ { assert snapshot(
+ process.out.versions.collect{ path(it).yaml }
+ ).match("versions--empty-params") }
+ )
+ }
+
+ }
+
+ test("Should search for protein annotations against the eggnogmapper db - stub") {
+
+ options '-stub'
+
+
+ when {
+
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ]
+ eggnog_db = file("https://github.com/nf-core/test-datasets/raw/eddf5b0e3336e0f93c81d4b4843b07257f9efaec/data/delete_me/eggnogmapper/eggnog.db", checkIfExists: true)
+ eggnog_db.copyTo("${workDir}/tmp/eggnog.db")
+ eggnog_data_dir = "${workDir}/tmp/"
+ input[1] = eggnog_db
+ input[2] = eggnog_data_dir
+ input[3] = DIAMOND_MAKEDB.out.db
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out,
+ path(process.out.versions[0]).yaml
+ ).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/eggnogmapper/tests/main.nf.test.snap b/modules/nf-core/eggnogmapper/tests/main.nf.test.snap
new file mode 100644
index 0000000..4103ca4
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/tests/main.nf.test.snap
@@ -0,0 +1,124 @@
+{
+ "hits": {
+ "content": [
+ true
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.6"
+ },
+ "timestamp": "2025-08-28T10:51:01.677075785"
+ },
+ "versions--empty-params": {
+ "content": [
+ [
+ {
+ "EGGNOGMAPPER": {
+ "eggnog-mapper": "2.1.12"
+ }
+ }
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.6"
+ },
+ "timestamp": "2025-08-28T10:51:25.522521855"
+ },
+ "versions": {
+ "content": [
+ [
+ {
+ "EGGNOGMAPPER": {
+ "eggnog-mapper": "2.1.12"
+ }
+ }
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.6"
+ },
+ "timestamp": "2025-08-28T10:51:02.042174914"
+ },
+ "hits--emptyparams": {
+ "content": [
+ true
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.6"
+ },
+ "timestamp": "2025-08-28T10:51:25.448709571"
+ },
+ "Should search for protein annotations against the eggnogmapper db - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,d3e4efad28b5a924585ea3dfcf72c32c"
+ ],
+ "annotations": [
+ [
+ {
+ "id": "test"
+ },
+ "test.emapper.annotations:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "hits": [
+ [
+ {
+ "id": "test"
+ },
+ "test.emapper.hits:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "orthologs": [
+ [
+ {
+ "id": "test"
+ },
+ "test.emapper.seed_orthologs:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d3e4efad28b5a924585ea3dfcf72c32c"
+ ]
+ },
+ {
+ "EGGNOGMAPPER": {
+ "eggnog-mapper": "2.1.12"
+ }
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.2",
+ "nextflow": "25.04.6"
+ },
+ "timestamp": "2025-08-28T09:54:40.344995769"
+ }
+}
\ No newline at end of file
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 5aac74f..8ff857f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -70,6 +70,35 @@
}
}
},
+ "eggnog_options": {
+ "title": "eggNOG-mapper Options",
+ "type": "object",
+ "description": "Options for eggNOG-mapper functional annotation tool",
+ "default": "",
+ "properties": {
+ "skip_eggnog": {
+ "type": "boolean",
+ "description": "Skip eggNOG-mapper annotation",
+ "default": false
+ },
+ "eggnog_db": {
+ "type": "string",
+ "description": "Path to the eggNOG database file (*.db). If not provided along with eggnog_data_dir, databases will be downloaded automatically.",
+ "format": "file-path"
+ },
+ "eggnog_data_dir": {
+ "type": "string",
+ "description": "Path to the eggNOG data directory containing database files. If not provided, databases will be downloaded automatically on first run.",
+ "format": "directory-path",
+ "help_text": "Leave empty to auto-download databases. Once downloaded, provide this path in future runs to reuse the databases."
+ },
+ "eggnog_diamond_db": {
+ "type": "string",
+ "description": "Path to the Diamond protein database (*.dmnd). If not provided, will be downloaded automatically.",
+ "format": "file-path"
+ }
+ }
+ },
"institutional_config_options": {
"title": "Institutional config options",
"type": "object",
@@ -227,6 +256,9 @@
{
"$ref": "#/$defs/interproscan_options"
},
+ {
+ "$ref": "#/$defs/eggnog_options"
+ },
{
"$ref": "#/$defs/institutional_config_options"
},
diff --git a/subworkflows/local/eggnog/main.nf b/subworkflows/local/eggnog/main.nf
new file mode 100644
index 0000000..33aabe3
--- /dev/null
+++ b/subworkflows/local/eggnog/main.nf
@@ -0,0 +1,43 @@
+// Import modules
+include { EGGNOGMAPPER } from '../../../modules/nf-core/eggnogmapper/main'
+include { EGGNOG_DOWNLOAD_DB } from '../../../modules/local/eggnog_download_db/main'
+
+
+workflow EGGNOG {
+ take:
+ ch_fasta // channel: [ val(meta), [ fasta ] ]
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ // Download databases if not provided
+ if (!params.eggnog_data_dir) {
+ EGGNOG_DOWNLOAD_DB(true)
+ ch_eggnog_data_dir = EGGNOG_DOWNLOAD_DB.out.data_dir
+ ch_eggnog_db = EGGNOG_DOWNLOAD_DB.out.db
+ ch_eggnog_diamond_db = EGGNOG_DOWNLOAD_DB.out.diamond_db.map { dmnd -> [[id: 'eggnog_diamond'], dmnd] }
+ ch_versions = ch_versions.mix(EGGNOG_DOWNLOAD_DB.out.versions)
+ } else {
+ // Use provided database paths
+ ch_eggnog_data_dir = Channel.fromPath(params.eggnog_data_dir, checkIfExists: true)
+ ch_eggnog_db = params.eggnog_db ? Channel.fromPath(params.eggnog_db, checkIfExists: true) : Channel.empty()
+ ch_eggnog_diamond_db = params.eggnog_diamond_db ?
+ Channel.fromPath(params.eggnog_diamond_db, checkIfExists: true).map { dmnd -> [[id: 'eggnog_diamond'], dmnd] } :
+ Channel.value([[id: 'eggnog_diamond'], []])
+ }
+
+ EGGNOGMAPPER(
+ ch_fasta,
+ ch_eggnog_db.ifEmpty([]),
+ ch_eggnog_data_dir,
+ ch_eggnog_diamond_db
+ )
+ ch_versions = ch_versions.mix(EGGNOGMAPPER.out.versions.first())
+
+ emit:
+ annotations = EGGNOGMAPPER.out.annotations // channel: [ val(meta), path(*.emapper.annotations) ]
+ orthologs = EGGNOGMAPPER.out.orthologs // channel: [ val(meta), path(*.emapper.seed_orthologs) ]
+ hits = EGGNOGMAPPER.out.hits // channel: [ val(meta), path(*.emapper.hits) ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/local/eggnog/meta.yml b/subworkflows/local/eggnog/meta.yml
new file mode 100644
index 0000000..0fdeb77
--- /dev/null
+++ b/subworkflows/local/eggnog/meta.yml
@@ -0,0 +1,51 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "eggnog"
+## TODO nf-core: Add a description of the subworkflow and list keywords
+description: Sort SAM/BAM/CRAM file
+keywords:
+ - sort
+ - bam
+ - sam
+ - cram
+## TODO nf-core: Add a list of the modules and/or subworkflows used in the subworkflow
+components:
+ - samtools/sort
+ - samtools/index
+## TODO nf-core: List all of the channels used as input with a description and their structure
+input:
+ - ch_bam:
+ type: file
+ description: |
+ The input channel containing the BAM/CRAM/SAM files
+ Structure: [ val(meta), path(bam) ]
+ pattern: "*.{bam/cram/sam}"
+## TODO nf-core: List all of the channels used as output with a descriptions and their structure
+output:
+ - bam:
+ type: file
+ description: |
+ Channel containing BAM files
+ Structure: [ val(meta), path(bam) ]
+ pattern: "*.bam"
+ - bai:
+ type: file
+ description: |
+ Channel containing indexed BAM (BAI) files
+ Structure: [ val(meta), path(bai) ]
+ pattern: "*.bai"
+ - csi:
+ type: file
+ description: |
+ Channel containing CSI files
+ Structure: [ val(meta), path(csi) ]
+ pattern: "*.csi"
+ - versions:
+ type: file
+ description: |
+ File containing software versions
+ Structure: [ path(versions.yml) ]
+ pattern: "versions.yml"
+authors:
+ - "@olgabot"
+maintainers:
+ - "@olgabot"
diff --git a/subworkflows/local/eggnog/tests/main.nf.test b/subworkflows/local/eggnog/tests/main.nf.test
new file mode 100644
index 0000000..e5aa7e4
--- /dev/null
+++ b/subworkflows/local/eggnog/tests/main.nf.test
@@ -0,0 +1,127 @@
+nextflow_workflow {
+
+ name "Test Subworkflow EGGNOG"
+ script "../main.nf"
+ workflow "EGGNOG"
+
+ tag "subworkflows"
+ tag "subworkflows_local"
+ tag "subworkflows/eggnog"
+ tag "eggnogmapper"
+ tag "eggnog_download_db"
+
+ test("Test eggNOG-mapper with auto-download databases - stub") {
+ tag "stub"
+ tag "CI"
+
+ options "-stub"
+
+ when {
+ params {
+ pipelines_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/"
+ eggnog_data_dir = null
+ }
+ workflow {
+ """
+ input[0] = Channel.fromList([
+ [
+ [ id:'test' ],
+ file(params.pipelines_testdata_base_path + 'proteinfold/testdata/sequences/T1024.fasta', checkIfExists: true)
+ ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert workflow.out.annotations },
+ { assert workflow.out.orthologs },
+ { assert workflow.out.hits },
+ { assert workflow.out.versions }
+ )
+ }
+ }
+
+ test("Test eggNOG-mapper with provided databases") {
+ tag "eggnog_provided_db"
+
+ when {
+ params {
+ pipelines_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/"
+ modules_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/"
+ }
+ workflow {
+ """
+ // Download small test database
+ eggnog_db = file("https://github.com/nf-core/test-datasets/raw/eddf5b0e3336e0f93c81d4b4843b07257f9efaec/data/delete_me/eggnogmapper/eggnog.db", checkIfExists: true)
+ eggnog_db.copyTo("\${workDir}/tmp/eggnog.db")
+
+ params.eggnog_data_dir = "\${workDir}/tmp/"
+
+ input[0] = Channel.fromList([
+ [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true)
+ ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert workflow.out.annotations },
+ { assert workflow.out.orthologs },
+ { assert workflow.out.hits },
+ { assert workflow.out.versions },
+ // Verify files exist
+ { assert path(workflow.out.annotations.get(0).get(1)).exists() },
+ { assert path(workflow.out.orthologs.get(0).get(1)).exists() },
+ { assert path(workflow.out.hits.get(0).get(1)).exists() },
+ // Verify annotation content contains expected data
+ { assert path(workflow.out.annotations.get(0).get(1)).readLines().any { it.contains("ENSSASP00005000002.1") } }
+ )
+ }
+ }
+
+ test("Test eggNOG-mapper with multiple sequences - stub") {
+ tag "eggnog_multiple"
+ tag "stub"
+
+ options "-stub"
+
+ when {
+ params {
+ pipelines_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/"
+ eggnog_data_dir = null
+ }
+ workflow {
+ """
+ input[0] = Channel.fromList([
+ [
+ [ id:'T1024' ],
+ file(params.pipelines_testdata_base_path + 'proteinfold/testdata/sequences/T1024.fasta', checkIfExists: true)
+ ],
+ [
+ [ id:'T1026' ],
+ file(params.pipelines_testdata_base_path + 'proteinfold/testdata/sequences/T1026.fasta', checkIfExists: true)
+ ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert workflow.out.annotations },
+ { assert workflow.out.annotations.toList().size() == 2 },
+ { assert workflow.out.orthologs.toList().size() == 2 },
+ { assert workflow.out.hits.toList().size() == 2 }
+ )
+ }
+ }
+}
diff --git a/subworkflows/local/functional_annotation/main.nf b/subworkflows/local/functional_annotation/main.nf
index 4008fe0..aaf10cb 100644
--- a/subworkflows/local/functional_annotation/main.nf
+++ b/subworkflows/local/functional_annotation/main.nf
@@ -1,5 +1,6 @@
-// Import Annotator Subworfklows
+// Import Annotator Subworkflows
include { INTERPROSCAN } from '../interproscan/main'
+include { EGGNOG } from '../eggnog/main'
workflow FUNCTIONAL_ANNOTATION {
@@ -10,8 +11,6 @@ workflow FUNCTIONAL_ANNOTATION {
ch_versions = Channel.empty()
- // TODO nf-core: substitute modules here for the modules of your subworkflow
-
// Create a multifasta, with one fasta per entry, add the sequence ID to the meta id
ch_fasta
.map { meta, fasta ->
@@ -34,6 +33,27 @@ workflow FUNCTIONAL_ANNOTATION {
ch_versions = ch_versions.mix(INTERPROSCAN.out.versions.first())
}
+ //
+ // SUBWORKFLOW: Run eggNOG-mapper
+ //
+
+ ch_eggnog_annotations = Channel.empty()
+ ch_eggnog_orthologs = Channel.empty()
+ ch_eggnog_hits = Channel.empty()
+
+ if (!params.skip_eggnog) {
+ EGGNOG(
+ ch_multifasta
+ )
+ ch_eggnog_annotations = EGGNOG.out.annotations
+ ch_eggnog_orthologs = EGGNOG.out.orthologs
+ ch_eggnog_hits = EGGNOG.out.hits
+ ch_versions = ch_versions.mix(EGGNOG.out.versions)
+ }
+
emit:
- versions = ch_versions // channel: [ versions.yml ]
+ eggnog_annotations = ch_eggnog_annotations // channel: [ val(meta), path(*.emapper.annotations) ]
+ eggnog_orthologs = ch_eggnog_orthologs // channel: [ val(meta), path(*.emapper.seed_orthologs) ]
+ eggnog_hits = ch_eggnog_hits // channel: [ val(meta), path(*.emapper.hits) ]
+ versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/functional_annotation/tests/main.nf.test b/subworkflows/local/functional_annotation/tests/main.nf.test
index d80b3e7..8d2bb24 100644
--- a/subworkflows/local/functional_annotation/tests/main.nf.test
+++ b/subworkflows/local/functional_annotation/tests/main.nf.test
@@ -1,5 +1,3 @@
-// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
-// nf-core subworkflows test functional_annotation
nextflow_workflow {
name "Test Subworkflow FUNCTIONAL_ANNOTATION"
@@ -7,13 +5,11 @@ nextflow_workflow {
workflow "FUNCTIONAL_ANNOTATION"
tag "subworkflows"
- tag "subworkflows_"
+ tag "subworkflows_local"
tag "subworkflows/functional_annotation"
- // TODO nf-core: Add tags for all modules used within this subworkflow. Example:
-
- // tag "samtools"
- // tag "samtools/sort"
- // tag "samtools/index"
+ tag "eggnogmapper"
+ tag "eggnog_download_db"
+ tag "interproscan"
// TODO nf-core: Change the test name preferably indicating the test-data and file-format used
@@ -87,7 +83,6 @@ nextflow_workflow {
}
}
- // TODO nf-core: Change the test name preferably indicating the test-data and file-format used
test("Test single input fasta with 4 fasta records: snap25 isoforms - bcl2 - ced9") {
when {
@@ -96,7 +91,6 @@ nextflow_workflow {
}
workflow {
"""
- // TODO nf-core: define inputs of the workflow here. Example:
input[0] = Channel.fromList([
[
[ id:'test' ], // meta map
@@ -111,7 +105,114 @@ nextflow_workflow {
assertAll(
{ assert workflow.success},
{ assert snapshot(workflow.out).match()}
- //TODO nf-core: Add all required assertions to verify the test output.
+ )
+ }
+ }
+
+ test("Test eggNOG-mapper with auto-download databases - stub") {
+ tag "stub"
+ tag "CI"
+
+ options "-stub"
+
+ when {
+ params {
+ pipelines_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/"
+ skip_interproscan = true
+ skip_eggnog = false
+ eggnog_data_dir = null
+ }
+ workflow {
+ """
+ input[0] = Channel.fromList([
+ [
+ [ id:'test' ],
+ file(params.pipelines_testdata_base_path + 'proteinfold/testdata/sequences/T1024.fasta', checkIfExists: true)
+ ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert workflow.out.eggnog_annotations },
+ { assert workflow.out.eggnog_orthologs },
+ { assert workflow.out.eggnog_hits },
+ { assert workflow.out.versions }
+ )
+ }
+ }
+
+ test("Test eggNOG-mapper with provided databases") {
+ tag "eggnog_provided_db"
+
+ when {
+ params {
+ pipelines_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/"
+ skip_interproscan = true
+ skip_eggnog = false
+ }
+ workflow {
+ """
+ // Download small test database
+ eggnog_db = file("https://github.com/nf-core/test-datasets/raw/eddf5b0e3336e0f93c81d4b4843b07257f9efaec/data/delete_me/eggnogmapper/eggnog.db", checkIfExists: true)
+ eggnog_db.copyTo("\${workDir}/tmp/eggnog.db")
+
+ params.eggnog_data_dir = "\${workDir}/tmp/"
+
+ input[0] = Channel.fromList([
+ [
+ [ id:'test' ],
+ file(params.pipelines_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true)
+ ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert workflow.out.eggnog_annotations },
+ { assert workflow.out.eggnog_orthologs },
+ { assert workflow.out.eggnog_hits },
+ { assert workflow.out.versions },
+ // Verify annotation content
+ { assert path(workflow.out.eggnog_annotations.get(0).get(1)).exists() },
+ { assert path(workflow.out.eggnog_orthologs.get(0).get(1)).exists() },
+ { assert path(workflow.out.eggnog_hits.get(0).get(1)).exists() }
+ )
+ }
+ }
+
+ test("Test skip eggNOG-mapper") {
+
+ when {
+ params {
+ pipelines_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/"
+ skip_interproscan = true
+ skip_eggnog = true
+ }
+ workflow {
+ """
+ input[0] = Channel.fromList([
+ [
+ [ id:'test' ],
+ file(params.pipelines_testdata_base_path + 'proteinfold/testdata/sequences/T1024.fasta', checkIfExists: true)
+ ]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert workflow.out.eggnog_annotations.toList().size() == 0 },
+ { assert workflow.out.eggnog_orthologs.toList().size() == 0 },
+ { assert workflow.out.eggnog_hits.toList().size() == 0 }
)
}
}