nf-core · olgabot · Jan 3, 2026 · Jan 4, 2026 · Jan 5, 2026 · Jan 10, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,11 @@ Initial release of nf-core/proteinannotator, created with the [nf-core](https://
 
 ### `Added`
 
+- Add [eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) module with automatic database download capability
+  - New `EGGNOG` subworkflow for functional annotation
+  - New `EGGNOG_DOWNLOAD_DB` local module for automatic database downloads
+  - Parameters: `--skip_eggnog`, `--eggnog_data_dir`, `--eggnog_db`, `--eggnog_diamond_db`
+  - Comprehensive test suite for database download and annotation workflows
 - [[PR #52](https://github.com/nf-core/proteinannotator/pull/52)] Add option to turn off InterProScan for testing
 - [[PR #51](https://github.com/nf-core/proteinannotator/pull/51)] Update to nf-core/tools v3.3.1
 - [[PR #47](https://github.com/nf-core/proteinannotator/pull/47)] Update metromap with more tools added from [May 2025 Hackathon](https://nf-co.re/events/2025/hackathon-boston)

diff --git a/CITATIONS.md b/CITATIONS.md
@@ -16,7 +16,11 @@
 
 - [InterProScan](https://academic.oup.com/bioinformatics/article/17/9/847/206564)
 
-> Zdobnov, Evgeni M., and Rolf Apweiler. “InterProScan – an Integration Platform for the Signature-Recognition Methods in InterPro.” Bioinformatics 17, no. 9 (September 1, 2001): 847–48. https://doi.org/10.1093/bioinformatics/17.9.847.
+> Zdobnov, Evgeni M., and Rolf Apweiler. "InterProScan – an Integration Platform for the Signature-Recognition Methods in InterPro." Bioinformatics 17, no. 9 (September 1, 2001): 847–48. https://doi.org/10.1093/bioinformatics/17.9.847.
+
+- [eggNOG-mapper](https://doi.org/10.1093/molbev/msab293)
+
+> Cantalapiedra CP, Hernández-Plaza A, Letunic I, Bork P, Huerta-Cepas J. eggNOG-mapper v2: Functional Annotation, Orthology Assignments, and Domain Prediction at the Metagenomic Scale. Mol Biol Evol. 2021 Dec 9;38(12):5825-5829. doi: 10.1093/molbev/msab293. PubMed PMID: 34597405; PubMed Central PMCID: PMC8662630.
 
 - [MMseqs2](https://www.nature.com/articles/nbt.3988)
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -30,4 +30,25 @@ process {
     withName: SEQKIT_STATS {
         ext.args = ' ' // turn off --all default argument
     }
+
+    withName: EGGNOG_DOWNLOAD_DB {
+        // Keep the downloaded databases under <outdir>/eggnog_databases.
+        publishDir = [[
+            path: { "${params.outdir}/eggnog_databases" },
+            mode: params.publish_dir_mode,
+            enabled: true,
+            // Returning null from saveAs leaves versions.yml out of the published files.
+            saveAs: { fname -> fname == 'versions.yml' ? null : fname }
+        ]]
+    }
+
+    withName: EGGNOGMAPPER {
+        // Publish the process outputs whose names match *.emapper.*
+        // into <outdir>/eggnog, using the pipeline-wide publish mode.
+        publishDir = [[
+            path: { "${params.outdir}/eggnog" },
+            mode: params.publish_dir_mode,
+            pattern: "*.emapper.*"
+        ]]
+    }
 }
diff --git a/docs/output.md b/docs/output.md
@@ -13,7 +13,8 @@ The directories listed below will be created in the results directory after the
 The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:
 
 - [Functional Annotation](#functional-annotation) Annotate proteins with functional domains
-  - [InterProScan](#Interproscan) - Search the InterPro database for functional domains
+  - [InterProScan](#interproscan) - Search the InterPro database for functional domains
+  - [eggNOG-mapper](#eggnog-mapper) - Fast functional annotation through orthology assignment
 - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
 - [SeqKit stats](#seqkit_stats) - Simple statistics for protein FASTA files
 - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
@@ -268,6 +269,64 @@ The XML Schema Definition (XSD) is available [here](http://ftp.ebi.ac.uk/pub/sof
 
 </details>
 
+#### eggNOG-mapper
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `eggnog/`
+  - `*.emapper.annotations`: Tab-separated file with functional annotations
+  - `*.emapper.seed_orthologs`: Tab-separated file with seed ortholog assignments
+  - `*.emapper.hits`: Tab-separated file with search hits from Diamond
+
+</details>
+
+[eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) is a tool for fast functional annotation of novel sequences using orthology assignments. It uses the precomputed orthologous groups and phylogenies of the eggNOG database to transfer functional information to the query sequences.
+
+##### Annotations Output
+
+The main output file (`*.emapper.annotations`) is a tab-separated file containing the functional annotations for each query sequence. The columns include:
+
+1. **query**: Query sequence identifier
+2. **seed_ortholog**: Best matching ortholog from eggNOG database
+3. **evalue**: E-value of the best hit
+4. **score**: Bit score of the best hit
+5. **eggNOG_OGs**: Orthologous groups (OGs) assigned to the query
+6. **max_annot_lvl**: Taxonomic level used for annotation
+7. **COG_category**: COG functional category
+8. **Description**: Functional description
+9. **Preferred_name**: Preferred gene name
+10. **GOs**: Gene Ontology terms
+11. **EC**: Enzyme Commission numbers
+12. **KEGG_ko**: KEGG orthology identifiers
+13. **KEGG_Pathway**: KEGG pathway identifiers
+14. **KEGG_Module**: KEGG module identifiers
+15. **KEGG_Reaction**: KEGG reaction identifiers
+16. **KEGG_rclass**: KEGG reaction class
+17. **BRITE**: BRITE hierarchy
+18. **KEGG_TC**: KEGG transporter classification
+19. **CAZy**: CAZy family
+20. **BiGG_Reaction**: BiGG reaction identifiers
+21. **PFAMs**: Pfam domain annotations
+
+<details markdown="1">
+<summary>Example eggNOG-mapper annotations output</summary>
+
+```
+#query	seed_ortholog	evalue	score	eggNOG_OGs	max_annot_lvl	COG_category	Description	Preferred_name	GOs	EC	KEGG_ko	KEGG_Pathway	KEGG_Module	KEGG_Reaction	KEGG_rclass	BRITE	KEGG_TC	CAZy	BiGG_Reaction	PFAMs
+ENSSASP00005000002.1	ENSSASP00005000002.1	0.0	14179.0	COG0498@1|root,COG0498@2|Bacteria,1MUWQ@1224|Proteobacteria,2VHR6@28216|Betaproteobacteria,2KUMA@206389|Rhodocyclales	1224|Proteobacteria	E	threonine synthase	-	-	-	-	-	-	-	-	-	-	-	-	-
+```
+
+</details>
+
+##### Seed Orthologs Output
+
+The seed orthologs file (`*.emapper.seed_orthologs`) contains the list of orthologs that were used as seeds for the functional annotation. This file is useful for understanding which reference sequences were used for annotation transfer.
+
+##### Hits Output
+
+The hits file (`*.emapper.hits`) contains the raw search results from the Diamond homology search against the eggNOG database. This includes all significant hits before filtering and orthology assignment.
+
 ### MultiQC
 
 <details markdown="1">

diff --git a/docs/usage.md b/docs/usage.md
@@ -87,6 +87,45 @@ Running [InterProScan](https://interproscan-docs.readthedocs.io/) requires a pre
 
 For reproducibility and explicitness, `--interproscan_database_version` is a required parameter. InterProScan is quite resource-intensive and you can also choose to not run InterProScan with `--skip_interproscan`.
 
+### eggNOG-mapper
+
+[eggNOG-mapper](https://github.com/eggnogdb/eggnog-mapper) performs fast genome-wide functional annotation through orthology assignment. The pipeline can either download the required databases automatically or use databases that you have already downloaded:
+
+#### Automatic Database Download (Recommended for first run)
+
+By default, if no database paths are provided, the pipeline will automatically download the required eggNOG databases:
+
+```bash
+nextflow run nf-core/proteinannotator --input samplesheet.csv --outdir results -profile docker
+```
+
+Downloaded databases are saved to `results/eggnog_databases/eggnog_data/` for reuse in future runs.
+
+#### Using Pre-downloaded Databases (Recommended for subsequent runs)
+
+To avoid re-downloading databases, provide the path to previously downloaded databases:
+
+```bash
+nextflow run nf-core/proteinannotator \
+  --input samplesheet.csv \
+  --outdir results \
+  --eggnog_data_dir results/eggnog_databases/eggnog_data \
+  -profile docker
+```
+
+You can optionally specify individual database files:
+
+- `--eggnog_db`: Path to the eggNOG database file (\*.db)
+- `--eggnog_diamond_db`: Path to the Diamond protein database (\*.dmnd)
+
+#### Skipping eggNOG-mapper
+
+To skip eggNOG-mapper annotation entirely:
+
+```bash
+nextflow run nf-core/proteinannotator --input samplesheet.csv --outdir results --skip_eggnog -profile docker
+```
+
 ### Updating the pipeline
 
 When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

diff --git a/modules.json b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "eggnogmapper": {
+                        "branch": "master",
+                        "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726",
+                        "installed_by": ["modules"]
+                    },
                     "interproscan": {
                         "branch": "master",
                         "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",

diff --git a/modules/local/eggnog_download_db/environment.yml b/modules/local/eggnog_download_db/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::eggnog-mapper=2.1.12
diff --git a/modules/local/eggnog_download_db/main.nf b/modules/local/eggnog_download_db/main.nf
@@ -0,0 +1,52 @@
+// Download the eggNOG-mapper reference databases into ./eggnog_data with download_eggnog_data.py.
+process EGGNOG_DOWNLOAD_DB {
+    tag "eggnog_db_download"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/eggnog-mapper:2.1.12--pyhdfd78af_0':
+        'biocontainers/eggnog-mapper:2.1.12--pyhdfd78af_0' }"
+
+    input:
+    val(download_databases) // trigger value only; the script below never references it
+
+    output:
+    path("eggnog_data")            , emit: data_dir
+    // Exact file name on purpose: a `*.db` glob also picks up eggnog.taxa.db and emits two files.
+    path("eggnog_data/eggnog.db")  , emit: db, optional: true
+    path("eggnog_data/*.dmnd")     , emit: diamond_db
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    mkdir -p eggnog_data
+
+    # -y answers yes to every prompt; optional flags (e.g. -F, novel families) are passed through ext.args
+    download_eggnog_data.py \\
+        --data_dir eggnog_data \\
+        -y \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    mkdir -p eggnog_data
+    touch eggnog_data/eggnog.db
+    touch eggnog_data/eggnog_proteins.dmnd
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/eggnog_download_db/tests/main.nf.test b/modules/local/eggnog_download_db/tests/main.nf.test
@@ -0,0 +1,67 @@
+// nf-test suite for the local EGGNOG_DOWNLOAD_DB module: a stubbed CI test plus a tagged real-download test.
+nextflow_process {
+    name "Test Process EGGNOG_DOWNLOAD_DB"
+    script "../main.nf"
+    process "EGGNOG_DOWNLOAD_DB"
+    tag "modules"
+    tag "modules_local"
+    tag "eggnog_download_db"
+
+    test("Should download eggNOG databases - stub") {
+        tag "stub"
+        tag "CI"
+
+        options '-stub'
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = true
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out,
+                    path(process.out.versions[0]).yaml
+                ).match() }
+            )
+        }
+    }
+
+    test("Should download eggNOG databases - real download (long running test)") {
+        tag "long_running"
+        tag "download"
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = true
+                """
+            }
+        }
+        // Each process.out.<name> is a list of emissions: take element [0] before turning it into a file handle.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.data_dir },
+                { assert file(process.out.data_dir[0]).exists() },
+                { assert process.out.diamond_db },
+                { assert file(process.out.diamond_db[0]).exists() },
+                { assert file(process.out.diamond_db[0]).name.endsWith('.dmnd') },
+                { assert snapshot(
+                    path(process.out.versions[0]).yaml
+                ).match("versions") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/eggnogmapper/environment.yml b/modules/nf-core/eggnogmapper/environment.yml
diff --git a/modules/nf-core/eggnogmapper/main.nf b/modules/nf-core/eggnogmapper/main.nf