From 88107cd27581a84c4658612b10199fad44762ebb Mon Sep 17 00:00:00 2001
From: vagkaratzas <vagelaros.gee@gmail.com>
Date: Mon, 9 Feb 2026 14:53:17 +0000
Subject: [PATCH 1/3] zenodo updated

---
 README.md              | 5 ++---
 ro-crate-metadata.json | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index fb552f8..2eef432 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 
 [![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)
 [![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)
-[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)
 [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)
 
 [![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)
@@ -99,8 +99,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
 
 ## Citations
 
-<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
-<!-- If you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
+If you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)
 
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
 
diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json
index 51bb9a3..8058a02 100644
--- a/ro-crate-metadata.json
+++ b/ro-crate-metadata.json
@@ -23,7 +23,7 @@
             "@type": "Dataset",
             "creativeWorkStatus": "Stable",
             "datePublished": "2026-02-09T10:42:29+00:00",
-            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-proteinannotator_logo_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/nf-core-proteinannotator_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n<p>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/proteinannotator_metromap_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/proteinannotator_metromap_light.png\">\n  </picture>\n</p>\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. 
Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/) and [FunFam](https://download.cathdb.info/cath/releases/all-releases/)\n2. Functional annotation:\n   - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. 
-->\n<!-- If you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
+            "description": "<h1>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/nf-core-proteinannotator_logo_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/nf-core-proteinannotator_logo_light.png\">\n  </picture>\n</h1>\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with 
singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n<p>\n  <picture>\n    <source media=\"(prefers-color-scheme: dark)\" srcset=\"docs/images/proteinannotator_metromap_dark.png\">\n    <img alt=\"nf-core/proteinannotator\" src=\"docs/images/proteinannotator_metromap_light.png\">\n  </picture>\n</p>\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. 
Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n   such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/) and [FunFam](https://download.cathdb.info/cath/releases/all-releases/)\n2. Functional annotation:\n   - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n   -profile <docker/singularity/.../institute> \\\n   --input samplesheet.csv \\\n   --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated 
bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n",
             "hasPart": [
                 {
                     "@id": "main.nf"

From 2ad495f1cb19324660d4569380cffe17ab2ffc62 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Thu, 12 Mar 2026 16:50:26 +0000
Subject: [PATCH 2/3] copying integration of pfam and funfam

---
 conf/modules.config                           | 22 ++++++++++++
 conf/test.config                              |  1 +
 main.nf                                       |  3 ++
 nextflow.config                               |  3 ++
 nextflow_schema.json                          | 17 +++++++++
 subworkflows/local/domain_annotation/main.nf  | 36 +++++++++++++++++++
 subworkflows/local/domain_annotation/meta.yml | 16 +++++++++
 .../main.nf                                   |  4 +--
 workflows/proteinannotator.nf                 |  8 ++++-
 9 files changed, 107 insertions(+), 3 deletions(-)

diff --git a/conf/modules.config b/conf/modules.config
index ec1428c..9f40a6d 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -90,6 +90,17 @@ process {
         ]
     }
 
+    withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:WGET_MROOT' {
+        ext.prefix = "HMM"
+        ext.suffix = "tar.gz" // presumably prefix + suffix explicitly name the downloaded file — confirm against the wget module
+        ext.args   = '--no-check-certificate' // NOTE(review): wget flag that skips TLS certificate validation; confirm the download host really needs it
+        publishDir = [
+            path: { "${params.outdir}/downloaded_dbs/" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
     withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_PFAM' {
         ext.args   = { "-E ${params.hmmsearch_evalue_cutoff}" }
         publishDir = [
@@ -110,6 +121,17 @@ process {
         ]
     }
 
+    withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_MROOT' {
+        ext.args   = { "-E ${params.hmmsearch_evalue_cutoff}" }
+        publishDir = [
+            path: { "${params.outdir}/domain_annotation/mroot/" },
+            mode: params.publish_dir_mode,
+            pattern: "*.domtbl.gz",
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
+
     withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:FUNCTIONAL_ANNOTATION:ARIA2' {
         publishDir = [
             path: { "${params.outdir}/downloaded_dbs/" },
diff --git a/conf/test.config b/conf/test.config
index 252ec87..02c92be 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -27,6 +27,7 @@ params {
     // Domain annotation
     pfam_latest_link   = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz'
     funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz'
+    mroot_latest_link  =  'https://pavlopoulos-lab.org/metagroot/DownloadHmm'
     // Functional annotation
     interproscan_db_url       = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz'
     interproscan_applications = 'Hamap,TIGRFAM,sfld'
diff --git a/main.nf b/main.nf
index 98d7d67..d7f1972 100644
--- a/main.nf
+++ b/main.nf
@@ -46,6 +46,9 @@ workflow NFCORE_PROTEINANNOTATOR {
         params.skip_funfam,
         params.funfam_db,
         params.funfam_latest_link,
+        params.skip_mroot,
+        params.mroot_db,
+        params.mroot_latest_link,
         params.skip_interproscan,
         params.interproscan_db_url,
         params.interproscan_db,
diff --git a/nextflow.config b/nextflow.config
index e56f91f..f83d29d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -25,6 +25,9 @@ params {
     skip_funfam             = false
     funfam_db               = null
     funfam_latest_link      = "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz"
+    skip_mroot              = false
+    mroot_db                = null
+    mroot_latest_link       = "https://pavlopoulos-lab.org/metagroot/DownloadHmm"
     hmmsearch_evalue_cutoff = 0.001
 
     // Functional annotation
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b7ad6d8..754cd6c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -276,6 +276,23 @@
                     "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz",
                     "description": "CATH hosted link to the latest available (v4_3_0) FunFam HMM database file."
                 },
+                "skip_mroot": {
+                    "type": "boolean",
+                    "fa_icon": "fas fa-ban",
+                    "description": "Skip the domain annotation with the MetagRoot database.",
+                    "help_text": "Skips the domain annotation of input sequence against a MetagRoot database."
+                },
+                "mroot_db": {
+                    "type": "string",
+                    "format": "file-path",
+                    "description": "Path to an already installed MetagRoot HMM database (.tar.gz).",
+                    "help_text": "If left null and skip_mroot is false, the pipeline will start downloading the latest MetagRoot HMM library."
+                },
+                "mroot_latest_link": {
+                    "type": "string",
+                    "default": "https://pavlopoulos-lab.org/metagroot/DownloadHmm",
+                    "description": "MetagRoot hosted link to the latest available MetagRoot HMM database file."
+                },
                 "hmmsearch_evalue_cutoff": {
                     "type": "number",
                     "default": 0.001,
diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf
index 1ec8289..4e76a56 100644
--- a/subworkflows/local/domain_annotation/main.nf
+++ b/subworkflows/local/domain_annotation/main.nf
@@ -1,7 +1,10 @@
 include { ARIA2 as ARIA2_PFAM                 } from '../../../modules/nf-core/aria2/main'
 include { ARIA2 as ARIA2_FUNFAM               } from '../../../modules/nf-core/aria2/main'
+include { WGET as WGET_MROOT                  } from '../../../modules/nf-core/wget/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_PFAM   } from '../../../modules/nf-core/hmmer/hmmsearch/main'
 include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { HMMER_HMMSEARCH as HMMSEARCH_MROOT  } from '../../../modules/nf-core/hmmer/hmmsearch/main'
+include { UNTAR as UNTAR_MROOT                } from '../../../modules/nf-core/untar/main'
 
 workflow DOMAIN_ANNOTATION {
     take:
@@ -12,12 +15,16 @@ workflow DOMAIN_ANNOTATION {
     skip_funfam         // boolean
     funfam_db           // string, path to the funfam HMM database, if already exists
     funfam_latest_link  // string, path to the latest funfam HMM database, to download
+    skip_mroot          // boolean
+    mroot_db            // string, path to the metagroot HMM database, if already exists
+    mroot_latest_link   // string, path to the latest metagroot HMM database, to download
 
     main:
 
     ch_versions       = channel.empty()
     ch_pfam_domains   = channel.empty()
     ch_funfam_domains = channel.empty()
+    ch_mroot_domains  = channel.empty()
 
     if (!skip_pfam) {
         if (!pfam_db) {
@@ -59,8 +66,37 @@ workflow DOMAIN_ANNOTATION {
         ch_funfam_domains = HMMSEARCH_FUNFAM.out.domain_summary
     }
 
+    if (!skip_mroot) {
+        if (!mroot_db) {
+            ch_mroot_link = channel.of([ [ id: 'mroot' ], mroot_latest_link ])
+            // no local database given: download the archive from mroot_latest_link
+            WGET_MROOT( ch_mroot_link )
+            // unpack the downloaded archive (UNTAR_MROOT is run unconditionally on the download)
+            UNTAR_MROOT( WGET_MROOT.out.outfile )
+            // replace the unpacked directory with the .hmm files found inside it
+            ch_mroot_db = UNTAR_MROOT.out.untar
+            .map {
+                meta, dir ->
+                // glob for .hmm files below the unpacked dir; may match several files
+                def hmm_files = file("${dir}/**/*.hmm")
+                tuple(meta, hmm_files)
+            }
+        } else {
+            ch_mroot_db = channel.of([ [ id: 'mroot' ], mroot_db ]) // NOTE(review): passed as-is (no untar); the schema describes mroot_db as a .tar.gz — confirm
+        }
+
+        ch_input_for_hmmsearch_mroot = ch_fasta
+            .combine(ch_mroot_db)
+            .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] }
+
+        HMMSEARCH_MROOT( ch_input_for_hmmsearch_mroot )
+        ch_versions = ch_versions.mix( HMMSEARCH_MROOT.out.versions.first() )
+        ch_mroot_domains = HMMSEARCH_MROOT.out.domain_summary
+    }
+
     emit:
     pfam_domains   = ch_pfam_domains
     funfam_domains = ch_funfam_domains
+    mroot_domains  = ch_mroot_domains
     versions       = ch_versions
 }
diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml
index e04e241..ad37de9 100644
--- a/subworkflows/local/domain_annotation/meta.yml
+++ b/subworkflows/local/domain_annotation/meta.yml
@@ -42,6 +42,18 @@ input:
       type: string
       description: |
         Path to the latest FunFam HMM database, to download
+  - skip_mroot:
+      type: boolean
+      description: |
+        Skip domain annotation with MetagRoot
+  - mroot_db:
+      type: string
+      description: |
+        Path to an existing MetagRoot HMM library on the system. If provided, the WGET_MROOT db download will be skipped.
+  - mroot_latest_link:
+      type: string
+      description: |
+        Path to the latest MetagRoot HMM database, to download
 output:
   - pfam_domains:
       type: file
@@ -51,6 +63,10 @@ output:
       type: file
       description: |
         domtbl.gz files with funfam domain annotation for input amino acid sequences
+  - mroot_domains:
+      type: file
+      description: |
+        domtbl.gz files with metagroot domain annotation for input amino acid sequences
   - versions:
       type: file
       description: |
diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
index 1ba3ccc..7ef2d1a 100644
--- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf
@@ -180,7 +180,7 @@ def toolCitationText() {
         params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)."
     ].join(' ').trim()
 
-    def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)."
+    def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_mroot) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)."
 
     def prediction_text = params.skip_s4pred ? "" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)."
 
@@ -202,7 +202,7 @@ def toolBibliographyText() {
         params.skip_preprocessing ? '' : '<li>Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: <a href="https://doi.org/10.1002/imt2.191">10.1002/imt2.191</a></li>'
     ].join(' ').trim()
 
-    def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? '' : '<li>Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: <a href="https://doi.org/10.1371/journal.pcbi.1002195">10.1371/journal.pcbi.1002195</a></li>'
+    def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_mroot) ? '' : '<li>Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: <a href="https://doi.org/10.1371/journal.pcbi.1002195">10.1371/journal.pcbi.1002195</a></li>'
 
     def prediction_text = params.skip_s4pred ? '' : '<li>Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: <a href="https://doi.org/10.1093/bioinformatics/btab491">10.1093/bioinformatics/btab491</a></li>'
 
diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf
index fae1d7a..4398d69 100644
--- a/workflows/proteinannotator.nf
+++ b/workflows/proteinannotator.nf
@@ -29,6 +29,9 @@ workflow PROTEINANNOTATOR {
     skip_funfam         // boolean
     funfam_db           // string, path to the pfam HMM database, if already exists
     funfam_latest_link  // string, path to the latest pfam HMM database, to download
+    skip_mroot          // boolean
+    mroot_db            // string, path to the metagroot HMM database, if already exists
+    mroot_latest_link   // string, path to the latest metagroot HMM database, to download
     skip_interproscan   // boolean
     interproscan_db_url // string, url to download db
     interproscan_db     // string, existing db
@@ -49,7 +52,10 @@ workflow PROTEINANNOTATOR {
         pfam_latest_link,
         skip_funfam,
         funfam_db,
-        funfam_latest_link
+        funfam_latest_link,
+        skip_mroot,
+        mroot_db,
+        mroot_latest_link
     )
     ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions )
 

From 63fd565dcbe9a311932fe8feff14a960bdaadc38 Mon Sep 17 00:00:00 2001
From: angelphanth <angel.phanth@hotmail.com>
Date: Thu, 12 Mar 2026 16:53:15 +0000
Subject: [PATCH 3/3] nf-core modules wget and untar as a workaround for aria2
 to get metagroot tar file?

---
 modules.json                                 |  5 ++
 modules/nf-core/wget/environment.yml         |  7 ++
 modules/nf-core/wget/main.nf                 | 48 ++++++++++++++
 modules/nf-core/wget/meta.yml                | 52 +++++++++++++++
 modules/nf-core/wget/tests/main.nf.test      | 62 +++++++++++++++++
 modules/nf-core/wget/tests/main.nf.test.snap | 70 ++++++++++++++++++++
 modules/nf-core/wget/tests/nextflow.config   |  6 ++
 7 files changed, 250 insertions(+)
 create mode 100644 modules/nf-core/wget/environment.yml
 create mode 100644 modules/nf-core/wget/main.nf
 create mode 100644 modules/nf-core/wget/meta.yml
 create mode 100644 modules/nf-core/wget/tests/main.nf.test
 create mode 100644 modules/nf-core/wget/tests/main.nf.test.snap
 create mode 100644 modules/nf-core/wget/tests/nextflow.config

diff --git a/modules.json b/modules.json
index 37ba5b8..6a31b58 100644
--- a/modules.json
+++ b/modules.json
@@ -59,6 +59,11 @@
                         "branch": "master",
                         "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf",
                         "installed_by": ["modules"]
+                    },
+                    "wget": {
+                        "branch": "master",
+                        "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46",
+                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/nf-core/wget/environment.yml b/modules/nf-core/wget/environment.yml
new file mode 100644
index 0000000..9eb304e
--- /dev/null
+++ b/modules/nf-core/wget/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::wget=1.21.4
diff --git a/modules/nf-core/wget/main.nf b/modules/nf-core/wget/main.nf
new file mode 100644
index 0000000..9bc6f15
--- /dev/null
+++ b/modules/nf-core/wget/main.nf
@@ -0,0 +1,48 @@
+process WGET { // Download one URL with wget and save it as <prefix>.<suffix>
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3b/3b54fa9135194c72a18d00db6b399c03248103f87e43ca75e4b50d61179994b3/data':
+        'community.wave.seqera.io/library/wget:1.21.4--8b0fcde81c17be5e' }"
+
+    input:
+    tuple val(meta), val(url) // url is a plain value (val); wget fetches it inside the task
+
+    output:
+    tuple val(meta), path("${prefix}.${suffix}"), emit: outfile // the downloaded file, paired with its meta map
+    path "versions.yml"                         , emit: versions // records the wget version used
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script: // the URL is single-quoted in the command so the shell does not interpret &, ;, ? or * inside it
+    def args = task.ext.args   ?: '' // extra wget options, placed before the URL
+    prefix   = task.ext.prefix ?: "${meta.id}" // assigned without 'def' so the output declaration can use it
+    suffix   = task.ext.suffix ?: 'html' // output file extension; 'html' unless ext.suffix is set
+    """
+    wget \\
+        -O - \\
+        $args \\
+        '$url' \\
+        > ${prefix}.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        wget: \$(wget --version | head -1 | cut -d ' ' -f 3)
+    END_VERSIONS
+    """
+
+    stub: // creates an empty output file; still calls wget to report its version
+    prefix   = task.ext.prefix ?: "${meta.id}"
+    suffix   = task.ext.suffix ?: 'html'
+    """
+    touch ${prefix}.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        wget: \$(wget --version | head -1 | cut -d ' ' -f 3)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/wget/meta.yml b/modules/nf-core/wget/meta.yml
new file mode 100644
index 0000000..56df0af
--- /dev/null
+++ b/modules/nf-core/wget/meta.yml
@@ -0,0 +1,52 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "wget"
+description: The non-interactive network downloader
+keywords:
+  - "wget"
+  - "download"
+  - "network"
+tools:
+  - "wget":
+      description: "wget is a free utility for non-interactive download of files from
+        the Web."
+      homepage: "https://www.gnu.org/software/wget/"
+      documentation: "https://www.gnu.org/software/wget/manual/wget.html"
+      licence: ["GPL"]
+      identifier: ""
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - url:
+        type: string
+        description: URL to download
+        pattern: "^https?://*.*"
+
+output:
+  outfile:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - ${prefix}.${suffix}:
+          type: file
+          description: Downloaded file
+          pattern: "*.*"
+
+          ontologies: []
+  versions:
+    - versions.yml:
+        type: file
+        description: File containing software versions
+        pattern: "versions.yml"
+
+        ontologies:
+          - edam: http://edamontology.org/format_3750 # YAML
+authors:
+  - "@itrujnara"
+maintainers:
+  - "@itrujnara"
diff --git a/modules/nf-core/wget/tests/main.nf.test b/modules/nf-core/wget/tests/main.nf.test
new file mode 100644
index 0000000..e094288
--- /dev/null
+++ b/modules/nf-core/wget/tests/main.nf.test
@@ -0,0 +1,62 @@
+// nf-core modules test wget
+nextflow_process { // nf-test suite for the WGET process: one real download and one stub run
+
+    name "Test Process WGET"
+    script "../main.nf"
+    process "WGET"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "wget"
+
+    test("sarscov2 - gff") { // downloads genome.gff3 and snapshots the process outputs
+
+        config "./nextflow.config" // sets ext.prefix and ext.suffix for WGET
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3",
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // compared against the entry of the same name in main.nf.test.snap
+            )
+        }
+
+    }
+
+    test("sarscov2 - gff - stub") { // same URL under -stub; this meta map also carries single_end
+
+        options "-stub" // run the process's stub block instead of its script
+
+        config "./nextflow.config" // sets ext.prefix and ext.suffix for WGET
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/sarscov2/genome/genome.gff3",
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() } // compared against the entry of the same name in main.nf.test.snap
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/wget/tests/main.nf.test.snap b/modules/nf-core/wget/tests/main.nf.test.snap
new file mode 100644
index 0000000..6c05160
--- /dev/null
+++ b/modules/nf-core/wget/tests/main.nf.test.snap
@@ -0,0 +1,70 @@
+{
+    "sarscov2 - gff": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.gff3:md5,357435a81a9981a0128e840ebe11051e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
+                ],
+                "outfile": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.gff3:md5,357435a81a9981a0128e840ebe11051e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.4"
+        },
+        "timestamp": "2025-03-26T12:27:32.67617"
+    },
+    "sarscov2 - gff - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
+                ],
+                "outfile": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,a747f72db5fc051f64676a0ba6f32f35"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.10.4"
+        },
+        "timestamp": "2025-03-26T12:21:06.414955"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/wget/tests/nextflow.config b/modules/nf-core/wget/tests/nextflow.config
new file mode 100644
index 0000000..236f4e1
--- /dev/null
+++ b/modules/nf-core/wget/tests/nextflow.config
@@ -0,0 +1,6 @@
+process { // test-only settings for the WGET module tests
+    withName: "WGET" { // applies only to the process named WGET
+        ext.prefix = "test" // read by WGET as task.ext.prefix
+        ext.suffix = "gff3" // read by WGET as task.ext.suffix, so the output file is test.gff3
+    }
+}