diff --git a/.nf-core.yml b/.nf-core.yml index 99a3e1e..a1abaed 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -8,7 +8,7 @@ lint: - docs/images/nf-core-proteinannotator_logo_light.png - docs/images/nf-core-proteinannotator_logo_dark.png - .github/PULL_REQUEST_TEMPLATE.md -nf_core_version: 3.5.1 +nf_core_version: 3.5.2 repository_type: pipeline template: author: Olga Botvinnik, Evangelos Karatzas diff --git a/CHANGELOG.md b/CHANGELOG.md index 89b0098..202bd64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [85](https://github.com/nf-core/proteinannotator/pull/85) - Added zenodo doi in `nextflow.config`. (by @vagkaratzas) +- [#87](https://github.com/nf-core/proteinannotator/pull/87) - Added the option to download and use the latest `NMPFams` HMM library (or use path to an existing one) for domain annotation. (by @npechl) +- [#85](https://github.com/nf-core/proteinannotator/pull/85) - Added zenodo doi in `nextflow.config`. (by @vagkaratzas) ### `Changed` -- [85](https://github.com/nf-core/proteinannotator/pull/85) - `test_full.config` input samplesheet path is now set properly. (by @vagkaratzas) +- [#85](https://github.com/nf-core/proteinannotator/pull/85) - `test_full.config` input samplesheet path is now set properly. 
(by @vagkaratzas) ## v1.0.0 - Yellow Saiga - [2026/02/09] diff --git a/README.md b/README.md index 2eef432..2b8f037 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -37,7 +37,7 @@ Generate input amino acid sequence statistics with ([`SeqFu`](https://github.com ### Annotate sequences 1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases - such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/) and [FunFam](https://download.cathdb.info/cath/releases/all-releases/) + such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams](https://pavlopoulos-lab.org/envofams/databases/hmmer/) 2. 
Functional annotation: - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics. 3. Predict secondary structure compositional features such as α-helices, β-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred)) diff --git a/conf/modules.config b/conf/modules.config index ec1428c..b5a5635 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -90,6 +90,14 @@ process { ] } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:ARIA2_NMPFAMS' { + publishDir = [ + path: { "${params.outdir}/downloaded_dbs/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_PFAM' { ext.args = { "-E ${params.hmmsearch_evalue_cutoff}" } publishDir = [ @@ -110,6 +118,16 @@ process { ] } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_NMPFAMS' { + ext.args = { "-E ${params.hmmsearch_evalue_cutoff}" } + publishDir = [ + path: { "${params.outdir}/domain_annotation/nmpfams/" }, + mode: params.publish_dir_mode, + pattern: "*.domtbl.gz", + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:FUNCTIONAL_ANNOTATION:ARIA2' { publishDir = [ path: { "${params.outdir}/downloaded_dbs/" }, diff --git a/conf/test.config b/conf/test.config index 252ec87..9defa1c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,8 +25,9 @@ params { // Input data input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv' // Domain annotation - pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/conf/test_full.config b/conf/test_full.config index 94cd889..bcf1d96 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,8 +17,9 @@ params { // Input data for full size test input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv' // Domain annotation - pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + 
'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/docs/output.md b/docs/output.md index fcd3159..0e6387f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,9 +14,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [SeqFu](#seqfu) for input amino acid sequences quality control (QC) - [SeqKit](#seqkit) for preprocessing input amino acid sequences (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) - [Database download](#database-download) Optionally download selected databases for annotation. - - [aria2](#aria2) - To optionally download the Pfam, FunFam, and/or InterProScan databases through the pipeline. + - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams and/or InterProScan databases through the pipeline. - [Domain annotation](#domain-annotation) Annotate proteins with domains from established repositories. - - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam and/or FunFam domains through `hmmer/hmmsearch` + - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam and/or NMPFams domains through `hmmer/hmmsearch` - [Functional annotation](#functional-annotation) Annotate proteins with functional domains - [InterProScan](#Interproscan) - Search the InterProScan database for functional domains - [s4pred](#s4pred) - Predict secondary structures of sequences, producing amino acid level probabilities of forming an α-helix, a β-strand or a coil. 
@@ -72,10 +72,11 @@ The `seqkit` module is used for initial preprocessing (i.e., gap removal, conver - `Pfam-A*.hmm.gz`: (optional) The latest full, or a minimal test, Pfam-A HMM database that can be downloaded through the pipeline. - `interproscan_test.tar.gz`: (optional) the downloaded InterProScan archive of member databases according to the optional user-provided url - `funfam-hmm3-v4_3_0*.lib.gz`: (optional) The latest (v4_3_0) full, or a minimal test, FunFam HMM database that can be downloaded through the pipeline. + - `nmpfamsdb*.hmm.gz`: (optional) The latest full, or a minimal test, NMPFams HMM database that can be downloaded through the pipeline. -If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_interproscan`) for each annotation database is set to `true`, or the `*_db` parameter paths (e.g., `pfam_db`, `funfam_db`, `interproscan_db`) are set (i.e., not `null`), or the run is resumed after a successful database download, then the respective database will not be (re)downloaded. The full database links can be found in the main `nextflow.config` file, while minimal test versions can be found in the `test` and `test_full` profiles (i.e., `conf/test.config`, `conf/test_full.config`). +If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_nmpfams`, `skip_interproscan`) for each annotation database are set to `true`, or the `*_db` parameter paths (e.g., `pfam_db`, `funfam_db`, `nmpfams_db`, `interproscan_db`) are set (i.e., not `null`), or the run is resumed after a successful database download, then the respective database will not be (re)downloaded. The full database links can be found in the main `nextflow.config` file, while minimal test versions can be found in the `test` and `test_full` profiles (i.e., `conf/test.config`, `conf/test_full.config`). [aria2](https://github.com/aria2/aria2/) is a lightweight multi-protocol & multi-source, cross platform download utility operated in command-line. 
It supports HTTP/HTTPS, FTP, SFTP, BitTorrent and Metalink. @@ -91,10 +92,12 @@ If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_interproscan`) fo - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. - `funfam/` - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. + - `nmpfams/` + - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. -Each of the `domain_annotation/` subfolders (e.g., `pfam`, `funfam`) contain a `.domtbl.gz` annotation file per input sample, depending on which domain annotation databases were used in the pipeline execution. +Each of the `domain_annotation/` subfolders (e.g., `pfam`, `funfam`, `nmpfams`) contain a `.domtbl.gz` annotation file per input sample, depending on which domain annotation databases were used in the pipeline execution. [hmmer](https://github.com/EddyRivasLab/hmmer) is a fast and flexible alignment trimming tool that keeps phylogenetically informative sites and removes others. diff --git a/docs/usage.md b/docs/usage.md index 0945da0..72d53cc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -7,7 +7,7 @@ ## Introduction **nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics and generates sequence-level annotations for amino acid sequences. -It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam and FunFam HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred). +It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam and NMPFams HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred). Optionally, paths to pre-downloaded databases can be provided to skip the automatic download steps and speed up repeated runs. 
## Samplesheet input diff --git a/main.nf b/main.nf index 98d7d67..f9286d5 100644 --- a/main.nf +++ b/main.nf @@ -46,6 +46,9 @@ workflow NFCORE_PROTEINANNOTATOR { params.skip_funfam, params.funfam_db, params.funfam_latest_link, + params.skip_nmpfams, + params.nmpfams_db, + params.nmpfams_latest_link, params.skip_interproscan, params.interproscan_db_url, params.interproscan_db, diff --git a/nextflow.config b/nextflow.config index 580ca1c..3b9086a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,6 +25,9 @@ params { skip_funfam = false funfam_db = null funfam_latest_link = "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz" + skip_nmpfams = false + nmpfams_db = null + nmpfams_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz" hmmsearch_evalue_cutoff = 0.001 // Functional annotation diff --git a/nextflow_schema.json b/nextflow_schema.json index b7ad6d8..46a08a6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -276,6 +276,23 @@ "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz", "description": "CATH hosted link to the latest available (v4_3_0) FunFam HMM database file." }, + "skip_nmpfams": { + "type": "boolean", + "fa_icon": "fas fa-ban", + "description": "Skip the domain annotation with the NMPFams database.", + "help": "Skips the domain annotation of input sequence against a NMPFams database." + }, + "nmpfams_db": { + "type": "string", + "format": "file-path", + "description": "Path to an already installed NMPFams HMM database.", + "help_text": "If left null and skip_nmpfams is false, the pipeline will start downloading the latest NMPFams HMM library." 
+ }, + "nmpfams_latest_link": { + "type": "string", + "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz", + "description": "Link to the latest available NMPFams HMM database file." + }, "hmmsearch_evalue_cutoff": { "type": "number", "default": 0.001, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 936e72a..5028582 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2026-02-09T13:54:13+00:00", - "description": "

\n \n \n \"nf-core/proteinannotator\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get 
help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

\n \n \n \"nf-core/proteinannotator\"\n \n

\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/) and [FunFam](https://download.cathdb.info/cath/releases/all-releases/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/proteinannotator\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get 
help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

\n \n \n \"nf-core/proteinannotator\"\n \n

\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 1ec8289..467dbda 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,23 +1,29 @@ -include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' -include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' +include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as 
HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main' workflow DOMAIN_ANNOTATION { take: - ch_fasta // channel: [ val(meta), [ fasta ] ] - skip_pfam // boolean - pfam_db // string, path to the pfam HMM database, if already exists - pfam_latest_link // string, path to the latest pfam HMM database, to download - skip_funfam // boolean - funfam_db // string, path to the funfam HMM database, if already exists - funfam_latest_link // string, path to the latest funfam HMM database, to download + ch_fasta // channel: [ val(meta), [ fasta ] ] + skip_pfam // boolean + pfam_db // string, path to the pfam HMM database, if already exists + pfam_latest_link // string, path to the latest pfam HMM database, to download + skip_funfam // boolean + funfam_db // string, path to the funfam HMM database, if already exists + funfam_latest_link // string, path to the latest funfam HMM database, to download + skip_nmpfams // boolean + nmpfams_db // string, path to the nmpfams HMM database, if already exists + nmpfams_latest_link // string, path to the latest nmpfams HMM database, to download main: - ch_versions = channel.empty() - ch_pfam_domains = channel.empty() - ch_funfam_domains = channel.empty() + ch_versions = channel.empty() + ch_pfam_domains = channel.empty() + ch_funfam_domains = channel.empty() + ch_nmpfams_domains = channel.empty() if (!skip_pfam) { if (!pfam_db) { @@ -59,8 +65,29 @@ workflow DOMAIN_ANNOTATION { ch_funfam_domains = HMMSEARCH_FUNFAM.out.domain_summary } + if (!skip_nmpfams) { + if (!nmpfams_db) { + ch_nmpfams_link = channel.of([ [ id: 'nmpfams' ], nmpfams_latest_link ]) + + ARIA2_NMPFAMS( ch_nmpfams_link ) + ch_versions = ch_versions.mix( ARIA2_NMPFAMS.out.versions ) + ch_nmpfams_db = ARIA2_NMPFAMS.out.downloaded_file + } else { + ch_nmpfams_db = channel.of([ [ id: 'nmpfams' ], nmpfams_db ]) + } + + ch_input_for_hmmsearch_nmpfams = ch_fasta + 
.combine(ch_nmpfams_db) + .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] } 
+ HMMSEARCH_NMPFAMS( ch_input_for_hmmsearch_nmpfams ) + ch_versions = ch_versions.mix( HMMSEARCH_NMPFAMS.out.versions.first() ) + ch_nmpfams_domains = HMMSEARCH_NMPFAMS.out.domain_summary + } + emit: - pfam_domains = ch_pfam_domains - funfam_domains = ch_funfam_domains - versions = ch_versions + pfam_domains = ch_pfam_domains + funfam_domains = ch_funfam_domains + nmpfams_domains = ch_nmpfams_domains + versions = ch_versions } diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml index e04e241..80f38ba 100644 --- a/subworkflows/local/domain_annotation/meta.yml +++ b/subworkflows/local/domain_annotation/meta.yml @@ -42,6 +42,18 @@ input: type: string description: | Path to the latest FunFam HMM database, to download + - skip_nmpfams: + type: boolean + description: | + Skip domain annotation with nmpfamsDB + - nmpfams_db: + type: string + description: | + Path to an existing NMPFams HMM library on the system. If provided, the ARIA2_NMPFAMS db download will be skipped. 
+ - nmpfams_latest_link: + type: string + description: | + Path to the latest nmpfamsDB HMM database, to download output: - pfam_domains: type: file @@ -51,6 +63,10 @@ output: type: file description: | domtbl.gz files with funfam domain annotation for input amino acid sequences + - nmpfams_domains: + type: file + description: | + domtbl.gz files with nmpfams domain annotation for input amino acid sequences - versions: type: file description: | diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test b/subworkflows/local/domain_annotation/tests/main.nf.test index c713051..18030f4 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test +++ b/subworkflows/local/domain_annotation/tests/main.nf.test @@ -19,6 +19,9 @@ nextflow_workflow { input[4] = false // skip_funfam input[5] = null // funfam_db input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = true // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link """ } } @@ -50,6 +53,9 @@ nextflow_workflow { input[4] = true // skip_funfam input[5] = null // funfam_db input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = true // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link """ } } @@ -65,6 +71,39 @@ nextflow_workflow { } } + test("faa - nmpfams") { + + when { + workflow { + """ + input[0] = channel.of([ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + '/testdata/sequences/test_proteins.faa', checkIfExists: true) + ]) + input[1] = true // skip_pfam + input[2] = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' // pfam_db + input[3] = params.pipelines_testdata_base_path + 
'/testdata/pfam/Pfam-A_test.hmm.gz' // pfam_latest_link + input[4] = true // skip_funfam + input[5] = null // funfam_db + input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = false // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + path(workflow.out.nmpfams_domains[0][1]).linesGzip[0..7], + workflow.out.versions.collect { path(it).yaml }.unique() + ).match()} + ) + } + } + test("faa - domain annotation - stub") { options "-stub" @@ -82,6 +121,9 @@ nextflow_workflow { input[4] = false // skip_funfam input[5] = null // funfam_db input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = false // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link """ } } diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test.snap b/subworkflows/local/domain_annotation/tests/main.nf.test.snap index f1c925c..80ce69a 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test.snap +++ b/subworkflows/local/domain_annotation/tests/main.nf.test.snap @@ -44,11 +44,42 @@ } ] ], + "timestamp": "2026-03-13T14:51:37.636657", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-05T08:44:26.478981734" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "faa - nmpfams": { + "content": [ + [ + "# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord", + "# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target", + "#------------------- 
---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------", + "T1026 - 172 F049289 - 90 1e-05 12.9 0.0 1 1 9.8e-06 2e-05 12.0 0.0 34 76 43 85 33 88 0.93 FBNSV, , 172 residues|", + "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 1 3 0.00095 0.0019 5.3 0.0 6 25 72 91 68 98 0.85 LmrP, , 408 residues|", + "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 2 3 0.36 0.71 -2.9 0.0 42 49 148 155 135 162 0.68 LmrP, , 408 residues|", + "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 3 3 0.00017 0.00033 7.8 0.4 10 48 266 304 257 311 0.89 LmrP, , 408 residues|", + "T1024 - 408 F093539 - 93 3.1e-05 11.0 0.1 1 4 1.6e-05 3.1e-05 11.0 0.1 57 75 50 68 6 75 0.82 LmrP, , 408 residues|" + ], + [ + { + "DOMAIN_ANNOTATION:HMMSEARCH_NMPFAMS": { + "hmmer": 3.4 + } + }, + { + "DOMAIN_ANNOTATION:ARIA2_NMPFAMS": { + "aria2": "1.36.0" + } + } + ] + ], + "timestamp": "2026-03-13T14:53:51.847817", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "faa - pfam_db - skip_funfam": { "content": [ @@ -70,11 +101,11 @@ } ] ], + "timestamp": "2026-03-13T14:51:45.461466", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-05T10:10:22.057426358" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "faa - domain annotation - stub": { "content": [ @@ -96,8 +127,18 @@ ] ], "2": [ + [ + { + "id": "test" + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ "versions.yml:md5,160d4c5a5001cfb4ff57b94fc52b67d9", + "versions.yml:md5,1b7d208e42364fb87160693faa4e83b9", "versions.yml:md5,35e41735706132967dd94bb636833a4a", + "versions.yml:md5,9045f482d64e7666e62932b0578b665e", "versions.yml:md5,a74a0c8fcb741e59bc14424f612b8d09", "versions.yml:md5,f1d8a406d3dcb97a7c15e9c810926de1" ], @@ -109,6 +150,14 @@ "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "nmpfams_domains": [ + [ + { + "id": "test" + }, + 
"test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "pfam_domains": [ [ { @@ -119,16 +168,18 @@ ], "versions": [ "versions.yml:md5,160d4c5a5001cfb4ff57b94fc52b67d9", + "versions.yml:md5,1b7d208e42364fb87160693faa4e83b9", "versions.yml:md5,35e41735706132967dd94bb636833a4a", + "versions.yml:md5,9045f482d64e7666e62932b0578b665e", "versions.yml:md5,a74a0c8fcb741e59bc14424f612b8d09", "versions.yml:md5,f1d8a406d3dcb97a7c15e9c810926de1" ] } ], + "timestamp": "2026-03-13T09:45:07.520815", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-05T08:44:37.015452047" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf index 1ba3ccc..f5b753a 100644 --- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf @@ -180,7 +180,7 @@ def toolCitationText() { params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)." ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." def prediction_text = params.skip_s4pred ? "" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)." @@ -202,7 +202,7 @@ def toolBibliographyText() { params.skip_preprocessing ? '' : '
  • Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: 10.1002/imt2.191
  • ' ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' def prediction_text = params.skip_s4pred ? '' : '
  • Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: 10.1093/bioinformatics/btab491
  • ' diff --git a/tests/.nftignore b/tests/.nftignore index 153205a..0b6bd76 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -14,6 +14,9 @@ domain_annotation/pfam/l_arginase.domtbl.gz domain_annotation/funfam/T1024.domtbl.gz domain_annotation/funfam/T1026.domtbl.gz domain_annotation/funfam/l_arginase.domtbl.gz +domain_annotation/nmpfams/T1024.domtbl.gz +domain_annotation/nmpfams/T1026.domtbl.gz +domain_annotation/nmpfams/l_arginase.domtbl.gz functional_annotation/interproscan/T1024/T1024.gff3 functional_annotation/interproscan/T1024/T1024.tsv functional_annotation/interproscan/T1026/T1026.gff3 diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 527d241..10d5d5a 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test": { "content": [ - 32, + 36, { "ARIA2": { "aria2": "1.36.0" @@ -9,12 +9,18 @@ "ARIA2_FUNFAM": { "aria2": "1.36.0" }, + "ARIA2_NMPFAMS": { + "aria2": "1.36.0" + }, "ARIA2_PFAM": { "aria2": "1.36.0" }, "HMMSEARCH_FUNFAM": { "hmmer": 3.4 }, + "HMMSEARCH_NMPFAMS": { + "hmmer": 3.4 + }, "HMMSEARCH_PFAM": { "hmmer": 3.4 }, @@ -52,6 +58,10 @@ "domain_annotation/funfam/T1024.domtbl.gz", "domain_annotation/funfam/T1026.domtbl.gz", "domain_annotation/funfam/l_arginase.domtbl.gz", + "domain_annotation/nmpfams", + "domain_annotation/nmpfams/T1024.domtbl.gz", + "domain_annotation/nmpfams/T1026.domtbl.gz", + "domain_annotation/nmpfams/l_arginase.domtbl.gz", "domain_annotation/pfam", "domain_annotation/pfam/T1024.domtbl.gz", "domain_annotation/pfam/T1026.domtbl.gz", @@ -78,6 +88,7 @@ "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_15.0_HMM.LIB", "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_HMM.LIB", "downloaded_dbs/interproscan_test.tar.gz", + "downloaded_dbs/nmpfamsdb_test.hmm.gz", "functional_annotation", "functional_annotation/interproscan", "functional_annotation/interproscan/T1024", @@ -182,6 +193,7 @@ "TIGRFAMs_15.0_HMM.LIB:md5,64f2b2c9e834b47b17d91bb9a6a0067e", 
"TIGRFAMs_HMM.LIB:md5,543da3f4b65eed9ec393986c6c6ff0ba", "interproscan_test.tar.gz:md5,cde88c0cd841c84dc1203e64854c762b", + "nmpfamsdb_test.hmm.gz:md5,ad7a094618ccfdaeed1c03e93f6abf1e", "T1024.json:md5,0288f7551a14faedc409dd374b3e073e", "T1024.xml:md5,63a3db0eb0e1f76403411602c23b721e", "T1026.json:md5,5c2a40474b1cfb50cd043fe0be5e5d52", @@ -220,8 +232,8 @@ ], "meta": { "nf-test": "0.9.3", - "nextflow": "25.10.3" + "nextflow": "25.10.2" }, - "timestamp": "2026-02-04T12:43:32.273407057" + "timestamp": "2026-03-14T10:06:42.466898492" } } \ No newline at end of file diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf index fae1d7a..b0032f2 100644 --- a/workflows/proteinannotator.nf +++ b/workflows/proteinannotator.nf @@ -29,6 +29,9 @@ workflow PROTEINANNOTATOR { skip_funfam // boolean funfam_db // string, path to the pfam HMM database, if already exists funfam_latest_link // string, path to the latest pfam HMM database, to download + skip_nmpfams // boolean + nmpfams_db // string + nmpfams_latest_link // string skip_interproscan // boolean interproscan_db_url // string, url to download db interproscan_db // string, existing db @@ -49,7 +52,10 @@ workflow PROTEINANNOTATOR { pfam_latest_link, skip_funfam, funfam_db, - funfam_latest_link + funfam_latest_link, + skip_nmpfams, + nmpfams_db, + nmpfams_latest_link ) ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions )