From 88107cd27581a84c4658612b10199fad44762ebb Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Mon, 9 Feb 2026 14:53:17 +0000 Subject: [PATCH 01/27] zenodo updated --- README.md | 5 ++--- ro-crate-metadata.json | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fb552f8..2eef432 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator) [![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml) -[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) +[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) 
[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) @@ -99,8 +99,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - +If you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 51bb9a3..8058a02 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "Stable", "datePublished": "2026-02-09T10:42:29+00:00", - "description": "

\n \n \n \"nf-core/proteinannotator\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get 
help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

\n \n \n \"nf-core/proteinannotator\"\n \n

\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/) and [FunFam](https://download.cathdb.info/cath/releases/all-releases/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/proteinannotator\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get 
help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

\n \n \n \"nf-core/proteinannotator\"\n \n

\n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/) and [FunFam](https://download.cathdb.info/cath/releases/all-releases/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" From 60b847d80dc16936ec5665ed1473ca728c73e90b Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Wed, 11 Mar 2026 13:26:01 +0000 Subject: [PATCH 02/27] first commit --- main.nf | 3 + subworkflows/local/domain_annotation/main.nf | 61 ++++++++++++++------ workflows/proteinannotator.nf | 3 + 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/main.nf b/main.nf index 98d7d67..e1394d0 100644 --- a/main.nf +++ b/main.nf @@ -46,6 +46,9 @@ workflow NFCORE_PROTEINANNOTATOR { params.skip_funfam, params.funfam_db, params.funfam_latest_link, + params.skip_npmsfam, + params.npmsfam_db, + params.npmsfam_latest_link, params.skip_interproscan, params.interproscan_db_url, params.interproscan_db, diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 1ec8289..061016b 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,23 +1,29 @@ -include { ARIA2 as 
ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' -include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_NPMSFAM } from '../../../modules/nf-core/aria2/main' +include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_NPMSFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' workflow DOMAIN_ANNOTATION { take: - ch_fasta // channel: [ val(meta), [ fasta ] ] - skip_pfam // boolean - pfam_db // string, path to the pfam HMM database, if already exists - pfam_latest_link // string, path to the latest pfam HMM database, to download - skip_funfam // boolean - funfam_db // string, path to the funfam HMM database, if already exists - funfam_latest_link // string, path to the latest funfam HMM database, to download + ch_fasta // channel: [ val(meta), [ fasta ] ] + skip_pfam // boolean + pfam_db // string, path to the pfam HMM database, if already exists + pfam_latest_link // string, path to the latest pfam HMM database, to download + skip_funfam // boolean + funfam_db // string, path to the funfam HMM database, if already exists + funfam_latest_link // string, path to the latest funfam HMM database, to download + skip_npmfam // boolean + npmsfam_db // string + npmsfam_latest_link // string main: - ch_versions = channel.empty() - ch_pfam_domains = channel.empty() - ch_funfam_domains = channel.empty() + ch_versions = channel.empty() + ch_pfam_domains = channel.empty() + ch_funfam_domains = 
channel.empty() + ch_npmsfam_domains = channel.empty() if (!skip_pfam) { if (!pfam_db) { @@ -59,8 +65,29 @@ workflow DOMAIN_ANNOTATION { ch_funfam_domains = HMMSEARCH_FUNFAM.out.domain_summary } + if (!skip_npmsfam) { + if (!npmsfam_db) { + ch_npmsfam_link = channel.of([ [ id: 'npmsfam' ], npmsfam_latest_link ]) + + ARIA2_NPMSFAM( ch_npmsfam_link ) + ch_versions = ch_versions.mix( ARIA2_NPMSFAM.out.versions ) + ch_npmsfam_db = ARIA2_NPMSFAM.out.downloaded_file + } else { + ch_npmsfam_db = channel.of([ [ id: 'npmsfam' ], npmsfam_db ]) + } + + ch_input_for_hmmsearch_npmsfam = ch_fasta + .combine(ch_npmsfam_db) + .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] } + + HMMSEARCH_NPMSFAM( ch_input_for_hmmsearch_npmsfam ) + ch_versions = ch_versions.mix( HMMSEARCH_NPMSFAM.out.versions.first() ) + ch_npmsfam_domains = HMMSEARCH_NPMSFAM.out.domain_summary + } + emit: - pfam_domains = ch_pfam_domains - funfam_domains = ch_funfam_domains - versions = ch_versions + pfam_domains = ch_pfam_domains + funfam_domains = ch_funfam_domains + npmsfam_domains = ch_npmsfam_domains + versions = ch_versions } diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf index fae1d7a..95d8bf5 100644 --- a/workflows/proteinannotator.nf +++ b/workflows/proteinannotator.nf @@ -29,6 +29,9 @@ workflow PROTEINANNOTATOR { skip_funfam // boolean funfam_db // string, path to the pfam HMM database, if already exists funfam_latest_link // string, path to the latest pfam HMM database, to download + skip_npmsfam // boolean + npmsfam_db // string + npmsfam_latest_link // string skip_interproscan // boolean interproscan_db_url // string, url to download db interproscan_db // string, existing db From bec80f6a870f6f639fda72a7f7bbf99f04231985 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Wed, 11 Mar 2026 13:36:59 +0000 Subject: [PATCH 03/27] update configs --- nextflow.config | 3 +++ nextflow_schema.json | 17 +++++++++++++++++ 2 files changed, 20 
insertions(+) diff --git a/nextflow.config b/nextflow.config index e56f91f..ddb8ab0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,6 +25,9 @@ params { skip_funfam = false funfam_db = null funfam_latest_link = "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz" + skip_npmsfam = false + npmsfam_db = null + npmsfam_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz" hmmsearch_evalue_cutoff = 0.001 // Functional annotation diff --git a/nextflow_schema.json b/nextflow_schema.json index b7ad6d8..e0ea36c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -276,6 +276,23 @@ "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz", "description": "CATH hosted link to the latest available (v4_3_0) FunFam HMM database file." }, + "skip_npmsfam": { + "type": "boolean", + "fa_icon": "fas fa-ban", + "description": "Skip the domain annotation with the NMPFams database.", + "help": "Skips the domain annotation of input sequence against a NMPFams database." + }, + "npmsfam_db": { + "type": "string", + "format": "file-path", + "description": "Path to an already installed NMPFams HMM database.", + "help_text": "If left null and skip_funfam is false, the pipeline will start downloading the latest FunFam HMM library." 
+ }, + "npmsdfam_latest_link": { + "type": "string", + "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz", + "description": "" + }, "hmmsearch_evalue_cutoff": { "type": "number", "default": 0.001, From 18d9b1600b42a8c030b448292fc08ac530b08fe1 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Wed, 11 Mar 2026 13:39:37 +0000 Subject: [PATCH 04/27] update utils --- .../local/utils_nfcore_proteinannotator_pipeline/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf index 1ba3ccc..bc8f735 100644 --- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf @@ -180,7 +180,7 @@ def toolCitationText() { params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)." ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_npmsfam) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." def prediction_text = params.skip_s4pred ? "" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)." @@ -202,7 +202,7 @@ def toolBibliographyText() { params.skip_preprocessing ? '' : '
  • Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: 10.1002/imt2.191
  • ' ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_npmsfam) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' def prediction_text = params.skip_s4pred ? '' : '
  • Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: 10.1093/bioinformatics/btab491
  • ' From 7dc19a6b02d900db7d1f72c9346f1d2252c8a256 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Wed, 11 Mar 2026 13:45:17 +0000 Subject: [PATCH 05/27] fix link --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index e0ea36c..f6ffa0d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -290,7 +290,7 @@ }, "npmsdfam_latest_link": { "type": "string", - "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz", + "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz", "description": "" }, "hmmsearch_evalue_cutoff": { From 3a50b2199296d5c7fac5c38a3602f5796e71d634 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Wed, 11 Mar 2026 14:36:58 +0000 Subject: [PATCH 06/27] update domain_annotation meta --- subworkflows/local/domain_annotation/meta.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml index e04e241..a3ded55 100644 --- a/subworkflows/local/domain_annotation/meta.yml +++ b/subworkflows/local/domain_annotation/meta.yml @@ -42,6 +42,18 @@ input: type: string description: | Path to the latest FunFam HMM database, to download + - skip_npmsfam: + type: boolean + description: | + Skip domain annotation with NPMFamsDB + - npmsfam_db: + type: string + description: | + Path to an existing HMM NPMFamDB library on the system. If provided, the ARIA2_NPMSFAM db download will be skipped. 
+ - npmsfam_latest_link: + type: string + description: | + Path to the latest NPMFamsDB HMM database, to download output: - pfam_domains: type: file @@ -51,6 +63,10 @@ output: type: file description: | domtbl.gz files with funfam domain annotation for input amino acid sequences + - npmsfam_domains: + type: file + description: | + domtbl.gz files with npmsfam domain annotation for input amino acid sequences - versions: type: file description: | From 80758f5d6c4a34cadefa687a99c62972bab34701 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Wed, 11 Mar 2026 14:47:29 +0000 Subject: [PATCH 07/27] update main workflow --- workflows/proteinannotator.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf index 95d8bf5..e1193aa 100644 --- a/workflows/proteinannotator.nf +++ b/workflows/proteinannotator.nf @@ -52,7 +52,10 @@ workflow PROTEINANNOTATOR { pfam_latest_link, skip_funfam, funfam_db, - funfam_latest_link + funfam_latest_link, + skip_npmsfam, + npmsfam_db, + npmsfam_latest_link, ) ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions ) From acef4be9bebe8a8f589e085d734faed88cf65d8b Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Wed, 11 Mar 2026 15:43:30 +0000 Subject: [PATCH 08/27] fix typos --- subworkflows/local/domain_annotation/main.nf | 2 +- workflows/proteinannotator.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 061016b..4f96396 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -14,7 +14,7 @@ workflow DOMAIN_ANNOTATION { skip_funfam // boolean funfam_db // string, path to the funfam HMM database, if already exists funfam_latest_link // string, path to the latest funfam HMM database, to download - skip_npmfam // boolean + skip_npmsfam // boolean npmsfam_db // string 
npmsfam_latest_link // string diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf index e1193aa..241ca4c 100644 --- a/workflows/proteinannotator.nf +++ b/workflows/proteinannotator.nf @@ -55,7 +55,7 @@ workflow PROTEINANNOTATOR { funfam_latest_link, skip_npmsfam, npmsfam_db, - npmsfam_latest_link, + npmsfam_latest_link ) ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions ) From f70acaaa7d990c7c05ec981443da53b0de8e2ccd Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Thu, 12 Mar 2026 13:43:33 +0000 Subject: [PATCH 09/27] include testing --- conf/test.config | 1 + conf/test_full.config | 1 + 2 files changed, 2 insertions(+) diff --git a/conf/test.config b/conf/test.config index 252ec87..206f5b8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,6 +27,7 @@ params { // Domain annotation pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + npmsfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/npmsfam/npmsfam_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/conf/test_full.config b/conf/test_full.config index bfb05f7..ec3ba7f 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -19,6 +19,7 @@ params { // Domain annotation pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + npmsfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/npmsfam/npmsfam_test.hmm.gz' // Functional annotation interproscan_db_url = 
params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' From 21e5436fdb3979fb9bac699dbcb8f15fb8b7c5e0 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Thu, 12 Mar 2026 13:57:38 +0000 Subject: [PATCH 10/27] fix naming --- conf/test.config | 2 +- conf/test_full.config | 2 +- main.nf | 6 +-- nextflow.config | 6 +-- nextflow_schema.json | 4 +- subworkflows/local/domain_annotation/main.nf | 38 +++++++++---------- subworkflows/local/domain_annotation/meta.yml | 12 +++--- .../main.nf | 4 +- workflows/proteinannotator.nf | 12 +++--- 9 files changed, 43 insertions(+), 43 deletions(-) diff --git a/conf/test.config b/conf/test.config index 206f5b8..21ea453 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,7 +27,7 @@ params { // Domain annotation pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - npmsfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/npmsfam/npmsfam_test.hmm.gz' + npmfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/npmfams/npmfams_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/conf/test_full.config b/conf/test_full.config index ec3ba7f..44f0050 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -19,7 +19,7 @@ params { // Domain annotation pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - npmsfam_latest_link = 
params.pipelines_testdata_base_path + 'proteinannotator/testdata/npmsfam/npmsfam_test.hmm.gz' + npmfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/npmfams/npmfams_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/main.nf b/main.nf index e1394d0..a7a0a20 100644 --- a/main.nf +++ b/main.nf @@ -46,9 +46,9 @@ workflow NFCORE_PROTEINANNOTATOR { params.skip_funfam, params.funfam_db, params.funfam_latest_link, - params.skip_npmsfam, - params.npmsfam_db, - params.npmsfam_latest_link, + params.skip_npmfams, + params.npmfams_db, + params.npmfams_latest_link, params.skip_interproscan, params.interproscan_db_url, params.interproscan_db, diff --git a/nextflow.config b/nextflow.config index ddb8ab0..e0ea425 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,9 +25,9 @@ params { skip_funfam = false funfam_db = null funfam_latest_link = "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz" - skip_npmsfam = false - npmsfam_db = null - npmsfam_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz" + skip_npmfams = false + npmfams_db = null + npmfams_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz" hmmsearch_evalue_cutoff = 0.001 // Functional annotation diff --git a/nextflow_schema.json b/nextflow_schema.json index f6ffa0d..1355cca 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -276,13 +276,13 @@ "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz", "description": "CATH hosted link to the latest available (v4_3_0) FunFam HMM database file." 
}, - "skip_npmsfam": { + "skip_npmfams": { "type": "boolean", "fa_icon": "fas fa-ban", "description": "Skip the domain annotation with the NMPFams database.", "help": "Skips the domain annotation of input sequence against a NMPFams database." }, - "npmsfam_db": { + "npmfams_db": { "type": "string", "format": "file-path", "description": "Path to an already installed NMPFams HMM database.", diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 4f96396..8cd9695 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,9 +1,9 @@ include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_NPMSFAM } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_npmfams } from '../../../modules/nf-core/aria2/main' include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_NPMSFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_npmfams } from '../../../modules/nf-core/hmmer/hmmsearch/main' workflow DOMAIN_ANNOTATION { take: @@ -14,16 +14,16 @@ workflow DOMAIN_ANNOTATION { skip_funfam // boolean funfam_db // string, path to the funfam HMM database, if already exists funfam_latest_link // string, path to the latest funfam HMM database, to download - skip_npmsfam // boolean - npmsfam_db // string - npmsfam_latest_link // string + skip_npmfams // boolean + npmfams_db // string + npmfams_latest_link // string main: ch_versions = channel.empty() ch_pfam_domains = channel.empty() ch_funfam_domains = channel.empty() - ch_npmsfam_domains = channel.empty() + ch_npmfams_domains = channel.empty() if (!skip_pfam) { if (!pfam_db) 
{ @@ -65,29 +65,29 @@ workflow DOMAIN_ANNOTATION { ch_funfam_domains = HMMSEARCH_FUNFAM.out.domain_summary } - if (!skip_npmsfam) { - if (!npmsfam_db) { - ch_npmsfam_link = channel.of([ [ id: 'npmsfam' ], npmsfam_latest_link ]) + if (!skip_npmfams) { + if (!npmfams_db) { + ch_npmfams_link = channel.of([ [ id: 'npmfams' ], npmfams_latest_link ]) - ARIA2_NPMSFAM( ch_npmsfam_link ) - ch_versions = ch_versions.mix( ARIA2_NPMSFAM.out.versions ) - ch_npmsfam_db = ARIA2_NPMSFAM.out.downloaded_file + ARIA2_npmfams( ch_npmfams_link ) + ch_versions = ch_versions.mix( ARIA2_npmfams.out.versions ) + ch_npmfams_db = ARIA2_npmfams.out.downloaded_file } else { - ch_npmsfam_db = channel.of([ [ id: 'npmsfam' ], npmsfam_db ]) + ch_npmfams_db = channel.of([ [ id: 'npmfams' ], npmfams_db ]) } - ch_input_for_hmmsearch_npmsfam = ch_fasta - .combine(ch_npmsfam_db) + ch_input_for_hmmsearch_npmfams = ch_fasta + .combine(ch_npmfams_db) .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] } - HMMSEARCH_NPMSFAM( ch_input_for_hmmsearch_npmsfam ) - ch_versions = ch_versions.mix( HMMSEARCH_NPMSFAM.out.versions.first() ) - ch_npmsfam_domains = HMMSEARCH_NPMSFAM.out.domain_summary + HMMSEARCH_npmfams( ch_input_for_hmmsearch_npmfams ) + ch_versions = ch_versions.mix( HMMSEARCH_npmfams.out.versions.first() ) + ch_npmfams_domains = HMMSEARCH_npmfams.out.domain_summary } emit: pfam_domains = ch_pfam_domains funfam_domains = ch_funfam_domains - npmsfam_domains = ch_npmsfam_domains + npmfams_domains = ch_npmfams_domains versions = ch_versions } diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml index a3ded55..3e9268c 100644 --- a/subworkflows/local/domain_annotation/meta.yml +++ b/subworkflows/local/domain_annotation/meta.yml @@ -42,15 +42,15 @@ input: type: string description: | Path to the latest FunFam HMM database, to download - - skip_npmsfam: + - skip_npmfams: type: boolean description: | Skip domain annotation with 
NPMFamsDB - - npmsfam_db: + - npmfams_db: type: string description: | - Path to an existing HMM NPMFamDB library on the system. If provided, the ARIA2_NPMSFAM db download will be skipped. - - npmsfam_latest_link: + Path to an existing HMM NPMFamDB library on the system. If provided, the ARIA2_npmfams db download will be skipped. + - npmfams_latest_link: type: string description: | Path to the latest NPMFamsDB HMM database, to download @@ -63,10 +63,10 @@ output: type: file description: | domtbl.gz files with funfam domain annotation for input amino acid sequences - - npmsfam_domains: + - npmfams_domains: type: file description: | - domtbl.gz files with npmsfam domain annotation for input amino acid sequences + domtbl.gz files with npmfams domain annotation for input amino acid sequences - versions: type: file description: | diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf index bc8f735..316df5b 100644 --- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf @@ -180,7 +180,7 @@ def toolCitationText() { params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)." ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_npmsfam) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_npmfams) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." def prediction_text = params.skip_s4pred ? "" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)." @@ -202,7 +202,7 @@ def toolBibliographyText() { params.skip_preprocessing ? '' : '
  • Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: 10.1002/imt2.191
  • ' ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_npmsfam) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_npmfams) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' def prediction_text = params.skip_s4pred ? '' : '
  • Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: 10.1093/bioinformatics/btab491
  • ' diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf index 241ca4c..451db79 100644 --- a/workflows/proteinannotator.nf +++ b/workflows/proteinannotator.nf @@ -29,9 +29,9 @@ workflow PROTEINANNOTATOR { skip_funfam // boolean funfam_db // string, path to the pfam HMM database, if already exists funfam_latest_link // string, path to the latest pfam HMM database, to download - skip_npmsfam // boolean - npmsfam_db // string - npmsfam_latest_link // string + skip_npmfams // boolean + npmfams_db // string + npmfams_latest_link // string skip_interproscan // boolean interproscan_db_url // string, url to download db interproscan_db // string, existing db @@ -53,9 +53,9 @@ workflow PROTEINANNOTATOR { skip_funfam, funfam_db, funfam_latest_link, - skip_npmsfam, - npmsfam_db, - npmsfam_latest_link + skip_npmfams, + npmfams_db, + npmfams_latest_link ) ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions ) From 01310a003199839b47abdad537e79e8b3549feb6 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Thu, 12 Mar 2026 14:43:52 +0000 Subject: [PATCH 11/27] fix naming --- conf/test.config | 10 ++--- conf/test_full.config | 10 ++--- main.nf | 6 +-- nextflow.config | 6 +-- nextflow_schema.json | 4 +- subworkflows/local/domain_annotation/main.nf | 38 +++++++++---------- subworkflows/local/domain_annotation/meta.yml | 16 ++++---- .../domain_annotation/tests/main.nf.test | 11 ++++++ .../main.nf | 4 +- tests/nextflow.config | 2 +- workflows/proteinannotator.nf | 12 +++--- 11 files changed, 65 insertions(+), 54 deletions(-) diff --git a/conf/test.config b/conf/test.config index 21ea453..e0b99c5 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,12 +23,12 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv' + input = params.pipelines_testdata_base_path + '/samplesheet/samplesheet.csv' // Domain 
annotation - pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - npmfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/npmfams/npmfams_test.hmm.gz' + pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' // Functional annotation - interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' + interproscan_db_url = params.pipelines_testdata_base_path + '/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' } diff --git a/conf/test_full.config b/conf/test_full.config index 3e7000c..931aeac 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,12 +15,12 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv' + input = params.pipelines_testdata_base_path + '/samplesheet/samplesheet.csv' // Domain annotation - pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - npmfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/npmfams/npmfams_test.hmm.gz' + pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + 
'/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' // Functional annotation - interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' + interproscan_db_url = params.pipelines_testdata_base_path + '/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' } diff --git a/main.nf b/main.nf index a7a0a20..f9286d5 100644 --- a/main.nf +++ b/main.nf @@ -46,9 +46,9 @@ workflow NFCORE_PROTEINANNOTATOR { params.skip_funfam, params.funfam_db, params.funfam_latest_link, - params.skip_npmfams, - params.npmfams_db, - params.npmfams_latest_link, + params.skip_nmpfams, + params.nmpfams_db, + params.nmpfams_latest_link, params.skip_interproscan, params.interproscan_db_url, params.interproscan_db, diff --git a/nextflow.config b/nextflow.config index c537197..3b9086a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -25,9 +25,9 @@ params { skip_funfam = false funfam_db = null funfam_latest_link = "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz" - skip_npmfams = false - npmfams_db = null - npmfams_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz" + skip_nmpfams = false + nmpfams_db = null + nmpfams_latest_link = "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz" hmmsearch_evalue_cutoff = 0.001 // Functional annotation diff --git a/nextflow_schema.json b/nextflow_schema.json index 1355cca..d549451 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -276,13 +276,13 @@ "default": "https://download.cathdb.info/cath/releases/all-releases/v4_3_0/sequence-data/funfam-hmm3-v4_3_0.lib.gz", "description": "CATH hosted link to the latest available (v4_3_0) FunFam HMM database file." 
}, - "skip_npmfams": { + "skip_nmpfams": { "type": "boolean", "fa_icon": "fas fa-ban", "description": "Skip the domain annotation with the NMPFams database.", "help": "Skips the domain annotation of input sequence against a NMPFams database." }, - "npmfams_db": { + "nmpfams_db": { "type": "string", "format": "file-path", "description": "Path to an already installed NMPFams HMM database.", diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 8cd9695..3861f1d 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,9 +1,9 @@ include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_npmfams } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_nmpfams } from '../../../modules/nf-core/aria2/main' include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_npmfams } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_nmpfams } from '../../../modules/nf-core/hmmer/hmmsearch/main' workflow DOMAIN_ANNOTATION { take: @@ -14,16 +14,16 @@ workflow DOMAIN_ANNOTATION { skip_funfam // boolean funfam_db // string, path to the funfam HMM database, if already exists funfam_latest_link // string, path to the latest funfam HMM database, to download - skip_npmfams // boolean - npmfams_db // string - npmfams_latest_link // string + skip_nmpfams // boolean + nmpfams_db // string + nmpfams_latest_link // string main: ch_versions = channel.empty() ch_pfam_domains = channel.empty() ch_funfam_domains = channel.empty() - ch_npmfams_domains = channel.empty() + ch_nmpfams_domains = channel.empty() if (!skip_pfam) { if (!pfam_db) 
{ @@ -65,29 +65,29 @@ workflow DOMAIN_ANNOTATION { ch_funfam_domains = HMMSEARCH_FUNFAM.out.domain_summary } - if (!skip_npmfams) { - if (!npmfams_db) { - ch_npmfams_link = channel.of([ [ id: 'npmfams' ], npmfams_latest_link ]) + if (!skip_nmpfams) { + if (!nmpfams_db) { + ch_nmpfams_link = channel.of([ [ id: 'nmpfams' ], nmpfams_latest_link ]) - ARIA2_npmfams( ch_npmfams_link ) - ch_versions = ch_versions.mix( ARIA2_npmfams.out.versions ) - ch_npmfams_db = ARIA2_npmfams.out.downloaded_file + ARIA2_nmpfams( ch_nmpfams_link ) + ch_versions = ch_versions.mix( ARIA2_nmpfams.out.versions ) + ch_nmpfams_db = ARIA2_nmpfams.out.downloaded_file } else { - ch_npmfams_db = channel.of([ [ id: 'npmfams' ], npmfams_db ]) + ch_nmpfams_db = channel.of([ [ id: 'nmpfams' ], nmpfams_db ]) } - ch_input_for_hmmsearch_npmfams = ch_fasta - .combine(ch_npmfams_db) + ch_input_for_hmmsearch_nmpfams = ch_fasta + .combine(ch_nmpfams_db) .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] } - HMMSEARCH_npmfams( ch_input_for_hmmsearch_npmfams ) - ch_versions = ch_versions.mix( HMMSEARCH_npmfams.out.versions.first() ) - ch_npmfams_domains = HMMSEARCH_npmfams.out.domain_summary + HMMSEARCH_nmpfams( ch_input_for_hmmsearch_nmpfams ) + ch_versions = ch_versions.mix( HMMSEARCH_nmpfams.out.versions.first() ) + ch_nmpfams_domains = HMMSEARCH_nmpfams.out.domain_summary } emit: pfam_domains = ch_pfam_domains funfam_domains = ch_funfam_domains - npmfams_domains = ch_npmfams_domains + nmpfams_domains = ch_nmpfams_domains versions = ch_versions } diff --git a/subworkflows/local/domain_annotation/meta.yml b/subworkflows/local/domain_annotation/meta.yml index 3e9268c..80f38ba 100644 --- a/subworkflows/local/domain_annotation/meta.yml +++ b/subworkflows/local/domain_annotation/meta.yml @@ -42,18 +42,18 @@ input: type: string description: | Path to the latest FunFam HMM database, to download - - skip_npmfams: + - skip_nmpfams: type: boolean description: | - Skip domain annotation with 
NPMFamsDB - - npmfams_db: + Skip domain annotation with nmpfamsDB + - nmpfams_db: type: string description: | - Path to an existing HMM NPMFamDB library on the system. If provided, the ARIA2_npmfams db download will be skipped. - - npmfams_latest_link: + Path to an existing HMM NPMFamDB library on the system. If provided, the ARIA2_nmpfams db download will be skipped. + - nmpfams_latest_link: type: string description: | - Path to the latest NPMFamsDB HMM database, to download + Path to the latest nmpfamsDB HMM database, to download output: - pfam_domains: type: file @@ -63,10 +63,10 @@ output: type: file description: | domtbl.gz files with funfam domain annotation for input amino acid sequences - - npmfams_domains: + - nmpfams_domains: type: file description: | - domtbl.gz files with npmfams domain annotation for input amino acid sequences + domtbl.gz files with nmpfams domain annotation for input amino acid sequences - versions: type: file description: | diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test b/subworkflows/local/domain_annotation/tests/main.nf.test index c713051..7397d4d 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test +++ b/subworkflows/local/domain_annotation/tests/main.nf.test @@ -19,6 +19,9 @@ nextflow_workflow { input[4] = false // skip_funfam input[5] = null // funfam_db input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = false // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' // nmpfams_latest_link """ } } @@ -29,6 +32,7 @@ nextflow_workflow { { assert snapshot( path(workflow.out.pfam_domains[0][1]).linesGzip[0..7], path(workflow.out.funfam_domains[0][1]).linesGzip[0..7], + path(workflow.out.nmpfams_domains[0][1]).linesGzip[0..7], workflow.out.versions.collect { path(it).yaml }.unique() ).match()} ) @@ -50,6 +54,9 @@ nextflow_workflow { 
input[4] = true // skip_funfam input[5] = null // funfam_db input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = false // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' // nmpfams_latest_link """ } } @@ -59,6 +66,7 @@ nextflow_workflow { { assert workflow.success}, { assert snapshot( path(workflow.out.pfam_domains[0][1]).linesGzip[0..7], + path(workflow.out.nmpfams_domains[0][1]).linesGzip[0..7], workflow.out.versions.collect { path(it).yaml }.unique() ).match()} ) @@ -82,6 +90,9 @@ nextflow_workflow { input[4] = false // skip_funfam input[5] = null // funfam_db input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = false // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' // nmpfams_latest_link """ } } diff --git a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf index 316df5b..f5b753a 100644 --- a/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_proteinannotator_pipeline/main.nf @@ -180,7 +180,7 @@ def toolCitationText() { params.skip_preprocessing ? "" : "Input sequences were preprocessed with SeqKit (gap trimming, length filtering, validation, duplicate removal) (Shen et al. 2024)." ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_npmfams) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams) ? "" : "Domains were annotated with hmmer/hmmsearch (Eddy et al. 2011)." def prediction_text = params.skip_s4pred ? 
"" : "Secondary structures were predicted via the s4pred software (Moffat et al. 2021)." @@ -202,7 +202,7 @@ def toolBibliographyText() { params.skip_preprocessing ? '' : '
  • Shen, W., Sipos, B., & Zhao, L. (2024). SeqKit2: A Swiss army knife for sequence and alignment processing. Imeta, 3(3), e191. doi: 10.1002/imt2.191
  • ' ].join(' ').trim() - def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_npmfams) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' + def domain_annotation_text = (params.skip_pfam && params.skip_funfam && params.skip_nmpfams) ? '' : '
  • Eddy, S. R. (2011). Accelerated profile HMM searches. PLoS computational biology, 7(10), e1002195. doi: 10.1371/journal.pcbi.1002195
  • ' def prediction_text = params.skip_s4pred ? '' : '
  • Moffat, L., & Jones, D. T. (2021). Increasing the accuracy of single sequence prediction methods using a deep semi-supervised learning framework. Bioinformatics, 37(21), 3744-3751. doi: 10.1093/bioinformatics/btab491
  • ' diff --git a/tests/nextflow.config b/tests/nextflow.config index db52597..daea7c7 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -7,7 +7,7 @@ // Or any resources requirements params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/proteinannotator' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/npechl/test-datasets/refs/heads/proteinannotator' } aws.client.anonymous = true diff --git a/workflows/proteinannotator.nf b/workflows/proteinannotator.nf index 451db79..b0032f2 100644 --- a/workflows/proteinannotator.nf +++ b/workflows/proteinannotator.nf @@ -29,9 +29,9 @@ workflow PROTEINANNOTATOR { skip_funfam // boolean funfam_db // string, path to the pfam HMM database, if already exists funfam_latest_link // string, path to the latest pfam HMM database, to download - skip_npmfams // boolean - npmfams_db // string - npmfams_latest_link // string + skip_nmpfams // boolean + nmpfams_db // string + nmpfams_latest_link // string skip_interproscan // boolean interproscan_db_url // string, url to download db interproscan_db // string, existing db @@ -53,9 +53,9 @@ workflow PROTEINANNOTATOR { skip_funfam, funfam_db, funfam_latest_link, - skip_npmfams, - npmfams_db, - npmfams_latest_link + skip_nmpfams, + nmpfams_db, + nmpfams_latest_link ) ch_versions = ch_versions.mix( DOMAIN_ANNOTATION.out.versions ) From 185ce5a8a77f95792b8d84e86d84700b43d8cb87 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Thu, 12 Mar 2026 15:43:42 +0000 Subject: [PATCH 12/27] fix testing --- conf/test.config | 2 +- conf/test_full.config | 2 +- subworkflows/local/domain_annotation/tests/main.nf.test | 6 +++--- tests/nextflow.config | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/test.config b/conf/test.config index e0b99c5..83274db 100644 --- a/conf/test.config +++ 
b/conf/test.config @@ -27,7 +27,7 @@ params { // Domain annotation pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + '/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/conf/test_full.config b/conf/test_full.config index 931aeac..abe5d37 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -19,7 +19,7 @@ params { // Domain annotation pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + '/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test b/subworkflows/local/domain_annotation/tests/main.nf.test index 7397d4d..b6bad3a 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test +++ b/subworkflows/local/domain_annotation/tests/main.nf.test @@ -21,7 +21,7 @@ nextflow_workflow { input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link input[7] = false // skip_nmpfams input[8] = null // nmpfams_db - input[9] = params.pipelines_testdata_base_path + 
'/testdata/nmpfams/nmpfams_test.hmm.gz' // nmpfams_latest_link + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link """ } } @@ -56,7 +56,7 @@ nextflow_workflow { input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link input[7] = false // skip_nmpfams input[8] = null // nmpfams_db - input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' // nmpfams_latest_link + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link """ } } @@ -92,7 +92,7 @@ nextflow_workflow { input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link input[7] = false // skip_nmpfams input[8] = null // nmpfams_db - input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfams_test.hmm.gz' // nmpfams_latest_link + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link """ } } diff --git a/tests/nextflow.config b/tests/nextflow.config index daea7c7..db52597 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -7,7 +7,7 @@ // Or any resources requirements params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/npechl/test-datasets/refs/heads/proteinannotator' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/proteinannotator' } aws.client.anonymous = true From 9a6bc835dfade4156acc92a52d30903b5f243248 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Thu, 12 Mar 2026 15:52:30 +0000 Subject: [PATCH 13/27] fix typo --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 
d549451..46a08a6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -288,7 +288,7 @@ "description": "Path to an already installed NMPFams HMM database.", "help_text": "If left null and skip_funfam is false, the pipeline will start downloading the latest FunFam HMM library." }, - "npmsdfam_latest_link": { + "nmpfams_latest_link": { "type": "string", "default": "https://pavlopoulos-lab.org/envofams/databases/hmmer/nmpfamsdb.hmm.gz", "description": "" From c88e6c46d3feb3c435e57e08a705c77afe1998e8 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Thu, 12 Mar 2026 16:32:44 +0000 Subject: [PATCH 14/27] fix typo --- subworkflows/local/domain_annotation/main.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index 3861f1d..a7cb349 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,9 +1,9 @@ include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_nmpfams } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' -include { HMMER_HMMSEARCH as HMMSEARCH_nmpfams } from '../../../modules/nf-core/hmmer/hmmsearch/main' +include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main' workflow DOMAIN_ANNOTATION { take: @@ -69,9 +69,9 @@ workflow DOMAIN_ANNOTATION { if (!nmpfams_db) { ch_nmpfams_link = channel.of([ [ id: 'nmpfams' ], nmpfams_latest_link ]) - ARIA2_nmpfams( ch_nmpfams_link ) - ch_versions = ch_versions.mix( ARIA2_nmpfams.out.versions ) - 
ch_nmpfams_db = ARIA2_nmpfams.out.downloaded_file + ARIA2_NMPFAMS( ch_nmpfams_link ) + ch_versions = ch_versions.mix( ARIA2_NMPFAMS.out.versions ) + ch_nmpfams_db = ARIA2_NMPFAMS.out.downloaded_file } else { ch_nmpfams_db = channel.of([ [ id: 'nmpfams' ], nmpfams_db ]) } @@ -80,9 +80,9 @@ workflow DOMAIN_ANNOTATION { .combine(ch_nmpfams_db) .map{ meta, seqs, _meta2, models -> [meta, models, seqs, false, false, true] } - HMMSEARCH_nmpfams( ch_input_for_hmmsearch_nmpfams ) - ch_versions = ch_versions.mix( HMMSEARCH_nmpfams.out.versions.first() ) - ch_nmpfams_domains = HMMSEARCH_nmpfams.out.domain_summary + HMMSEARCH_NMPFAMS( ch_input_for_hmmsearch_nmpfams ) + ch_versions = ch_versions.mix( HMMSEARCH_NMPFAMS.out.versions.first() ) + ch_nmpfams_domains = HMMSEARCH_NMPFAMS.out.domain_summary } emit: From 585808de8a957076ab3a540839f888451d32bd30 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Thu, 12 Mar 2026 16:53:56 +0000 Subject: [PATCH 15/27] update testdata link --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 3b9086a..fc5f86e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,7 +60,7 @@ params { help_full = false show_hidden = false version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/proteinannotator' trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') // Config options From 023f0ca53a6ec14724515086960d4a8d9428ce73 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 09:06:45 +0000 Subject: [PATCH 16/27] update testddata path --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 46a08a6..f1f7ed7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -177,7 +177,7 @@ "type": "string", 
"fa_icon": "far fa-check-circle", "description": "Base URL or local path to location of pipeline test dataset files", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/proteinannotator", "hidden": true }, "modules_testdata_base_path": { From 9908739b4a278a9bd141ab227dd9c2660de42da4 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 09:46:58 +0000 Subject: [PATCH 17/27] update snap --- .../domain_annotation/tests/main.nf.test.snap | 84 +++++++++-- tests/default.nf.test.snap | 132 ++---------------- 2 files changed, 85 insertions(+), 131 deletions(-) diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test.snap b/subworkflows/local/domain_annotation/tests/main.nf.test.snap index f1c925c..c196dd7 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test.snap +++ b/subworkflows/local/domain_annotation/tests/main.nf.test.snap @@ -21,17 +21,37 @@ "T1026 - 172 1.10.238.10-FF-000755 - 78 1.2e-05 12.7 0.1 2 2 0.3 0.6 -2.4 0.0 38 49 102 113 97 123 0.74 FBNSV, , 172 residues|", "#" ], + [ + "# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord", + "# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target", + "#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------", + "T1026 - 172 F049289 - 90 1e-05 12.9 0.0 1 1 9.8e-06 2e-05 12.0 0.0 34 76 43 85 33 88 0.93 FBNSV, , 172 residues|", + "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 1 3 0.00095 0.0019 5.3 0.0 6 25 72 91 68 98 0.85 LmrP, , 408 residues|", + "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 2 3 0.36 0.71 -2.9 0.0 42 49 148 155 135 162 0.68 LmrP, , 408 residues|", + "T1024 - 408 F075270 - 57 
1.5e-06 15.3 2.4 3 3 0.00017 0.00033 7.8 0.4 10 48 266 304 257 311 0.89 LmrP, , 408 residues|", + "T1024 - 408 F093539 - 93 3.1e-05 11.0 0.1 1 4 1.6e-05 3.1e-05 11.0 0.1 57 75 50 68 6 75 0.82 LmrP, , 408 residues|" + ], [ { "DOMAIN_ANNOTATION:HMMSEARCH_FUNFAM": { "hmmer": 3.4 } }, + { + "DOMAIN_ANNOTATION:HMMSEARCH_NMPFAMS": { + "hmmer": 3.4 + } + }, { "DOMAIN_ANNOTATION:ARIA2_FUNFAM": { "aria2": "1.36.0" } }, + { + "DOMAIN_ANNOTATION:ARIA2_NMPFAMS": { + "aria2": "1.36.0" + } + }, { "DOMAIN_ANNOTATION:ARIA2_PFAM": { "aria2": "1.36.0" @@ -44,11 +64,11 @@ } ] ], + "timestamp": "2026-03-13T09:44:44.075624", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-05T08:44:26.478981734" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "faa - pfam_db - skip_funfam": { "content": [ @@ -63,6 +83,26 @@ "#" ], [ + "# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord", + "# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target", + "#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------", + "T1026 - 172 F049289 - 90 1e-05 12.9 0.0 1 1 9.8e-06 2e-05 12.0 0.0 34 76 43 85 33 88 0.93 FBNSV, , 172 residues|", + "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 1 3 0.00095 0.0019 5.3 0.0 6 25 72 91 68 98 0.85 LmrP, , 408 residues|", + "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 2 3 0.36 0.71 -2.9 0.0 42 49 148 155 135 162 0.68 LmrP, , 408 residues|", + "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 3 3 0.00017 0.00033 7.8 0.4 10 48 266 304 257 311 0.89 LmrP, , 408 residues|", + "T1024 - 408 F093539 - 93 3.1e-05 11.0 0.1 1 4 1.6e-05 3.1e-05 11.0 0.1 57 75 50 68 6 75 0.82 LmrP, , 408 residues|" + ], + [ + { + "DOMAIN_ANNOTATION:HMMSEARCH_NMPFAMS": { + "hmmer": 3.4 + } + }, + { 
+ "DOMAIN_ANNOTATION:ARIA2_NMPFAMS": { + "aria2": "1.36.0" + } + }, { "DOMAIN_ANNOTATION:HMMSEARCH_PFAM": { "hmmer": 3.4 @@ -70,11 +110,11 @@ } ] ], + "timestamp": "2026-03-13T09:44:54.3598", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-05T10:10:22.057426358" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } }, "faa - domain annotation - stub": { "content": [ @@ -96,8 +136,18 @@ ] ], "2": [ + [ + { + "id": "test" + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ "versions.yml:md5,160d4c5a5001cfb4ff57b94fc52b67d9", + "versions.yml:md5,1b7d208e42364fb87160693faa4e83b9", "versions.yml:md5,35e41735706132967dd94bb636833a4a", + "versions.yml:md5,9045f482d64e7666e62932b0578b665e", "versions.yml:md5,a74a0c8fcb741e59bc14424f612b8d09", "versions.yml:md5,f1d8a406d3dcb97a7c15e9c810926de1" ], @@ -109,6 +159,14 @@ "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "nmpfams_domains": [ + [ + { + "id": "test" + }, + "test.domtbl.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "pfam_domains": [ [ { @@ -119,16 +177,18 @@ ], "versions": [ "versions.yml:md5,160d4c5a5001cfb4ff57b94fc52b67d9", + "versions.yml:md5,1b7d208e42364fb87160693faa4e83b9", "versions.yml:md5,35e41735706132967dd94bb636833a4a", + "versions.yml:md5,9045f482d64e7666e62932b0578b665e", "versions.yml:md5,a74a0c8fcb741e59bc14424f612b8d09", "versions.yml:md5,f1d8a406d3dcb97a7c15e9c810926de1" ] } ], + "timestamp": "2026-03-13T09:45:07.520815", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.2" - }, - "timestamp": "2025-12-05T08:44:37.015452047" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 527d241..5eb71b4 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test": { "content": [ - 32, + 19, { "ARIA2": { "aria2": "1.36.0" @@ -9,20 +9,11 @@ "ARIA2_FUNFAM": { "aria2": "1.36.0" }, - 
"ARIA2_PFAM": { + "ARIA2_NMPFAMS": { "aria2": "1.36.0" }, - "HMMSEARCH_FUNFAM": { - "hmmer": 3.4 - }, - "HMMSEARCH_PFAM": { - "hmmer": 3.4 - }, - "INTERPROSCAN": { - "interproscan": "5.59-91.0" - }, - "S4PRED_RUNMODEL": { - "s4pred": "1.2.1" + "ARIA2_PFAM": { + "aria2": "1.36.0" }, "SEQFU_STATS_AFTER": { "seqfu": "1.22.3" @@ -30,32 +21,19 @@ "SEQFU_STATS_BEFORE": { "seqfu": "1.22.3" }, - "SEQKIT_REPLACE": { - "seqkit": "2.9.0" - }, "SEQKIT_RMDUP": { "seqkit": "v2.9.0" }, "SEQKIT_SEQ": { "seqkit": "v2.9.0" }, - "UNTAR": { - "untar": 1.34 - }, "Workflow": { "nf-core/proteinannotator": "v1.1.0dev" } }, [ - "domain_annotation", - "domain_annotation/funfam", - "domain_annotation/funfam/T1024.domtbl.gz", - "domain_annotation/funfam/T1026.domtbl.gz", - "domain_annotation/funfam/l_arginase.domtbl.gz", - "domain_annotation/pfam", - "domain_annotation/pfam/T1024.domtbl.gz", - "domain_annotation/pfam/T1026.domtbl.gz", - "domain_annotation/pfam/l_arginase.domtbl.gz", + "aria2", + "aria2/nmpfamsdb_test.hmm.gz", "downloaded_dbs", "downloaded_dbs/Pfam-A_test.hmm.gz", "downloaded_dbs/funfam-hmm3-v4_3_0_test.lib.gz", @@ -78,61 +56,6 @@ "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_15.0_HMM.LIB", "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_HMM.LIB", "downloaded_dbs/interproscan_test.tar.gz", - "functional_annotation", - "functional_annotation/interproscan", - "functional_annotation/interproscan/T1024", - "functional_annotation/interproscan/T1024/T1024.gff3", - "functional_annotation/interproscan/T1024/T1024.json", - "functional_annotation/interproscan/T1024/T1024.tsv", - "functional_annotation/interproscan/T1024/T1024.xml", - "functional_annotation/interproscan/T1026", - "functional_annotation/interproscan/T1026/T1026.gff3", - "functional_annotation/interproscan/T1026/T1026.json", - "functional_annotation/interproscan/T1026/T1026.tsv", - "functional_annotation/interproscan/T1026/T1026.xml", - "functional_annotation/interproscan/l_arginase", - 
"functional_annotation/interproscan/l_arginase/l_arginase.gff3", - "functional_annotation/interproscan/l_arginase/l_arginase.json", - "functional_annotation/interproscan/l_arginase/l_arginase.tsv", - "functional_annotation/interproscan/l_arginase/l_arginase.xml", - "multiqc", - "multiqc/multiqc_data", - "multiqc/multiqc_data/llms-full.txt", - "multiqc/multiqc_data/multiqc.log", - "multiqc/multiqc_data/multiqc.parquet", - "multiqc/multiqc_data/multiqc_T1024_after.txt", - "multiqc/multiqc_data/multiqc_T1024_before.txt", - "multiqc/multiqc_data/multiqc_T1026_after.txt", - "multiqc/multiqc_data/multiqc_T1026_before.txt", - "multiqc/multiqc_data/multiqc_citations.txt", - "multiqc/multiqc_data/multiqc_data.json", - "multiqc/multiqc_data/multiqc_l_arginase_after.txt", - "multiqc/multiqc_data/multiqc_l_arginase_before.txt", - "multiqc/multiqc_data/multiqc_software_versions.txt", - "multiqc/multiqc_data/multiqc_sources.txt", - "multiqc/multiqc_plots", - "multiqc/multiqc_plots/pdf", - "multiqc/multiqc_plots/pdf/T1024_after.pdf", - "multiqc/multiqc_plots/pdf/T1024_before.pdf", - "multiqc/multiqc_plots/pdf/T1026_after.pdf", - "multiqc/multiqc_plots/pdf/T1026_before.pdf", - "multiqc/multiqc_plots/pdf/l_arginase_after.pdf", - "multiqc/multiqc_plots/pdf/l_arginase_before.pdf", - "multiqc/multiqc_plots/png", - "multiqc/multiqc_plots/png/T1024_after.png", - "multiqc/multiqc_plots/png/T1024_before.png", - "multiqc/multiqc_plots/png/T1026_after.png", - "multiqc/multiqc_plots/png/T1026_before.png", - "multiqc/multiqc_plots/png/l_arginase_after.png", - "multiqc/multiqc_plots/png/l_arginase_before.png", - "multiqc/multiqc_plots/svg", - "multiqc/multiqc_plots/svg/T1024_after.svg", - "multiqc/multiqc_plots/svg/T1024_before.svg", - "multiqc/multiqc_plots/svg/T1026_after.svg", - "multiqc/multiqc_plots/svg/T1026_before.svg", - "multiqc/multiqc_plots/svg/l_arginase_after.svg", - "multiqc/multiqc_plots/svg/l_arginase_before.svg", - "multiqc/multiqc_report.html", "pipeline_info", 
"pipeline_info/nf_core_proteinannotator_software_mqc_versions.yml", "qc", @@ -146,8 +69,6 @@ "qc/T1026", "qc/T1026/T1026.fasta", "qc/T1026/T1026.log", - "qc/T1026/T1026_after.tsv", - "qc/T1026/T1026_after_mqc.txt", "qc/T1026/T1026_before.tsv", "qc/T1026/T1026_before_mqc.txt", "qc/l_arginase", @@ -156,19 +77,10 @@ "qc/l_arginase/l_arginase_after.tsv", "qc/l_arginase/l_arginase_after_mqc.txt", "qc/l_arginase/l_arginase_before.tsv", - "qc/l_arginase/l_arginase_before_mqc.txt", - "s4pred", - "s4pred/T1024", - "s4pred/T1024/ss2", - "s4pred/T1024/ss2/T1024.ss2", - "s4pred/T1026", - "s4pred/T1026/ss2", - "s4pred/T1026/ss2/T1026.ss2", - "s4pred/l_arginase", - "s4pred/l_arginase/ss2", - "s4pred/l_arginase/ss2/GI|225038609|EFDID|719595|FULL.ss2" + "qc/l_arginase/l_arginase_before_mqc.txt" ], [ + "nmpfamsdb_test.hmm.gz:md5,ad7a094618ccfdaeed1c03e93f6abf1e", "Pfam-A_test.hmm.gz:md5,a5ab72b2b7bc72c62756684707e2387c", "funfam-hmm3-v4_3_0_test.lib.gz:md5,df8b324882e1ceb8f8196155a968ed77", "hamap.hmm.lib:md5,8c589a7f459284080e674e79454434d3", @@ -182,19 +94,6 @@ "TIGRFAMs_15.0_HMM.LIB:md5,64f2b2c9e834b47b17d91bb9a6a0067e", "TIGRFAMs_HMM.LIB:md5,543da3f4b65eed9ec393986c6c6ff0ba", "interproscan_test.tar.gz:md5,cde88c0cd841c84dc1203e64854c762b", - "T1024.json:md5,0288f7551a14faedc409dd374b3e073e", - "T1024.xml:md5,63a3db0eb0e1f76403411602c23b721e", - "T1026.json:md5,5c2a40474b1cfb50cd043fe0be5e5d52", - "T1026.xml:md5,335552ce1703548565212a1d54681d75", - "l_arginase.json:md5,e0d127dd8a952cbd798999851d1338e6", - "l_arginase.xml:md5,7248992d9c1618cf7baa7515ae79ce32", - "multiqc_T1024_after.txt:md5,f2a552d4750ff8360941b10cec141499", - "multiqc_T1024_before.txt:md5,f2a552d4750ff8360941b10cec141499", - "multiqc_T1026_after.txt:md5,aabd4e58ed67d366fd04592ca09dbc9b", - "multiqc_T1026_before.txt:md5,aabd4e58ed67d366fd04592ca09dbc9b", - "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_l_arginase_after.txt:md5,5df939cbafd732aa5095dad4434a4a33", - 
"multiqc_l_arginase_before.txt:md5,dd8e50549823ef944d6e36d0e2af56f7", "T1024.fasta:md5,aa546680bf58fcb8aeb10e1617e6d5bd", "T1024.log:md5,a41135cfe024baaf42f135583fe73f0d", "T1024_after.tsv:md5,2daf830aba484edbcf25e811f5769ad0", @@ -203,8 +102,6 @@ "T1024_before_mqc.txt:md5,23c573ad96f7199ba251c7bacd1c5968", "T1026.fasta:md5,ae21f6aa06d0a5cedc121db5dfc343f3", "T1026.log:md5,a41135cfe024baaf42f135583fe73f0d", - "T1026_after.tsv:md5,052ba564eabda203298ffc26ef80b7ab", - "T1026_after_mqc.txt:md5,3534726223f5a24dedda4446fd202404", "T1026_before.tsv:md5,052ba564eabda203298ffc26ef80b7ab", "T1026_before_mqc.txt:md5,3534726223f5a24dedda4446fd202404", "l_arginase.fasta:md5,38d388cfc6ee9013eba4f693d104d6f0", @@ -212,16 +109,13 @@ "l_arginase_after.tsv:md5,6c4fe965aa0905f437f3731ed8eed98e", "l_arginase_after_mqc.txt:md5,af14b7e79260ece0074d4ea82a3b3ce6", "l_arginase_before.tsv:md5,31a34ec0257053f34a449270f23d98ff", - "l_arginase_before_mqc.txt:md5,7ab718f12ea5460f254b8c43cfd11040", - "T1024.ss2:md5,6f2467c4e6974e761333bef106809e88", - "T1026.ss2:md5,cc788acb2aefe43fea147d9dd2b6c2c9", - "GI|225038609|EFDID|719595|FULL.ss2:md5,e7d8eaa84d46a6a714ffe00d7f21cdfb" + "l_arginase_before_mqc.txt:md5,7ab718f12ea5460f254b8c43cfd11040" ] ], + "timestamp": "2026-03-13T09:46:09.474957", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - }, - "timestamp": "2026-02-04T12:43:32.273407057" + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } } } \ No newline at end of file From c6b401e913cb16344e14a79e7c8fdaa4b174a58c Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 10:18:24 +0000 Subject: [PATCH 18/27] update docs --- docs/output.md | 11 +++++++---- docs/usage.md | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/output.md b/docs/output.md index fcd3159..0e6387f 100644 --- a/docs/output.md +++ b/docs/output.md @@ -14,9 +14,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [SeqFu](#seqfu) for input amino acid 
sequences quality control (QC) - [SeqKit](#seqkit) for preprocessing input amino acid sequences (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) - [Database download](#database-download) Optionally download selected databases for annotation. - - [aria2](#aria2) - To optionally download the Pfam, FunFam, and/or InterProScan databases through the pipeline. + - [aria2](#aria2) - To optionally download the Pfam, FunFam, NMPFams and/or InterProScan databases through the pipeline. - [Domain annotation](#domain-annotation) Annotate proteins with domains from established repositories. - - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam and/or FunFam domains through `hmmer/hmmsearch` + - [hmmer](#hmmer) - To optionally match the input sequence to known Pfam, FunFam and/or NMPFams domains through `hmmer/hmmsearch` - [Functional annotation](#functional-annotation) Annotate proteins with functional domains - [InterProScan](#Interproscan) - Search the InterProScan database for functional domains - [s4pred](#s4pred) - Predict secondary structures of sequences, producing amino acid level probabilities of forming an α-helix, a β-strand or a coil. @@ -72,10 +72,11 @@ The `seqkit` module is used for initial preprocessing (i.e., gap removal, conver - `Pfam-A*.hmm.gz`: (optional) The latest full, or a minimal test, Pfam-A HMM database that can be downloaded through the pipeline. - `interproscan_test.tar.gz`: (optional) the downloaded InterProScan archive of member databases according to the optional user-provided url - `funfam-hmm3-v4_3_0*.lib.gz`: (optional) The latest (v4_3_0) full, or a minimal test, FunFam HMM database that can be downloaded through the pipeline. + - `nmpfamsdb.hmm.gz`: (optional) The latest full, or a minimal test, NMPFams HMM database that can be downloaded through the pipeline. 
-If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_interproscan`) for each annotation database is set to `true`, or the `*_db` parameter paths (e.g., `pfam_db`, `funfam_db`, `interproscan_db`) are set (i.e., not `null`), or the run is resumed after a successful database download, then the respective database will not be (re)downloaded. The full database links can be found in the main `nextflow.config` file, while minimal test versions can be found in the `test` and `test_full` profiles (i.e., `conf/test.config`, `conf/test_full.config`). +If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_nmpfams`, `skip_interproscan`) for each annotation database is set to `true`, or the `*_db` parameter paths (e.g., `pfam_db`, `funfam_db`, `nmpfams_db`, `interproscan_db`) are set (i.e., not `null`), or the run is resumed after a successful database download, then the respective database will not be (re)downloaded. The full database links can be found in the main `nextflow.config` file, while minimal test versions can be found in the `test` and `test_full` profiles (i.e., `conf/test.config`, `conf/test_full.config`). [aria2](https://github.com/aria2/aria2/) is a lightweight multi-protocol & multi-source, cross platform download utility operated in command-line. It supports HTTP/HTTPS, FTP, SFTP, BitTorrent and Metalink. @@ -91,10 +92,12 @@ If the `skip_*` flags (e.g., `skip_pfam`, `skip_funfam`, `skip_interproscan`) fo - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. - `funfam/` - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. + - `nmpfams/` + - `.domtbl.gz`: `hmmer/hmmsearch` results along parameters info. -Each of the `domain_annotation/` subfolders (e.g., `pfam`, `funfam`) contain a `.domtbl.gz` annotation file per input sample, depending on which domain annotation databases were used in the pipeline execution. 
+Each of the `domain_annotation/` subfolders (e.g., `pfam`, `funfam`, `nmpfams`) contain a `.domtbl.gz` annotation file per input sample, depending on which domain annotation databases were used in the pipeline execution. [hmmer](https://github.com/EddyRivasLab/hmmer) is a fast and flexible alignment trimming tool that keeps phylogenetically informative sites and removes others. diff --git a/docs/usage.md b/docs/usage.md index 0945da0..72d53cc 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -7,7 +7,7 @@ ## Introduction **nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics and generates sequence-level annotations for amino acid sequences. -It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam and FunFam HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred). +It takes a protein FASTA file as input and performs conserved domain annotation (using Pfam, FunFam and NMPFams HMM databases), functional annotation (using InterProScan), and secondary structure prediction (using s4pred). Optionally, paths to pre-downloaded databases can be provided to skip the automatic download steps and speed up repeated runs. ## Samplesheet input From c1ec64e1e5b98b81fa6fe4b1ded41eac1576ba55 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 10:35:05 +0000 Subject: [PATCH 19/27] update changelog & readme --- CHANGELOG.md | 1 + README.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89b0098..acdf395 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial release of nf-core/proteinannotator, created with the [nf-core](https://nf-co.re/) template. 
+- [#87](https://github.com/nf-core/proteinannotator/pull/87) - Added the option to download and use the latest NMPFams HMM library (or use path to an existing one) for domain annotation. (by @npechl) - [#68](https://github.com/nf-core/proteinannotator/pull/68) - Using the `ARIA2` and `UNTAR` nf-core modules to download and decompress the InterProScan database. (by @vagkaratzas) - [#67](https://github.com/nf-core/proteinannotator/pull/67) - Swapped to the updated, non-buggy, nf-core version of `INTERPROSCAN`. (by @vagkaratzas) - [#65](https://github.com/nf-core/proteinannotator/pull/65) - Converted the pipeline schematic to nf-core metromap. (by @vagkaratzas) diff --git a/README.md b/README.md index 2eef432..f4f95ee 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Generate input amino acid sequence statistics with ([`SeqFu`](https://github.com ### Annotate sequences 1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases - such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/) and [FunFam](https://download.cathdb.info/cath/releases/all-releases/) + such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams](https://pavlopoulos-lab.org/envofams/databases/hmmer/) 2. Functional annotation: - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics. 3. 
Predict secondary structure compositional features such as α-helices, β-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred)) From 61028d8f8e1e12c3ee9e38ea164897c23b7828c4 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 14:05:19 +0000 Subject: [PATCH 20/27] Update subworkflows/local/domain_annotation/main.nf Co-authored-by: Evangelos Karatzas <32259775+vagkaratzas@users.noreply.github.com> --- subworkflows/local/domain_annotation/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index a7cb349..467dbda 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,6 +1,6 @@ include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main' From 73fa894ce93e24e85f1e13439d1b9ab87df2d2a1 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 14:16:16 +0000 Subject: [PATCH 21/27] revert links --- conf/test.config | 6 +++--- conf/test_full.config | 2 +- nextflow.config | 2 +- nextflow_schema.json | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/test.config b/conf/test.config index 83274db..eab95ce 100644 --- a/conf/test.config +++ b/conf/test.config @@ -25,10 +25,10 @@ params { // Input data input = params.pipelines_testdata_base_path + '/samplesheet/samplesheet.csv' // Domain annotation - 
pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation - interproscan_db_url = params.pipelines_testdata_base_path + '/testdata/interproscan/interproscan_test.tar.gz' + interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' } diff --git a/conf/test_full.config b/conf/test_full.config index abe5d37..77dcc94 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -21,6 +21,6 @@ params { funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation - interproscan_db_url = params.pipelines_testdata_base_path + '/testdata/interproscan_test.tar.gz' + interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' } diff --git a/nextflow.config b/nextflow.config index fc5f86e..3b9086a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,7 +60,7 @@ params { help_full = false show_hidden = false version = false - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/proteinannotator' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') // Config 
options diff --git a/nextflow_schema.json b/nextflow_schema.json index f1f7ed7..46a08a6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -177,7 +177,7 @@ "type": "string", "fa_icon": "far fa-check-circle", "description": "Base URL or local path to location of pipeline test dataset files", - "default": "https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/proteinannotator", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", "hidden": true }, "modules_testdata_base_path": { From e9ad2c6da1bb4eb70b836c615b6b514b10800d59 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 14:21:44 +0000 Subject: [PATCH 22/27] spaces --- subworkflows/local/domain_annotation/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/domain_annotation/main.nf b/subworkflows/local/domain_annotation/main.nf index a7cb349..467dbda 100644 --- a/subworkflows/local/domain_annotation/main.nf +++ b/subworkflows/local/domain_annotation/main.nf @@ -1,6 +1,6 @@ include { ARIA2 as ARIA2_PFAM } from '../../../modules/nf-core/aria2/main' include { ARIA2 as ARIA2_FUNFAM } from '../../../modules/nf-core/aria2/main' -include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' +include { ARIA2 as ARIA2_NMPFAMS } from '../../../modules/nf-core/aria2/main' include { HMMER_HMMSEARCH as HMMSEARCH_PFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_FUNFAM } from '../../../modules/nf-core/hmmer/hmmsearch/main' include { HMMER_HMMSEARCH as HMMSEARCH_NMPFAMS } from '../../../modules/nf-core/hmmer/hmmsearch/main' From 1f920fef3f1a830a25bbd8b52d4d53cede6462b9 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 14:54:58 +0000 Subject: [PATCH 23/27] include nmpfams test --- .../domain_annotation/tests/main.nf.test | 41 ++++++- .../domain_annotation/tests/main.nf.test.snap | 59 ++++----- tests/default.nf.test.snap | 112 
+----------------- 3 files changed, 66 insertions(+), 146 deletions(-) diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test b/subworkflows/local/domain_annotation/tests/main.nf.test index b6bad3a..18030f4 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test +++ b/subworkflows/local/domain_annotation/tests/main.nf.test @@ -19,9 +19,9 @@ nextflow_workflow { input[4] = false // skip_funfam input[5] = null // funfam_db input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link - input[7] = false // skip_nmpfams + input[7] = true // skip_nmpfams input[8] = null // nmpfams_db - input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link """ } } @@ -32,7 +32,6 @@ nextflow_workflow { { assert snapshot( path(workflow.out.pfam_domains[0][1]).linesGzip[0..7], path(workflow.out.funfam_domains[0][1]).linesGzip[0..7], - path(workflow.out.nmpfams_domains[0][1]).linesGzip[0..7], workflow.out.versions.collect { path(it).yaml }.unique() ).match()} ) @@ -54,9 +53,9 @@ nextflow_workflow { input[4] = true // skip_funfam input[5] = null // funfam_db input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link - input[7] = false // skip_nmpfams + input[7] = true // skip_nmpfams input[8] = null // nmpfams_db - input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link """ } } @@ -66,6 +65,38 @@ nextflow_workflow { { assert workflow.success}, { assert snapshot( path(workflow.out.pfam_domains[0][1]).linesGzip[0..7], + workflow.out.versions.collect { path(it).yaml }.unique() + ).match()} + ) + } + } + + 
test("faa - nmpfams") { + + when { + workflow { + """ + input[0] = channel.of([ + [ id: 'test' ], + file(params.pipelines_testdata_base_path + '/testdata/sequences/test_proteins.faa', checkIfExists: true) + ]) + input[1] = true // skip_pfam + input[2] = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' // pfam_db + input[3] = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' // pfam_latest_link + input[4] = true // skip_funfam + input[5] = null // funfam_db + input[6] = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' // funfam_latest_link + input[7] = false // skip_nmpfams + input[8] = null // nmpfams_db + input[9] = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // nmpfams_latest_link + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( path(workflow.out.nmpfams_domains[0][1]).linesGzip[0..7], workflow.out.versions.collect { path(it).yaml }.unique() ).match()} diff --git a/subworkflows/local/domain_annotation/tests/main.nf.test.snap b/subworkflows/local/domain_annotation/tests/main.nf.test.snap index c196dd7..80ce69a 100644 --- a/subworkflows/local/domain_annotation/tests/main.nf.test.snap +++ b/subworkflows/local/domain_annotation/tests/main.nf.test.snap @@ -21,37 +21,17 @@ "T1026 - 172 1.10.238.10-FF-000755 - 78 1.2e-05 12.7 0.1 2 2 0.3 0.6 -2.4 0.0 38 49 102 113 97 123 0.74 FBNSV, , 172 residues|", "#" ], - [ - "# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord", - "# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target", - "#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------", - "T1026 - 172 F049289 - 90 1e-05 
12.9 0.0 1 1 9.8e-06 2e-05 12.0 0.0 34 76 43 85 33 88 0.93 FBNSV, , 172 residues|", - "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 1 3 0.00095 0.0019 5.3 0.0 6 25 72 91 68 98 0.85 LmrP, , 408 residues|", - "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 2 3 0.36 0.71 -2.9 0.0 42 49 148 155 135 162 0.68 LmrP, , 408 residues|", - "T1024 - 408 F075270 - 57 1.5e-06 15.3 2.4 3 3 0.00017 0.00033 7.8 0.4 10 48 266 304 257 311 0.89 LmrP, , 408 residues|", - "T1024 - 408 F093539 - 93 3.1e-05 11.0 0.1 1 4 1.6e-05 3.1e-05 11.0 0.1 57 75 50 68 6 75 0.82 LmrP, , 408 residues|" - ], [ { "DOMAIN_ANNOTATION:HMMSEARCH_FUNFAM": { "hmmer": 3.4 } }, - { - "DOMAIN_ANNOTATION:HMMSEARCH_NMPFAMS": { - "hmmer": 3.4 - } - }, { "DOMAIN_ANNOTATION:ARIA2_FUNFAM": { "aria2": "1.36.0" } }, - { - "DOMAIN_ANNOTATION:ARIA2_NMPFAMS": { - "aria2": "1.36.0" - } - }, { "DOMAIN_ANNOTATION:ARIA2_PFAM": { "aria2": "1.36.0" @@ -64,24 +44,14 @@ } ] ], - "timestamp": "2026-03-13T09:44:44.075624", + "timestamp": "2026-03-13T14:51:37.636657", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" } }, - "faa - pfam_db - skip_funfam": { + "faa - nmpfams": { "content": [ - [ - "# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord", - "# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target", - "#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------", - "T1026 - 172 Nanovirus_coat PF04660.18 177 1.2e-54 172.1 0.5 1 1 6.7e-55 1.3e-54 172.0 0.5 3 177 4 172 2 172 0.94 FBNSV, , 172 residues|", - "T1026 - 172 PUA_NSUN2 PF25378.2 87 1.8e-05 12.2 0.0 1 1 1.3e-05 2.7e-05 11.6 0.0 40 81 68 111 40 116 0.89 FBNSV, , 172 residues|", - "T1024 - 408 MFS_1 PF07690.22 347 6.7e-35 107.6 58.0 1 2 1.4e-32 2.9e-32 98.9 38.6 4 346 17 365 15 366 0.78 LmrP, , 
408 residues|", - "T1024 - 408 MFS_1 PF07690.22 347 6.7e-35 107.6 58.0 2 2 2.6e-09 5.1e-09 22.5 11.4 38 174 262 399 257 407 0.74 LmrP, , 408 residues|", - "#" - ], [ "# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord", "# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target", @@ -102,7 +72,28 @@ "DOMAIN_ANNOTATION:ARIA2_NMPFAMS": { "aria2": "1.36.0" } - }, + } + ] + ], + "timestamp": "2026-03-13T14:53:51.847817", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "faa - pfam_db - skip_funfam": { + "content": [ + [ + "# --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord", + "# target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target", + "#------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------", + "T1026 - 172 Nanovirus_coat PF04660.18 177 1.2e-54 172.1 0.5 1 1 6.7e-55 1.3e-54 172.0 0.5 3 177 4 172 2 172 0.94 FBNSV, , 172 residues|", + "T1026 - 172 PUA_NSUN2 PF25378.2 87 1.8e-05 12.2 0.0 1 1 1.3e-05 2.7e-05 11.6 0.0 40 81 68 111 40 116 0.89 FBNSV, , 172 residues|", + "T1024 - 408 MFS_1 PF07690.22 347 6.7e-35 107.6 58.0 1 2 1.4e-32 2.9e-32 98.9 38.6 4 346 17 365 15 366 0.78 LmrP, , 408 residues|", + "T1024 - 408 MFS_1 PF07690.22 347 6.7e-35 107.6 58.0 2 2 2.6e-09 5.1e-09 22.5 11.4 38 174 262 399 257 407 0.74 LmrP, , 408 residues|", + "#" + ], + [ { "DOMAIN_ANNOTATION:HMMSEARCH_PFAM": { "hmmer": 3.4 @@ -110,7 +101,7 @@ } ] ], - "timestamp": "2026-03-13T09:44:54.3598", + "timestamp": "2026-03-13T14:51:45.461466", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/default.nf.test.snap 
b/tests/default.nf.test.snap index 5eb71b4..0d45f88 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,118 +1,16 @@ { "-profile test": { "content": [ - 19, - { - "ARIA2": { - "aria2": "1.36.0" - }, - "ARIA2_FUNFAM": { - "aria2": "1.36.0" - }, - "ARIA2_NMPFAMS": { - "aria2": "1.36.0" - }, - "ARIA2_PFAM": { - "aria2": "1.36.0" - }, - "SEQFU_STATS_AFTER": { - "seqfu": "1.22.3" - }, - "SEQFU_STATS_BEFORE": { - "seqfu": "1.22.3" - }, - "SEQKIT_RMDUP": { - "seqkit": "v2.9.0" - }, - "SEQKIT_SEQ": { - "seqkit": "v2.9.0" - }, - "Workflow": { - "nf-core/proteinannotator": "v1.1.0dev" - } - }, + 0, + null, [ - "aria2", - "aria2/nmpfamsdb_test.hmm.gz", - "downloaded_dbs", - "downloaded_dbs/Pfam-A_test.hmm.gz", - "downloaded_dbs/funfam-hmm3-v4_3_0_test.lib.gz", - "downloaded_dbs/interproscan_db", - "downloaded_dbs/interproscan_db/hamap", - "downloaded_dbs/interproscan_db/hamap/2023_05", - "downloaded_dbs/interproscan_db/hamap/2023_05/hamap.hmm.lib", - "downloaded_dbs/interproscan_db/hamap/2023_05/hamap.prf", - "downloaded_dbs/interproscan_db/hamap/2023_05/profiles", - "downloaded_dbs/interproscan_db/hamap/2023_05/profiles/MF_00457.prf", - "downloaded_dbs/interproscan_db/hamap/2023_05/profiles/MF_01458.prf", - "downloaded_dbs/interproscan_db/sfld", - "downloaded_dbs/interproscan_db/sfld/4", - "downloaded_dbs/interproscan_db/sfld/4/sfld.hmm", - "downloaded_dbs/interproscan_db/sfld/4/sfld.msa", - "downloaded_dbs/interproscan_db/sfld/4/sfld_hierarchy_flat.txt", - "downloaded_dbs/interproscan_db/sfld/4/sfld_sites.annot", - "downloaded_dbs/interproscan_db/tigrfam", - "downloaded_dbs/interproscan_db/tigrfam/15.0", - "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_15.0_HMM.LIB", - "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_HMM.LIB", - "downloaded_dbs/interproscan_test.tar.gz", - "pipeline_info", - "pipeline_info/nf_core_proteinannotator_software_mqc_versions.yml", - "qc", - "qc/T1024", - "qc/T1024/T1024.fasta", - "qc/T1024/T1024.log", - 
"qc/T1024/T1024_after.tsv", - "qc/T1024/T1024_after_mqc.txt", - "qc/T1024/T1024_before.tsv", - "qc/T1024/T1024_before_mqc.txt", - "qc/T1026", - "qc/T1026/T1026.fasta", - "qc/T1026/T1026.log", - "qc/T1026/T1026_before.tsv", - "qc/T1026/T1026_before_mqc.txt", - "qc/l_arginase", - "qc/l_arginase/l_arginase.fasta", - "qc/l_arginase/l_arginase.log", - "qc/l_arginase/l_arginase_after.tsv", - "qc/l_arginase/l_arginase_after_mqc.txt", - "qc/l_arginase/l_arginase_before.tsv", - "qc/l_arginase/l_arginase_before_mqc.txt" + "pipeline_info" ], [ - "nmpfamsdb_test.hmm.gz:md5,ad7a094618ccfdaeed1c03e93f6abf1e", - "Pfam-A_test.hmm.gz:md5,a5ab72b2b7bc72c62756684707e2387c", - "funfam-hmm3-v4_3_0_test.lib.gz:md5,df8b324882e1ceb8f8196155a968ed77", - "hamap.hmm.lib:md5,8c589a7f459284080e674e79454434d3", - "hamap.prf:md5,1c9f7eef7704bf307cfaf9f0f2e22153", - "MF_00457.prf:md5,91ab950fb5c449fef9f0cd235b72e9d9", - "MF_01458.prf:md5,f43826d1c001200d5d60ca4d97235f96", - "sfld.hmm:md5,ad6c16bfdd1ec4dc1f13aadaf1b2f1bd", - "sfld.msa:md5,1ee957899d996426a442ff1ae7737c68", - "sfld_hierarchy_flat.txt:md5,8c606dcaa55f174547a42606a0f1cd1a", - "sfld_sites.annot:md5,aa216404da8721f56cae847600170784", - "TIGRFAMs_15.0_HMM.LIB:md5,64f2b2c9e834b47b17d91bb9a6a0067e", - "TIGRFAMs_HMM.LIB:md5,543da3f4b65eed9ec393986c6c6ff0ba", - "interproscan_test.tar.gz:md5,cde88c0cd841c84dc1203e64854c762b", - "T1024.fasta:md5,aa546680bf58fcb8aeb10e1617e6d5bd", - "T1024.log:md5,a41135cfe024baaf42f135583fe73f0d", - "T1024_after.tsv:md5,2daf830aba484edbcf25e811f5769ad0", - "T1024_after_mqc.txt:md5,23c573ad96f7199ba251c7bacd1c5968", - "T1024_before.tsv:md5,2daf830aba484edbcf25e811f5769ad0", - "T1024_before_mqc.txt:md5,23c573ad96f7199ba251c7bacd1c5968", - "T1026.fasta:md5,ae21f6aa06d0a5cedc121db5dfc343f3", - "T1026.log:md5,a41135cfe024baaf42f135583fe73f0d", - "T1026_before.tsv:md5,052ba564eabda203298ffc26ef80b7ab", - "T1026_before_mqc.txt:md5,3534726223f5a24dedda4446fd202404", - 
"l_arginase.fasta:md5,38d388cfc6ee9013eba4f693d104d6f0", - "l_arginase.log:md5,a41135cfe024baaf42f135583fe73f0d", - "l_arginase_after.tsv:md5,6c4fe965aa0905f437f3731ed8eed98e", - "l_arginase_after_mqc.txt:md5,af14b7e79260ece0074d4ea82a3b3ce6", - "l_arginase_before.tsv:md5,31a34ec0257053f34a449270f23d98ff", - "l_arginase_before_mqc.txt:md5,7ab718f12ea5460f254b8c43cfd11040" + ] ], - "timestamp": "2026-03-13T09:46:09.474957", + "timestamp": "2026-03-13T14:23:26.476832", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From 242ff9598b833ed170ac3bd72468d3bcc2fac56b Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 15:25:32 +0000 Subject: [PATCH 24/27] fix spacing --- conf/test_full.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test_full.config b/conf/test_full.config index 77dcc94..ce72c56 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,8 +17,8 @@ params { // Input data for full size test input = params.pipelines_testdata_base_path + '/samplesheet/samplesheet.csv' // Domain annotation - pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' From 9f0e6fbc901fb04ac7f20165d1094f683237d072 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 15:34:02 +0000 Subject: [PATCH 25/27] update test config --- conf/test.config | 8 +-- conf/test_full.config | 8 +-- tests/default.nf.test.snap | 116 
+++++++++++++++++++++++++++++++++++-- 3 files changed, 119 insertions(+), 13 deletions(-) diff --git a/conf/test.config b/conf/test.config index eab95ce..9defa1c 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,11 +23,11 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Input data - input = params.pipelines_testdata_base_path + '/samplesheet/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv' // Domain annotation - pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' + pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/conf/test_full.config b/conf/test_full.config index ce72c56..bcf1d96 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,11 +15,11 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = params.pipelines_testdata_base_path + '/samplesheet/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'proteinannotator/samplesheet/samplesheet.csv' // Domain annotation - pfam_latest_link = params.pipelines_testdata_base_path + '/testdata/pfam/Pfam-A_test.hmm.gz' - funfam_latest_link = 
params.pipelines_testdata_base_path + '/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' - nmpfams_latest_link = params.pipelines_testdata_base_path + '/testdata/nmpfams/nmpfamsdb_test.hmm.gz' + pfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/pfam/Pfam-A_test.hmm.gz' + funfam_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/funfam/funfam-hmm3-v4_3_0_test.lib.gz' + nmpfams_latest_link = params.pipelines_testdata_base_path + 'proteinannotator/testdata/nmpfams/nmpfamsdb_test.hmm.gz' // Functional annotation interproscan_db_url = params.pipelines_testdata_base_path + 'proteinannotator/testdata/interproscan_test.tar.gz' interproscan_applications = 'Hamap,TIGRFAM,sfld' diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 0d45f88..bc35f2b 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,16 +1,122 @@ { "-profile test": { "content": [ - 0, - null, + 20, + { + "ARIA2": { + "aria2": "1.36.0" + }, + "ARIA2_FUNFAM": { + "aria2": "1.36.0" + }, + "ARIA2_NMPFAMS": { + "aria2": "1.36.0" + }, + "ARIA2_PFAM": { + "aria2": "1.36.0" + }, + "SEQFU_STATS_AFTER": { + "seqfu": "1.22.3" + }, + "SEQFU_STATS_BEFORE": { + "seqfu": "1.22.3" + }, + "SEQKIT_RMDUP": { + "seqkit": "v2.9.0" + }, + "SEQKIT_SEQ": { + "seqkit": "v2.9.0" + }, + "Workflow": { + "nf-core/proteinannotator": "v1.1.0dev" + } + }, [ - "pipeline_info" + "aria2", + "aria2/nmpfamsdb_test.hmm.gz", + "downloaded_dbs", + "downloaded_dbs/Pfam-A_test.hmm.gz", + "downloaded_dbs/funfam-hmm3-v4_3_0_test.lib.gz", + "downloaded_dbs/interproscan_db", + "downloaded_dbs/interproscan_db/hamap", + "downloaded_dbs/interproscan_db/hamap/2023_05", + "downloaded_dbs/interproscan_db/hamap/2023_05/hamap.hmm.lib", + "downloaded_dbs/interproscan_db/hamap/2023_05/hamap.prf", + "downloaded_dbs/interproscan_db/hamap/2023_05/profiles", + "downloaded_dbs/interproscan_db/hamap/2023_05/profiles/MF_00457.prf", + 
"downloaded_dbs/interproscan_db/hamap/2023_05/profiles/MF_01458.prf", + "downloaded_dbs/interproscan_db/sfld", + "downloaded_dbs/interproscan_db/sfld/4", + "downloaded_dbs/interproscan_db/sfld/4/sfld.hmm", + "downloaded_dbs/interproscan_db/sfld/4/sfld.msa", + "downloaded_dbs/interproscan_db/sfld/4/sfld_hierarchy_flat.txt", + "downloaded_dbs/interproscan_db/sfld/4/sfld_sites.annot", + "downloaded_dbs/interproscan_db/tigrfam", + "downloaded_dbs/interproscan_db/tigrfam/15.0", + "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_15.0_HMM.LIB", + "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_HMM.LIB", + "downloaded_dbs/interproscan_test.tar.gz", + "pipeline_info", + "pipeline_info/nf_core_proteinannotator_software_mqc_versions.yml", + "qc", + "qc/T1024", + "qc/T1024/T1024.fasta", + "qc/T1024/T1024.log", + "qc/T1024/T1024_after.tsv", + "qc/T1024/T1024_after_mqc.txt", + "qc/T1024/T1024_before.tsv", + "qc/T1024/T1024_before_mqc.txt", + "qc/T1026", + "qc/T1026/T1026.fasta", + "qc/T1026/T1026.log", + "qc/T1026/T1026_after.tsv", + "qc/T1026/T1026_after_mqc.txt", + "qc/T1026/T1026_before.tsv", + "qc/T1026/T1026_before_mqc.txt", + "qc/l_arginase", + "qc/l_arginase/l_arginase.fasta", + "qc/l_arginase/l_arginase.log", + "qc/l_arginase/l_arginase_after.tsv", + "qc/l_arginase/l_arginase_after_mqc.txt", + "qc/l_arginase/l_arginase_before.tsv", + "qc/l_arginase/l_arginase_before_mqc.txt" ], [ - + "nmpfamsdb_test.hmm.gz:md5,ad7a094618ccfdaeed1c03e93f6abf1e", + "Pfam-A_test.hmm.gz:md5,a5ab72b2b7bc72c62756684707e2387c", + "funfam-hmm3-v4_3_0_test.lib.gz:md5,df8b324882e1ceb8f8196155a968ed77", + "hamap.hmm.lib:md5,8c589a7f459284080e674e79454434d3", + "hamap.prf:md5,1c9f7eef7704bf307cfaf9f0f2e22153", + "MF_00457.prf:md5,91ab950fb5c449fef9f0cd235b72e9d9", + "MF_01458.prf:md5,f43826d1c001200d5d60ca4d97235f96", + "sfld.hmm:md5,ad6c16bfdd1ec4dc1f13aadaf1b2f1bd", + "sfld.msa:md5,1ee957899d996426a442ff1ae7737c68", + "sfld_hierarchy_flat.txt:md5,8c606dcaa55f174547a42606a0f1cd1a", + 
"sfld_sites.annot:md5,aa216404da8721f56cae847600170784", + "TIGRFAMs_15.0_HMM.LIB:md5,64f2b2c9e834b47b17d91bb9a6a0067e", + "TIGRFAMs_HMM.LIB:md5,543da3f4b65eed9ec393986c6c6ff0ba", + "interproscan_test.tar.gz:md5,cde88c0cd841c84dc1203e64854c762b", + "T1024.fasta:md5,aa546680bf58fcb8aeb10e1617e6d5bd", + "T1024.log:md5,a41135cfe024baaf42f135583fe73f0d", + "T1024_after.tsv:md5,2daf830aba484edbcf25e811f5769ad0", + "T1024_after_mqc.txt:md5,23c573ad96f7199ba251c7bacd1c5968", + "T1024_before.tsv:md5,2daf830aba484edbcf25e811f5769ad0", + "T1024_before_mqc.txt:md5,23c573ad96f7199ba251c7bacd1c5968", + "T1026.fasta:md5,ae21f6aa06d0a5cedc121db5dfc343f3", + "T1026.log:md5,a41135cfe024baaf42f135583fe73f0d", + "T1026_after.tsv:md5,052ba564eabda203298ffc26ef80b7ab", + "T1026_after_mqc.txt:md5,3534726223f5a24dedda4446fd202404", + "T1026_before.tsv:md5,052ba564eabda203298ffc26ef80b7ab", + "T1026_before_mqc.txt:md5,3534726223f5a24dedda4446fd202404", + "l_arginase.fasta:md5,38d388cfc6ee9013eba4f693d104d6f0", + "l_arginase.log:md5,a41135cfe024baaf42f135583fe73f0d", + "l_arginase_after.tsv:md5,6c4fe965aa0905f437f3731ed8eed98e", + "l_arginase_after_mqc.txt:md5,af14b7e79260ece0074d4ea82a3b3ce6", + "l_arginase_before.tsv:md5,31a34ec0257053f34a449270f23d98ff", + "l_arginase_before_mqc.txt:md5,7ab718f12ea5460f254b8c43cfd11040" ] ], - "timestamp": "2026-03-13T14:23:26.476832", + "timestamp": "2026-03-13T15:33:22.609858", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" From 22c27fb7e73e6937400d7ab240cc017e93490b22 Mon Sep 17 00:00:00 2001 From: Nikos Pechlivanis Date: Fri, 13 Mar 2026 15:42:47 +0000 Subject: [PATCH 26/27] update nftignore --- tests/.nftignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/.nftignore b/tests/.nftignore index 153205a..0b6bd76 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -14,6 +14,9 @@ domain_annotation/pfam/l_arginase.domtbl.gz domain_annotation/funfam/T1024.domtbl.gz domain_annotation/funfam/T1026.domtbl.gz 
domain_annotation/funfam/l_arginase.domtbl.gz +domain_annotation/nmpfams/T1024.domtbl.gz +domain_annotation/nmpfams/T1026.domtbl.gz +domain_annotation/nmpfams/l_arginase.domtbl.gz functional_annotation/interproscan/T1024/T1024.gff3 functional_annotation/interproscan/T1024/T1024.tsv functional_annotation/interproscan/T1026/T1026.gff3 From 70a7ba318a4302bb8d81d3a0151cdcd383d0e2c6 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Sat, 14 Mar 2026 10:14:23 +0000 Subject: [PATCH 27/27] module configs updated for nmpfams, end-to-end test snapshot updated --- .nf-core.yml | 2 +- CHANGELOG.md | 6 +- README.md | 2 +- conf/modules.config | 18 +++++ ro-crate-metadata.json | 2 +- tests/default.nf.test.snap | 134 ++++++++++++++++++++++++++++++++++--- 6 files changed, 148 insertions(+), 16 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 99a3e1e..a1abaed 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -8,7 +8,7 @@ lint: - docs/images/nf-core-proteinannotator_logo_light.png - docs/images/nf-core-proteinannotator_logo_dark.png - .github/PULL_REQUEST_TEMPLATE.md -nf_core_version: 3.5.1 +nf_core_version: 3.5.2 repository_type: pipeline template: author: Olga Botvinnik, Evangelos Karatzas diff --git a/CHANGELOG.md b/CHANGELOG.md index acdf395..202bd64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,17 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` -- [85](https://github.com/nf-core/proteinannotator/pull/85) - Added zenodo doi in `nextflow.config`. (by @vagkaratzas) +- [#87](https://github.com/nf-core/proteinannotator/pull/87) - Added the option to download and use the latest `NMPFams` HMM library (or use path to an existing one) for domain annotation. (by @npechl) +- [#85](https://github.com/nf-core/proteinannotator/pull/85) - Added zenodo doi in `nextflow.config`. 
(by @vagkaratzas) ### `Changed` -- [85](https://github.com/nf-core/proteinannotator/pull/85) - `test_full.config` input samplesheet path is now set properly. (by @vagkaratzas) +- [#85](https://github.com/nf-core/proteinannotator/pull/85) - `test_full.config` input samplesheet path is now set properly. (by @vagkaratzas) ## v1.0.0 - Yellow Saiga - [2026/02/09] Initial release of nf-core/proteinannotator, created with the [nf-core](https://nf-co.re/) template. -- [#87](https://github.com/nf-core/proteinannotator/pull/87) - Added the option to download and use the latest NMPFams HMM library (or use path to an existing one) for domain annotation. (by @npechl) - [#68](https://github.com/nf-core/proteinannotator/pull/68) - Using the `ARIA2` and `UNTAR` nf-core modules to download and decompress the InterProScan database. (by @vagkaratzas) - [#67](https://github.com/nf-core/proteinannotator/pull/67) - Swapped to the updated, non-buggy, nf-core version of `INTERPROSCAN`. (by @vagkaratzas) - [#65](https://github.com/nf-core/proteinannotator/pull/65) - Converted the pipeline schematic to nf-core metromap. 
(by @vagkaratzas) diff --git a/README.md b/README.md index f4f95ee..2b8f037 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) diff --git a/conf/modules.config b/conf/modules.config index ec1428c..b5a5635 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -90,6 +90,14 @@ process { ] } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:ARIA2_NMPFAMS' { + publishDir = [ + path: { "${params.outdir}/downloaded_dbs/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_PFAM' { ext.args = { "-E ${params.hmmsearch_evalue_cutoff}" } publishDir = [ @@ -110,6 +118,16 @@ process { ] } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:DOMAIN_ANNOTATION:HMMSEARCH_NMPFAMS' { + ext.args = { "-E ${params.hmmsearch_evalue_cutoff}" } + publishDir = [ + path: { "${params.outdir}/domain_annotation/nmpfams/" }, + mode: params.publish_dir_mode, + pattern: "*.domtbl.gz", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'NFCORE_PROTEINANNOTATOR:PROTEINANNOTATOR:FUNCTIONAL_ANNOTATION:ARIA2' { publishDir = [ path: { "${params.outdir}/downloaded_dbs/" }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 936e72a..5028582 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2026-02-09T13:54:13+00:00", - "description": "

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/) and [FunFam](https://download.cathdb.info/cath/releases/all-releases/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinannotator)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinannotator/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinannotator/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.18547735-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.18547735)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera 
Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinannotator)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinannotator-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinannotator)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinannotator** is a bioinformatics pipeline that computes statistics for protein FASTA inputs and produces protein annotations based on predicted sequence features, including conserved domains, functions, and secondary structure.\n\n

    \n \n \n \"nf-core/proteinannotator\"\n \n

    \n\n### Check quality and pre-process\n\nGenerate input amino acid sequence statistics with ([`SeqFu`](https://github.com/telatin/seqfu2/)) and pre-process them (i.e., gap removal, convert to upper case, validate, filter by length, replace special characters such as `/`, and remove duplicate sequences) with ([`SeqKit`](https://github.com/shenwei356/seqkit/))\n\n### Annotate sequences\n\n1. Conserved domain annotation with ([`hmmer`](https://github.com/EddyRivasLab/hmmer/)) against databases\n such as [Pfam](https://ftp.ebi.ac.uk/pub/databases/Pfam/), [FunFam](https://download.cathdb.info/cath/releases/all-releases/), and [NMPFams](https://pavlopoulos-lab.org/envofams/databases/hmmer/)\n2. Functional annotation:\n - ([`InterProScan`](https://interproscan-docs.readthedocs.io/en/v5/)) a software tool used to analyze protein sequences by scanning them against the signatures of protein families, domains, and sites in the [InterPro](https://www.ebi.ac.uk/interpro/) database, helping to identify their functional characteristics.\n3. Predict secondary structure compositional features such as \u03b1-helices, \u03b2-strands and coils with ([`s4pred`](https://github.com/psipred/s4pred))\n4. Present QC stats for input sequences before and after initial pre-processing with ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nid,fasta\nspecies1,species1_proteins.fasta\nspecies2,species2_proteins.fasta\n```\n\nEach row represents a FASTA file of proteins from a single species.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinannotator \\\n -profile <docker/singularity/.../institute> \\\n --input samplesheet.csv \\\n --outdir <OUTDIR>\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinannotator/usage) and the [parameter documentation](https://nf-co.re/proteinannotator/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinannotator/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinannotator/output).\n\n## Credits\n\nnf-core/proteinannotator was originally written by Olga Botvinnik and Evangelos Karatzas.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n- [Michael L Heuer](https://github.com/heuermh)\n- [Edmund Miller](https://github.com/edmundmiller)\n- [Eric Wei](https://github.com/eweizy)\n- [Martin Beracochea](https://github.com/mberacochea)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinannotator` channel](https://nfcore.slack.com/channels/proteinannotator) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinannotator for your analysis, please cite it using the following doi: [10.5281/zenodo.18547735](https://doi.org/10.5281/zenodo.18547735)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index bc35f2b..10d5d5a 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test": { "content": [ - 20, + 36, { "ARIA2": { "aria2": "1.36.0" @@ -15,25 +15,57 @@ "ARIA2_PFAM": { "aria2": "1.36.0" }, + "HMMSEARCH_FUNFAM": { + "hmmer": 3.4 + }, + "HMMSEARCH_NMPFAMS": { + "hmmer": 3.4 + }, + "HMMSEARCH_PFAM": { + "hmmer": 3.4 + }, + "INTERPROSCAN": { + "interproscan": "5.59-91.0" + }, + "S4PRED_RUNMODEL": { + "s4pred": "1.2.1" + }, "SEQFU_STATS_AFTER": { "seqfu": "1.22.3" }, "SEQFU_STATS_BEFORE": { "seqfu": "1.22.3" }, + "SEQKIT_REPLACE": { + "seqkit": "2.9.0" + }, "SEQKIT_RMDUP": { "seqkit": "v2.9.0" }, "SEQKIT_SEQ": { "seqkit": "v2.9.0" }, + "UNTAR": { + "untar": 1.34 + }, "Workflow": { "nf-core/proteinannotator": "v1.1.0dev" } }, [ - "aria2", - "aria2/nmpfamsdb_test.hmm.gz", + "domain_annotation", + "domain_annotation/funfam", + 
"domain_annotation/funfam/T1024.domtbl.gz", + "domain_annotation/funfam/T1026.domtbl.gz", + "domain_annotation/funfam/l_arginase.domtbl.gz", + "domain_annotation/nmpfams", + "domain_annotation/nmpfams/T1024.domtbl.gz", + "domain_annotation/nmpfams/T1026.domtbl.gz", + "domain_annotation/nmpfams/l_arginase.domtbl.gz", + "domain_annotation/pfam", + "domain_annotation/pfam/T1024.domtbl.gz", + "domain_annotation/pfam/T1026.domtbl.gz", + "domain_annotation/pfam/l_arginase.domtbl.gz", "downloaded_dbs", "downloaded_dbs/Pfam-A_test.hmm.gz", "downloaded_dbs/funfam-hmm3-v4_3_0_test.lib.gz", @@ -56,6 +88,62 @@ "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_15.0_HMM.LIB", "downloaded_dbs/interproscan_db/tigrfam/15.0/TIGRFAMs_HMM.LIB", "downloaded_dbs/interproscan_test.tar.gz", + "downloaded_dbs/nmpfamsdb_test.hmm.gz", + "functional_annotation", + "functional_annotation/interproscan", + "functional_annotation/interproscan/T1024", + "functional_annotation/interproscan/T1024/T1024.gff3", + "functional_annotation/interproscan/T1024/T1024.json", + "functional_annotation/interproscan/T1024/T1024.tsv", + "functional_annotation/interproscan/T1024/T1024.xml", + "functional_annotation/interproscan/T1026", + "functional_annotation/interproscan/T1026/T1026.gff3", + "functional_annotation/interproscan/T1026/T1026.json", + "functional_annotation/interproscan/T1026/T1026.tsv", + "functional_annotation/interproscan/T1026/T1026.xml", + "functional_annotation/interproscan/l_arginase", + "functional_annotation/interproscan/l_arginase/l_arginase.gff3", + "functional_annotation/interproscan/l_arginase/l_arginase.json", + "functional_annotation/interproscan/l_arginase/l_arginase.tsv", + "functional_annotation/interproscan/l_arginase/l_arginase.xml", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_T1024_after.txt", + 
"multiqc/multiqc_data/multiqc_T1024_before.txt", + "multiqc/multiqc_data/multiqc_T1026_after.txt", + "multiqc/multiqc_data/multiqc_T1026_before.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_l_arginase_after.txt", + "multiqc/multiqc_data/multiqc_l_arginase_before.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/T1024_after.pdf", + "multiqc/multiqc_plots/pdf/T1024_before.pdf", + "multiqc/multiqc_plots/pdf/T1026_after.pdf", + "multiqc/multiqc_plots/pdf/T1026_before.pdf", + "multiqc/multiqc_plots/pdf/l_arginase_after.pdf", + "multiqc/multiqc_plots/pdf/l_arginase_before.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/T1024_after.png", + "multiqc/multiqc_plots/png/T1024_before.png", + "multiqc/multiqc_plots/png/T1026_after.png", + "multiqc/multiqc_plots/png/T1026_before.png", + "multiqc/multiqc_plots/png/l_arginase_after.png", + "multiqc/multiqc_plots/png/l_arginase_before.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/T1024_after.svg", + "multiqc/multiqc_plots/svg/T1024_before.svg", + "multiqc/multiqc_plots/svg/T1026_after.svg", + "multiqc/multiqc_plots/svg/T1026_before.svg", + "multiqc/multiqc_plots/svg/l_arginase_after.svg", + "multiqc/multiqc_plots/svg/l_arginase_before.svg", + "multiqc/multiqc_report.html", "pipeline_info", "pipeline_info/nf_core_proteinannotator_software_mqc_versions.yml", "qc", @@ -79,10 +167,19 @@ "qc/l_arginase/l_arginase_after.tsv", "qc/l_arginase/l_arginase_after_mqc.txt", "qc/l_arginase/l_arginase_before.tsv", - "qc/l_arginase/l_arginase_before_mqc.txt" + "qc/l_arginase/l_arginase_before_mqc.txt", + "s4pred", + "s4pred/T1024", + "s4pred/T1024/ss2", + "s4pred/T1024/ss2/T1024.ss2", + "s4pred/T1026", + "s4pred/T1026/ss2", + "s4pred/T1026/ss2/T1026.ss2", + "s4pred/l_arginase", + 
"s4pred/l_arginase/ss2", + "s4pred/l_arginase/ss2/GI|225038609|EFDID|719595|FULL.ss2" ], [ - "nmpfamsdb_test.hmm.gz:md5,ad7a094618ccfdaeed1c03e93f6abf1e", "Pfam-A_test.hmm.gz:md5,a5ab72b2b7bc72c62756684707e2387c", "funfam-hmm3-v4_3_0_test.lib.gz:md5,df8b324882e1ceb8f8196155a968ed77", "hamap.hmm.lib:md5,8c589a7f459284080e674e79454434d3", @@ -96,6 +193,20 @@ "TIGRFAMs_15.0_HMM.LIB:md5,64f2b2c9e834b47b17d91bb9a6a0067e", "TIGRFAMs_HMM.LIB:md5,543da3f4b65eed9ec393986c6c6ff0ba", "interproscan_test.tar.gz:md5,cde88c0cd841c84dc1203e64854c762b", + "nmpfamsdb_test.hmm.gz:md5,ad7a094618ccfdaeed1c03e93f6abf1e", + "T1024.json:md5,0288f7551a14faedc409dd374b3e073e", + "T1024.xml:md5,63a3db0eb0e1f76403411602c23b721e", + "T1026.json:md5,5c2a40474b1cfb50cd043fe0be5e5d52", + "T1026.xml:md5,335552ce1703548565212a1d54681d75", + "l_arginase.json:md5,e0d127dd8a952cbd798999851d1338e6", + "l_arginase.xml:md5,7248992d9c1618cf7baa7515ae79ce32", + "multiqc_T1024_after.txt:md5,f2a552d4750ff8360941b10cec141499", + "multiqc_T1024_before.txt:md5,f2a552d4750ff8360941b10cec141499", + "multiqc_T1026_after.txt:md5,aabd4e58ed67d366fd04592ca09dbc9b", + "multiqc_T1026_before.txt:md5,aabd4e58ed67d366fd04592ca09dbc9b", + "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", + "multiqc_l_arginase_after.txt:md5,5df939cbafd732aa5095dad4434a4a33", + "multiqc_l_arginase_before.txt:md5,dd8e50549823ef944d6e36d0e2af56f7", "T1024.fasta:md5,aa546680bf58fcb8aeb10e1617e6d5bd", "T1024.log:md5,a41135cfe024baaf42f135583fe73f0d", "T1024_after.tsv:md5,2daf830aba484edbcf25e811f5769ad0", @@ -113,13 +224,16 @@ "l_arginase_after.tsv:md5,6c4fe965aa0905f437f3731ed8eed98e", "l_arginase_after_mqc.txt:md5,af14b7e79260ece0074d4ea82a3b3ce6", "l_arginase_before.tsv:md5,31a34ec0257053f34a449270f23d98ff", - "l_arginase_before_mqc.txt:md5,7ab718f12ea5460f254b8c43cfd11040" + "l_arginase_before_mqc.txt:md5,7ab718f12ea5460f254b8c43cfd11040", + "T1024.ss2:md5,6f2467c4e6974e761333bef106809e88", + 
"T1026.ss2:md5,cc788acb2aefe43fea147d9dd2b6c2c9", + "GI|225038609|EFDID|719595|FULL.ss2:md5,e7d8eaa84d46a6a714ffe00d7f21cdfb" ] ], - "timestamp": "2026-03-13T15:33:22.609858", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.4" - } + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-14T10:06:42.466898492" } } \ No newline at end of file