From dc3dc26ddd7d8b3b446c630d24a734f94f4d0a8f Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Tue, 17 Feb 2026 16:07:00 +0200 Subject: [PATCH 01/16] Implelenting bigslice into funcscan --- conf/modules.config | 8 ++ modules/nf-core/bigslice/run/environment.yml | 7 ++ modules/nf-core/bigslice/run/main.nf | 45 ++++++++ modules/nf-core/bigslice/run/meta.yml | 90 +++++++++++++++ .../nf-core/bigslice/run/tests/main.nf.test | 107 ++++++++++++++++++ .../bigslice/run/tests/main.nf.test.snap | 88 ++++++++++++++ .../bigslice/run/tests/nextflow.config | 5 + nextflow.config | 4 + nextflow_schema.json | 45 +++++--- subworkflows/local/bgc.nf | 48 ++++++++ 10 files changed, 434 insertions(+), 13 deletions(-) create mode 100644 modules/nf-core/bigslice/run/environment.yml create mode 100644 modules/nf-core/bigslice/run/main.nf create mode 100644 modules/nf-core/bigslice/run/meta.yml create mode 100644 modules/nf-core/bigslice/run/tests/main.nf.test create mode 100644 modules/nf-core/bigslice/run/tests/main.nf.test.snap create mode 100644 modules/nf-core/bigslice/run/tests/nextflow.config diff --git a/conf/modules.config b/conf/modules.config index 26186279..d240c20c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -540,6 +540,14 @@ process { ] } + withName: BIGSLICE { + publishDir = [ + path: { "${params.outdir}/bgc/bigslice/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } + withName: HAMRONIZATION_ABRICATE { publishDir = [ path: { "${params.outdir}/arg/hamronization/abricate" }, diff --git a/modules/nf-core/bigslice/run/environment.yml b/modules/nf-core/bigslice/run/environment.yml new file mode 100644 index 00000000..de8fdfbb --- /dev/null +++ b/modules/nf-core/bigslice/run/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::bigslice=2.0.2" diff --git a/modules/nf-core/bigslice/run/main.nf b/modules/nf-core/bigslice/run/main.nf new file mode 100644 index 00000000..a1782087 --- /dev/null +++ b/modules/nf-core/bigslice/run/main.nf @@ -0,0 +1,45 @@ +process BIGSLICE { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided correctly by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bigslice:2.0.2--pyh8ed023e_0': + 'biocontainers/bigslice:2.0.2--pyh8ed023e_0' }" + + input: + tuple val(meta), path(bgc) + path(hmmdb) + + output: + tuple val(meta), path("${prefix}/result/data.db") , emit: db + tuple val(meta), path("${prefix}/result/tmp/**/*.fa"), emit: fa + tuple val("${task.process}"), val('bigslice'), eval("echo 2.0.2"), topic: versions, emit: versions_bigslice + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + bigslice \\ + $args \\ + --num_threads ${task.cpus} \\ + -i ${bgc} \\ + --program_db_folder ${hmmdb} \\ + ${prefix} + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo $args + + mkdir -p ${prefix}/result/tmp/2e555308dfc411186cf012334262f127 + touch ${prefix}/result/data.db + touch ${prefix}/result/tmp/2e555308dfc411186cf012334262f127/test.fa + """ +} diff --git a/modules/nf-core/bigslice/run/meta.yml b/modules/nf-core/bigslice/run/meta.yml new file mode 100644 index 00000000..518f4bf3 --- /dev/null +++ b/modules/nf-core/bigslice/run/meta.yml @@ -0,0 +1,90 @@ +name: "bigslice" +description: | + A scalable tool for large-scale analysis of Biosynthetic Gene Clusters (BGCs). + It takes genome regions in GenBank format along with an HMM database and produces a SQLite database and FASTA outputs of predicted features. 
+keywords: + - biosynthetic gene clusters + - genomics + - analysis +tools: + - "bigslice": + description: A highly scalable, user-interactive tool for the large scale analysis + of Biosynthetic Gene Clusters data + homepage: "https://github.com/medema-group/bigslice" + documentation: "https://github.com/medema-group/bigslice" + tool_dev_url: "https://github.com/medema-group/bigslice" + doi: "10.1093/gigascience/giaa154" + licence: ["AGPL v3-or-later"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - bgc: + type: directory + description: | + Path to a folder containing genomic regions in GenBank format, structured for BiG-SLiCE. + Each genome should have its own subfolder with region `.gbk` files. + The folder should also contain a datasets.tsv, and a taxonomy folder, with TSV taxonomy files per dataset. + See the tool's wiki for more information: https://github.com/medema-group/bigslice/wiki/Input-folder + pattern: "*" + - hmmdb: + type: directory + description: | + Path to the BiG-SLiCE HMM database folder containing biosynthetic and sub Pfams for annotation, in the required BiG-SLiCE format. + An example directory in compressed archive format can be found here: https://github.com/medema-group/bigslice/releases/download/v2.0.0rc/bigslice-models.2022-11-30.tar.gz + +output: + db: + - - meta: + type: map + description: Groovy Map containing sample/dataset information + - ${prefix}/result/data.db: + type: file + description: | + The results SQLite database. Contains various tables relevant to result + BGCs, CDSs, GCFs, HMMs and HSPs. + pattern: "data.db" + ontologies: + - edam: "http://edamontology.org/format_3621" # SQLite format + fa: + - - meta: + type: map + description: Groovy Map containing sample/dataset information + - ${prefix}/result/tmp/**/*.fa: + type: file + description: | + Predicted features as FASTA files. One file per hit HMM. 
+ pattern: "*.fa" + ontologies: + - edam: "http://edamontology.org/format_1929" # FASTA + versions_bigslice: + - - ${task.process}: + type: string + description: The name of the process + - bigslice: + type: string + description: The name of the tool + - echo 2.0.2: + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bigslice: + type: string + description: The name of the tool + - echo 2.0.2: + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@vagkaratzas" +maintainers: + - "@vagkaratzas" diff --git a/modules/nf-core/bigslice/run/tests/main.nf.test b/modules/nf-core/bigslice/run/tests/main.nf.test new file mode 100644 index 00000000..354301c8 --- /dev/null +++ b/modules/nf-core/bigslice/run/tests/main.nf.test @@ -0,0 +1,107 @@ +nextflow_process { + + name "Test Process BIGSLICE" + script "../main.nf" + process "BIGSLICE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bigslice" + tag "aria2" + tag "untar" + + setup { + run("ARIA2", alias: "ARIA2_HMMDB") { + script "../../aria2/main.nf" + process { + """ + input[0] = [ + [ id:'test_hmm_db' ], + 'https://github.com/medema-group/bigslice/releases/download/v2.0.0rc/bigslice-models.2022-11-30.tar.gz' // https URL + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_HMMDB") { + script "../../untar/main.nf" + process { + """ + input[0] = ARIA2_HMMDB.out.downloaded_file + """ + } + } + + run("ARIA2", alias: "ARIA2_GBK") { + script "../../aria2/main.nf" + process { + """ + input[0] = [ + [ id:'test_gbk' ], + params.modules_testdata_base_path + 'genomics/prokaryotes/streptomyces_coelicolor/fixtures_bigslice_gbk.tar.gz' // https URL + ] + """ + } + } + + run("UNTAR", alias: "UNTAR_GBK") { + script "../../untar/main.nf" + process { + """ + input[0] = ARIA2_GBK.out.downloaded_file + """ + } + } + } + + test("streptomyces_coelicolor - 
bigslice - gbk") { + + when { + process { + """ + input[0] = UNTAR_GBK.out.untar + input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] } + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + file(process.out.db[0][1]).name, + process.out.fa[0][1].size(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("streptomyces_coelicolor - bigslice - gbk - stub") { + + options "-stub" + + when { + process { + """ + input[0] = UNTAR_GBK.out.untar + input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] } + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out, + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bigslice/run/tests/main.nf.test.snap b/modules/nf-core/bigslice/run/tests/main.nf.test.snap new file mode 100644 index 00000000..cc26a103 --- /dev/null +++ b/modules/nf-core/bigslice/run/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "streptomyces_coelicolor - bigslice - gbk - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_gbk" + }, + "data.db:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_gbk" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "BIGSLICE", + "bigslice", + "2.0.2" + ] + ], + "db": [ + [ + { + "id": "test_gbk" + }, + "data.db:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fa": [ + [ + { + "id": "test_gbk" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bigslice": [ + [ + "BIGSLICE", + "bigslice", + "2.0.2" + ] + ] + }, + { + "versions_bigslice": [ + [ + "BIGSLICE", + "bigslice", + "2.0.2" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-06T15:13:53.99145463" + }, + "streptomyces_coelicolor - bigslice - gbk": { + "content": [ + "data.db", + 40, + { + "versions_bigslice": [ + [ + "BIGSLICE", + "bigslice", + "2.0.2" + 
] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-06T15:26:29.769543832" + } +} \ No newline at end of file diff --git a/modules/nf-core/bigslice/run/tests/nextflow.config b/modules/nf-core/bigslice/run/tests/nextflow.config new file mode 100644 index 00000000..2986e346 --- /dev/null +++ b/modules/nf-core/bigslice/run/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BIGSLICE { + ext.prefix = "test_bigslice" + } +} diff --git a/nextflow.config b/nextflow.config index b87d4938..4b7227db 100644 --- a/nextflow.config +++ b/nextflow.config @@ -257,6 +257,10 @@ params { bgc_gecco_convertmode = 'clusters' bgc_gecco_convertformat = 'gff' + + bgc_bigslice_run = false + bgc_bigslice_db = null + bgc_run_hmmsearch = false bgc_hmmsearch_models = null bgc_hmmsearch_savealignments = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 3b8d8fd8..9734f7fc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -159,14 +159,14 @@ }, "taxa_classification_mmseqs_taxonomy_sensitivity": { "type": "number", - "default": 5, + "default": 5.0, "help_text": "This flag specifies the speed and sensitivity of the taxonomic search. It stands for how many kmers should be produced during the preliminary seeding stage. A very fast search requires a low value e.g. '1.0' and a a very sensitive search requires e.g. '7.0'. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `-s`", "description": "Specify the speed and sensitivity for taxonomy assignment.", "fa_icon": "fas fa-history" }, "taxa_classification_mmseqs_taxonomy_orffilters": { "type": "number", - "default": 2, + "default": 2.0, "help_text": "This flag specifies the sensitivity used for prefiltering the query ORF. Before the taxonomy-assigning step, MMseqs2 searches the predicted ORFs against the provided database. 
This value influences the speed with which the search is carried out. More details can be found in the [documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - mmseqs taxonomy: `--orf-filter-s`", "description": "Specify the ORF search sensitivity in the prefilter step.", "fa_icon": "fas fa-history" @@ -383,7 +383,7 @@ "default": "Bacteria", "fa_icon": "fas fa-crown", "description": "Specify the kingdom that the input represents.", - "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> ⚠️ Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`", + "help_text": "Specifies the kingdom that the input sample is derived from and/or you wish to screen for\n\n> \u26a0\ufe0f Prokka cannot annotate Eukaryotes.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--kingdom`", "enum": ["Archaea", "Bacteria", "Mitochondria", "Viruses"] }, "annotation_prokka_gcode": { @@ -399,12 +399,12 @@ "type": "integer", "default": 1, "description": "Minimum contig size required for annotation (bp).", - "help_text": "Specify the minimum contig lengths to carry out annotations on. The Prokka developers recommend that this should be ≥ 200 bp, if you plan to submit such annotations to NCBI.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`", + "help_text": "Specify the minimum contig lengths to carry out annotations on. 
The Prokka developers recommend that this should be \u2265 200 bp, if you plan to submit such annotations to NCBI.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--mincontiglen`", "fa_icon": "fas fa-ruler-horizontal" }, "annotation_prokka_evalue": { "type": "number", - "default": 0.000001, + "default": 1e-6, "description": "E-value cut-off.", "help_text": "Specifiy the maximum E-value used for filtering the alignment hits.\n\nFor more information please check the Prokka [documentation](https://github.com/tseemann/prokka).\n\n> Modifies tool parameter(s):\n> - Prokka: `--evalue`", "fa_icon": "fas fa-sort-amount-down" @@ -705,7 +705,7 @@ "amp_ampcombi_db": { "type": "string", "description": "The path to the folder containing the reference database files.", - "help_text": "The path to the folder containing the reference database files (`*.fasta` and `*.tsv`); a fasta file and the corresponding table with structural, functional and if reported taxonomic classifications. AMPcombi will then generate the corresponding `mmseqs2` directory, in which all binary files are prepared for the downstream alignment of the recovered AMPs with [MMseqs2](https://github.com/soedinglab/MMseqs2). These can also be provided by the user by setting up an mmseqs2 compatible database using `mmseqs createdb *.fasta` in a directory called `mmseqs2`.\n\nExample file structure for the reference database supplied by the user:\n\n```bash\namp_DRAMP_database/\n├── general_amps_2024_11_13.fasta\n├── general_amps_2024_11_13.txt\n└── mmseqs2\n ├── ref_DB\n ├── ref_DB.dbtype\n ├── ref_DB_h\n ├── ref_DB_h.dbtype\n ├── ref_DB_h.index\n ├── ref_DB.index\n ├── ref_DB.lookup\n └── ref_DB.source```\n\nFor more information check the AMPcombi [documentation](https://ampcombi.readthedocs.io/en/main/usage.html#parse-tables)." 
+ "help_text": "The path to the folder containing the reference database files (`*.fasta` and `*.tsv`); a fasta file and the corresponding table with structural, functional and if reported taxonomic classifications. AMPcombi will then generate the corresponding `mmseqs2` directory, in which all binary files are prepared for the downstream alignment of the recovered AMPs with [MMseqs2](https://github.com/soedinglab/MMseqs2). These can also be provided by the user by setting up an mmseqs2 compatible database using `mmseqs createdb *.fasta` in a directory called `mmseqs2`.\n\nExample file structure for the reference database supplied by the user:\n\n```bash\namp_DRAMP_database/\n\u251c\u2500\u2500 general_amps_2024_11_13.fasta\n\u251c\u2500\u2500 general_amps_2024_11_13.txt\n\u2514\u2500\u2500 mmseqs2\n \u251c\u2500\u2500 ref_DB\n \u251c\u2500\u2500 ref_DB.dbtype\n \u251c\u2500\u2500 ref_DB_h\n \u251c\u2500\u2500 ref_DB_h.dbtype\n \u251c\u2500\u2500 ref_DB_h.index\n \u251c\u2500\u2500 ref_DB.index\n \u251c\u2500\u2500 ref_DB.lookup\n \u2514\u2500\u2500 ref_DB.source```\n\nFor more information check the AMPcombi [documentation](https://ampcombi.readthedocs.io/en/main/usage.html#parse-tables)." }, "amp_ampcombi_parsetables_cutoff": { "type": "number", @@ -723,7 +723,7 @@ }, "amp_ampcombi_parsetables_dbevalue": { "type": "number", - "default": 5, + "default": 5.0, "description": "Remove all DRAMP annotations that have an e-value greater than this value.", "help_text": "This e-value is used as a cut-off for the annotations from the internal Diamond alignment step (against the DRAMP database by default). 
Any e-value below this value will only remove the DRAMP classification and not the entire hit.\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--db_evalue`", "fa_icon": "fas fa-sort-numeric-down" @@ -794,14 +794,14 @@ "properties": { "amp_ampcombi_cluster_covmode": { "type": "number", - "default": 0, + "default": 0.0, "description": "MMseqs2 coverage mode.", "help_text": "This assigns the coverage mode to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More details can be found in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_cov_mode`", "fa_icon": "far fa-circle" }, "amp_ampcombi_cluster_sensitivity": { "type": "number", - "default": 4, + "default": 4.0, "description": "Remove hits that have no stop codon upstream and downstream of the AMP.", "help_text": "This assigns the sensitivity of alignment to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_sensitivity`", "fa_icon": "fas fa-arrows-alt-h" @@ -815,7 +815,7 @@ }, "amp_ampcombi_cluster_mode": { "type": "number", - "default": 1, + "default": 1.0, "description": "MMseqs2 clustering mode.", "help_text": "This assigns the cluster mode to the MMseqs2 cluster module. This determines how AMPs are grouped into clusters. 
More information can be obtained in the [MMseqs2 documentation](https://mmseqs.com/latest/userguide.pdf).\n\n> Modifies tool parameter(s):\n> - AMPCOMBI: `--cluster_mode`", "fa_icon": "fas fa-circle" @@ -863,7 +863,7 @@ }, "arg_amrfinderplus_identmin": { "type": "number", - "default": -1, + "default": -1.0, "help_text": "Specify the minimum percentage amino-acid identity to reference protein or nucleotide identity for nucleotide reference must have if a BLAST alignment (based on methods: BLAST or PARTIAL) was detected, otherwise NA.\n\n If you specify `-1`, this means use a curated threshold if it exists and `0.9` otherwise.\n\nSetting this value to something other than `-1` will override any curated similarity cutoffs. For BLAST: alignment is > 90% of length and > 90% identity to a protein in the AMRFinderPlus database. For PARTIAL: alignment is > 50% of length, but < 90% of length and > 90% identity to the reference, and does not end at a contig boundary.\n\nFor more information check the AMRFinderPlus [documentation](https://github.com/ncbi/amr/wiki/Running-AMRFinderPlus#--organism-option).\n\n> Modifies tool parameter(s):\n> - AMRFinderPlus: `--ident_min`", "description": "Minimum percent identity to reference sequence.", "fa_icon": "fas fa-angle-left" @@ -1065,14 +1065,14 @@ }, "arg_rgi_includeloose": { "type": "boolean", - "description": "Include all of loose, strict and perfect hits (i.e. ≥ 95% identity) found by RGI.", + "description": "Include all of loose, strict and perfect hits (i.e. \u2265 95% identity) found by RGI.", "help_text": "When activated RGI output will include 'Loose' hits in addition to 'Strict' and 'Perfect' hits. 
The 'Loose' algorithm works outside of the detection model cut-offs to provide detection of new, emergent threats and more distant homologs of AMR genes, but will also catalog homologous sequences and spurious partial matches that may not have a role in AMR.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_loose`", "fa_icon": "far fa-hand-scissors" }, "arg_rgi_includenudge": { "type": "boolean", "description": "Suppresses the default behaviour of RGI with `--arg_rgi_includeloose`.", - "help_text": "This flag suppresses the default behaviour of RGI, by listing all 'Loose' matches of ≥ 95% identity as 'Strict' or 'Perfect', regardless of alignment length.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_nudge`", + "help_text": "This flag suppresses the default behaviour of RGI, by listing all 'Loose' matches of \u2265 95% identity as 'Strict' or 'Perfect', regardless of alignment length.\n\nFor more information check the RGI [documentation](https://github.com/arpcard/rgi).\n\n> Modifies tool parameter(s):\n> - RGI_MAIN: `--include_nudge`", "fa_icon": "fas fa-hand-scissors" }, "arg_rgi_lowquality": { @@ -1455,6 +1455,22 @@ }, "fa_icon": "fas fa-angle-double-right" }, + "bgc_bigslice": { + "title": "BGC: BiG-SLiCE", + "type": "object", + "default": "", + "properties": { + "bgc_bigslice_run": { + "type": "boolean", + "description": "Run BiG-SLiCE to cluster detected BGCs into gene cluster families (GCFs)." + }, + "bgc_bigslice_db": { + "type": "string", + "description": "Path to the pre-downloaded BiG-SLiCE HMM database directory." + } + }, + "description": "Parameters for BiG-SLiCE clustering of biosynthetic gene clusters (BGCs) into gene cluster families (GCFs). 
More info: https://github.com/medema-group/bigslice" + }, "bgc_hmmsearch": { "title": "BGC: hmmsearch", "type": "object", @@ -1740,6 +1756,9 @@ { "$ref": "#/$defs/bgc_gecco" }, + { + "$ref": "#/$defs/bgc_bigslice" + }, { "$ref": "#/$defs/bgc_hmmsearch" }, diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 8b231da9..f33dbcc0 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -13,6 +13,7 @@ include { COMBGC } from '../../modules/local/com include { TABIX_BGZIP as BGC_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip' include { MERGE_TAXONOMY_COMBGC } from '../../modules/local/merge_taxonomy_combgc' include { GECCO_CONVERT } from '../../modules/nf-core/gecco/convert' +include { BIGSLICE } from '../../modules/nf-core/bigslice/run' workflow BGC { take: @@ -116,6 +117,53 @@ workflow BGC { GECCO_CONVERT(ch_gecco_clusters_and_gbk, params.bgc_gecco_convertmode, params.bgc_gecco_convertformat) ch_versions = ch_versions.mix(GECCO_CONVERT.out.versions) } + // BIGSLICE + // BigSLICE requires at least one of the following conditions: + // 1. antiSMASH is enabled (its GBK output is natively compatible with BigSLICE) + // 2. GECCO is enabled AND GECCO_CONVERT is enabled with format "bigslice" + if (params.bgc_bigslice_run) { + + // Validate that BigSLICE has at least one compatible input source + if (params.bgc_skip_antismash && (params.bgc_skip_gecco || !params.bgc_gecco_runconvert || params.bgc_gecco_convertformat != 'bigslice')) { + error('[nf-core/funcscan] error: BigSLICE requires at least one of: (1) antiSMASH enabled, or (2) GECCO enabled with GECCO convert in bigslice format. Please check your parameters.') + } + + // Prepare BigSLICE HMM database + if (params.bgc_bigslice_db) { + ch_bigslice_hmmdb = Channel.fromPath(params.bgc_bigslice_db, checkIfExists: true) + .first() + } + else { + error('[nf-core/funcscan] error: BigSLICE HMM database not found for --bgc_bigslice_db! 
Please check input.') + } + + // Collect BigSLICE-compatible BGC inputs from available sources + ch_bigslice_input = Channel.empty() + + // Source 1: antiSMASH GBK results (natively compatible with BigSLICE) + if (!params.bgc_skip_antismash) { + ch_bigslice_input = ch_bigslice_input.mix( + ANTISMASH_ANTISMASH.out.gbk_results + ) + } + + // Source 2: GECCO output converted to BigSLICE format via GECCO_CONVERT + if (!params.bgc_skip_gecco && params.bgc_gecco_runconvert && params.bgc_gecco_convertformat == 'bigslice') { + ch_bigslice_input = ch_bigslice_input.mix( + GECCO_CONVERT.out.bigslice + ) + } + + // Group all BGC files per sample and run BigSLICE + ch_bigslice_grouped = ch_bigslice_input + .groupTuple() + .map { meta, files -> + [meta, files.flatten()] + } + + BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) + ch_versions = ch_versions.mix(BIGSLICE.out.versions_bigslice) + } // HMMSEARCH if (params.bgc_run_hmmsearch) { if (params.bgc_hmmsearch_models) { From 2300eb6ab4f3f18030552e83fe3ced80932993dd Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Thu, 19 Feb 2026 15:56:33 +0200 Subject: [PATCH 02/16] preping the input for bigslice --- subworkflows/local/bgc.nf | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index f33dbcc0..db56663a 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -117,10 +117,7 @@ workflow BGC { GECCO_CONVERT(ch_gecco_clusters_and_gbk, params.bgc_gecco_convertmode, params.bgc_gecco_convertformat) ch_versions = ch_versions.mix(GECCO_CONVERT.out.versions) } - // BIGSLICE - // BigSLICE requires at least one of the following conditions: - // 1. antiSMASH is enabled (its GBK output is natively compatible with BigSLICE) - // 2. 
GECCO is enabled AND GECCO_CONVERT is enabled with format "bigslice" + // BIGSLICE if (params.bgc_bigslice_run) { // Validate that BigSLICE has at least one compatible input source @@ -154,11 +151,38 @@ workflow BGC { ) } - // Group all BGC files per sample and run BigSLICE + // Group all BGC files per sample and prepare structured input for BiG-SLiCE ch_bigslice_grouped = ch_bigslice_input .groupTuple() .map { meta, files -> - [meta, files.flatten()] + def flat = files.flatten() + + // Create the BiG-SLiCE directory structure in a temp directory + def sample = meta.id + def dataset = "antismash" + def inputDir = java.nio.file.Files.createTempDirectory("bigslice_input_${sample}_") + def datasetDir = inputDir.resolve(dataset).resolve(sample) + java.nio.file.Files.createDirectories(datasetDir) + def taxDir = inputDir.resolve("taxonomy") + java.nio.file.Files.createDirectories(taxDir) + + // Copy GBK files into the structured directory + flat.each { f -> + def target = datasetDir.resolve(f.name) + java.nio.file.Files.copy(f.toPath(), target, java.nio.file.StandardCopyOption.REPLACE_EXISTING) + } + + // Create taxonomy file + def taxFile = taxDir.resolve("dataset_taxonomy.tsv") + taxFile.text = "accession\ttaxdomain\tphylum\tclass\torder\tfamily\tgenus\tspecies\torganism\n" + taxFile.append("${sample}/\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\n") + + // Create datasets.tsv + def datasetsFile = inputDir.resolve("datasets.tsv") + datasetsFile.text = "# dataset_name\tdataset_path\ttaxonomy_path\tdescription\n" + datasetsFile.append("${dataset}\t${dataset}\ttaxonomy/dataset_taxonomy.tsv\tBGC analysis ${dataset}\n") + + [meta, inputDir.toFile()] } BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) From 722d4bb36ad5a8e0212786702fc5299ecb7a8d55 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Thu, 19 Feb 2026 16:28:58 +0200 Subject: [PATCH 03/16] preping the input for bigslice --- subworkflows/local/bgc.nf | 8 +++++--- 1 file changed, 5 insertions(+), 
3 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index db56663a..0fd2621b 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -152,6 +152,7 @@ workflow BGC { } // Group all BGC files per sample and prepare structured input for BiG-SLiCE + input for BiG-SLiCE ch_bigslice_grouped = ch_bigslice_input .groupTuple() .map { meta, files -> @@ -168,17 +169,18 @@ workflow BGC { // Copy GBK files into the structured directory flat.each { f -> + def source = f.toPath() def target = datasetDir.resolve(f.name) - java.nio.file.Files.copy(f.toPath(), target, java.nio.file.StandardCopyOption.REPLACE_EXISTING) + java.nio.file.Files.copy(source, target, java.nio.file.StandardCopyOption.REPLACE_EXISTING) } // Create taxonomy file - def taxFile = taxDir.resolve("dataset_taxonomy.tsv") + def taxFile = taxDir.resolve("dataset_taxonomy.tsv").toFile() taxFile.text = "accession\ttaxdomain\tphylum\tclass\torder\tfamily\tgenus\tspecies\torganism\n" taxFile.append("${sample}/\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\n") // Create datasets.tsv - def datasetsFile = inputDir.resolve("datasets.tsv") + def datasetsFile = inputDir.resolve("datasets.tsv").toFile() datasetsFile.text = "# dataset_name\tdataset_path\ttaxonomy_path\tdescription\n" datasetsFile.append("${dataset}\t${dataset}\ttaxonomy/dataset_taxonomy.tsv\tBGC analysis ${dataset}\n") From 69168d37600741c4dcae3e3bd0012f4757bf8e95 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Thu, 19 Feb 2026 16:32:48 +0200 Subject: [PATCH 04/16] preping the input for bigslice --- subworkflows/local/bgc.nf | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 0fd2621b..05d8c852 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -117,7 +117,7 @@ workflow BGC { GECCO_CONVERT(ch_gecco_clusters_and_gbk, params.bgc_gecco_convertmode, params.bgc_gecco_convertformat) ch_versions = 
ch_versions.mix(GECCO_CONVERT.out.versions) } - // BIGSLICE + // BIGSLICE if (params.bgc_bigslice_run) { // Validate that BigSLICE has at least one compatible input source @@ -152,8 +152,7 @@ workflow BGC { } // Group all BGC files per sample and prepare structured input for BiG-SLiCE - input for BiG-SLiCE - ch_bigslice_grouped = ch_bigslice_input +ch_bigslice_grouped = ch_bigslice_input .groupTuple() .map { meta, files -> def flat = files.flatten() From e25440000abaa28eb2af86ba2bf597e863d8cce8 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Mon, 2 Mar 2026 16:35:09 +0200 Subject: [PATCH 05/16] preping the input for bigslice --- modules/nf-core/bigslice/run/main.nf | 16 ++++++++-- subworkflows/local/bgc.nf | 46 +++++++--------------------- 2 files changed, 24 insertions(+), 38 deletions(-) diff --git a/modules/nf-core/bigslice/run/main.nf b/modules/nf-core/bigslice/run/main.nf index a1782087..d140588c 100644 --- a/modules/nf-core/bigslice/run/main.nf +++ b/modules/nf-core/bigslice/run/main.nf @@ -9,7 +9,7 @@ process BIGSLICE { 'biocontainers/bigslice:2.0.2--pyh8ed023e_0' }" input: - tuple val(meta), path(bgc) + tuple val(meta), path(bgc, stageAs: 'bgc_files/*') path(hmmdb) output: @@ -23,11 +23,21 @@ process BIGSLICE { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def sample = meta.id """ + # Prepare BiG-SLiCE required input structure + mkdir -p input/dataset/${sample} input/taxonomy + cp bgc_files/* input/dataset/${sample}/ + + printf "# dataset_name\\tdataset_path\\ttaxonomy_path\\tdescription\\n" > input/datasets.tsv + printf "dataset\\tdataset\\ttaxonomy/taxonomy.tsv\\tBGC dataset\\n" >> input/datasets.tsv + + touch input/taxonomy/taxonomy.tsv + bigslice \\ $args \\ --num_threads ${task.cpus} \\ - -i ${bgc} \\ + -i input \\ --program_db_folder ${hmmdb} \\ ${prefix} """ @@ -42,4 +52,4 @@ process BIGSLICE { touch ${prefix}/result/data.db touch ${prefix}/result/tmp/2e555308dfc411186cf012334262f127/test.fa """ -} +} \ No newline at 
end of file diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 05d8c852..99bbb001 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -117,7 +117,7 @@ workflow BGC { GECCO_CONVERT(ch_gecco_clusters_and_gbk, params.bgc_gecco_convertmode, params.bgc_gecco_convertformat) ch_versions = ch_versions.mix(GECCO_CONVERT.out.versions) } - // BIGSLICE + // BIGSLICE if (params.bgc_bigslice_run) { // Validate that BigSLICE has at least one compatible input source @@ -137,56 +137,32 @@ workflow BGC { // Collect BigSLICE-compatible BGC inputs from available sources ch_bigslice_input = Channel.empty() - // Source 1: antiSMASH GBK results (natively compatible with BigSLICE) + // Source 1: antiSMASH GBK results if (!params.bgc_skip_antismash) { ch_bigslice_input = ch_bigslice_input.mix( ANTISMASH_ANTISMASH.out.gbk_results ) } - // Source 2: GECCO output converted to BigSLICE format via GECCO_CONVERT + // Source 2: GECCO output converted to BigSLICE format if (!params.bgc_skip_gecco && params.bgc_gecco_runconvert && params.bgc_gecco_convertformat == 'bigslice') { ch_bigslice_input = ch_bigslice_input.mix( GECCO_CONVERT.out.bigslice ) } - // Group all BGC files per sample and prepare structured input for BiG-SLiCE -ch_bigslice_grouped = ch_bigslice_input + // Group all BGC files per sample + ch_bigslice_grouped = ch_bigslice_input .groupTuple() .map { meta, files -> - def flat = files.flatten() - - // Create the BiG-SLiCE directory structure in a temp directory - def sample = meta.id - def dataset = "antismash" - def inputDir = java.nio.file.Files.createTempDirectory("bigslice_input_${sample}_") - def datasetDir = inputDir.resolve(dataset).resolve(sample) - java.nio.file.Files.createDirectories(datasetDir) - def taxDir = inputDir.resolve("taxonomy") - java.nio.file.Files.createDirectories(taxDir) - - // Copy GBK files into the structured directory - flat.each { f -> - def source = f.toPath() - def target = datasetDir.resolve(f.name) - 
java.nio.file.Files.copy(source, target, java.nio.file.StandardCopyOption.REPLACE_EXISTING) - } - - // Create taxonomy file - def taxFile = taxDir.resolve("dataset_taxonomy.tsv").toFile() - taxFile.text = "accession\ttaxdomain\tphylum\tclass\torder\tfamily\tgenus\tspecies\torganism\n" - taxFile.append("${sample}/\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\tUnknown\n") - - // Create datasets.tsv - def datasetsFile = inputDir.resolve("datasets.tsv").toFile() - datasetsFile.text = "# dataset_name\tdataset_path\ttaxonomy_path\tdescription\n" - datasetsFile.append("${dataset}\t${dataset}\ttaxonomy/dataset_taxonomy.tsv\tBGC analysis ${dataset}\n") - - [meta, inputDir.toFile()] + [meta, files.flatten()] } - BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) + // Prepare structured input directory for BiG-SLiCE + BIGSLICE_PREP_INPUT(ch_bigslice_grouped) + + // Run BigSLICE with prepared input + BIGSLICE(BIGSLICE_PREP_INPUT.out.input_dir, ch_bigslice_hmmdb) ch_versions = ch_versions.mix(BIGSLICE.out.versions_bigslice) } // HMMSEARCH From 040dd98b21dad8fad870670045b94f1c083a28ef Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Mon, 2 Mar 2026 16:38:06 +0200 Subject: [PATCH 06/16] prepping the input for bigslice --- subworkflows/local/bgc.nf | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 99bbb001..99b9c7b7 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -120,12 +120,10 @@ workflow BGC { // BIGSLICE if (params.bgc_bigslice_run) { - // Validate that BigSLICE has at least one compatible input source if (params.bgc_skip_antismash && (params.bgc_skip_gecco || !params.bgc_gecco_runconvert || params.bgc_gecco_convertformat != 'bigslice')) { error('[nf-core/funcscan] error: BigSLICE requires at least one of: (1) antiSMASH enabled, or (2) GECCO enabled with GECCO convert in bigslice format. 
Please check your parameters.') } - // Prepare BigSLICE HMM database if (params.bgc_bigslice_db) { ch_bigslice_hmmdb = Channel.fromPath(params.bgc_bigslice_db, checkIfExists: true) .first() @@ -134,35 +132,27 @@ error('[nf-core/funcscan] error: BigSLICE HMM database not found for --bgc_bigslice_db! Please check input.') } - // Collect BigSLICE-compatible BGC inputs from available sources ch_bigslice_input = Channel.empty() - // Source 1: antiSMASH GBK results if (!params.bgc_skip_antismash) { ch_bigslice_input = ch_bigslice_input.mix( ANTISMASH_ANTISMASH.out.gbk_results ) } - // Source 2: GECCO output converted to BigSLICE format if (!params.bgc_skip_gecco && params.bgc_gecco_runconvert && params.bgc_gecco_convertformat == 'bigslice') { ch_bigslice_input = ch_bigslice_input.mix( GECCO_CONVERT.out.bigslice ) } - // Group all BGC files per sample ch_bigslice_grouped = ch_bigslice_input .groupTuple() .map { meta, files -> [meta, files.flatten()] } - // Prepare structured input directory for BiG-SLiCE - BIGSLICE_PREP_INPUT(ch_bigslice_grouped) - - // Run BigSLICE with prepared input - BIGSLICE(BIGSLICE_PREP_INPUT.out.input_dir, ch_bigslice_hmmdb) + BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) ch_versions = ch_versions.mix(BIGSLICE.out.versions_bigslice) } // HMMSEARCH From a690c8be2865e296702ba886b437df368ae8e358 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Mon, 2 Mar 2026 16:57:13 +0200 Subject: [PATCH 07/16] prepping the input for bigslice --- modules/nf-core/bigslice/run/main.nf | 12 +++++++++++- nextflow.config | 8 ++++++++ subworkflows/local/bgc.nf | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/bigslice/run/main.nf b/modules/nf-core/bigslice/run/main.nf index d140588c..cdecaeff 100644 --- a/modules/nf-core/bigslice/run/main.nf +++ b/modules/nf-core/bigslice/run/main.nf @@ -15,7 +15,7 @@ process BIGSLICE { output: tuple val(meta), path("${prefix}/result/data.db") , emit: db tuple val(meta), 
path("${prefix}/result/tmp/**/*.fa"), emit: fa - tuple val("${task.process}"), val('bigslice'), eval("echo 2.0.2"), topic: versions, emit: versions_bigslice + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -40,6 +40,11 @@ -i input \\ --program_db_folder ${hmmdb} \\ ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bigslice: 2.0.2 + END_VERSIONS """ stub: @@ -51,5 +56,10 @@ mkdir -p ${prefix}/result/tmp/2e555308dfc411186cf012334262f127 touch ${prefix}/result/data.db touch ${prefix}/result/tmp/2e555308dfc411186cf012334262f127/test.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bigslice: 2.0.2 + END_VERSIONS """ } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 4b7227db..b4b6ba94 100644 --- a/nextflow.config +++ b/nextflow.config @@ -558,5 +558,13 @@ validation { monochromeLogs = params.monochrome_logs } +report { + overwrite = true +} + +timeline { + overwrite = true +} + // Load modules.config for DSL2 module specific options includeConfig 'conf/modules.config' diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 99b9c7b7..1b8e3213 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -153,7 +153,7 @@ workflow BGC { } BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) - ch_versions = ch_versions.mix(BIGSLICE.out.versions_bigslice) + ch_versions = ch_versions.mix(BIGSLICE.out.versions) } // HMMSEARCH if (params.bgc_run_hmmsearch) { From 0bde3ec3ec3beffa1bf5443d1985d1c58bec4773 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Wed, 4 Mar 2026 00:13:57 +0200 Subject: [PATCH 08/16] reverting the version generating method --- modules/nf-core/bigslice/run/main.nf | 12 +---------- .../nf-core/bigslice/run/tests/main.nf.test | 20 ++++++++++++++----- subworkflows/local/bgc.nf | 1 - 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/modules/nf-core/bigslice/run/main.nf 
b/modules/nf-core/bigslice/run/main.nf index cdecaeff..f067986c 100644 --- a/modules/nf-core/bigslice/run/main.nf +++ b/modules/nf-core/bigslice/run/main.nf @@ -15,7 +15,7 @@ process BIGSLICE { output: tuple val(meta), path("${prefix}/result/data.db") , emit: db tuple val(meta), path("${prefix}/result/tmp/**/*.fa"), emit: fa - path "versions.yml" , emit: versions + tuple val("${task.process}"), val('bigslice'), eval("echo 2.0.2"), topic: versions when: task.ext.when == null || task.ext.when @@ -40,11 +40,6 @@ process BIGSLICE { -i input \\ --program_db_folder ${hmmdb} \\ ${prefix} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bigslice: 2.0.2 - END_VERSIONS """ stub: @@ -56,10 +51,5 @@ process BIGSLICE { mkdir -p ${prefix}/result/tmp/2e555308dfc411186cf012334262f127 touch ${prefix}/result/data.db touch ${prefix}/result/tmp/2e555308dfc411186cf012334262f127/test.fa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bigslice: 2.0.2 - END_VERSIONS """ } \ No newline at end of file diff --git a/modules/nf-core/bigslice/run/tests/main.nf.test b/modules/nf-core/bigslice/run/tests/main.nf.test index 354301c8..e137e99b 100644 --- a/modules/nf-core/bigslice/run/tests/main.nf.test +++ b/modules/nf-core/bigslice/run/tests/main.nf.test @@ -60,7 +60,12 @@ nextflow_process { when { process { """ - input[0] = UNTAR_GBK.out.untar + // Flatten the GBK directory into a list of individual GBK files with meta + input[0] = UNTAR_GBK.out.untar.map { meta, dir -> + def gbk_files = [] + dir.eachFileRecurse { if (it.name.endsWith('.gbk')) gbk_files << it } + [ meta, gbk_files ] + } input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] } """ } @@ -72,7 +77,7 @@ nextflow_process { { assert snapshot( file(process.out.db[0][1]).name, process.out.fa[0][1].size(), - process.out.findAll { key, val -> key.startsWith("versions")} + process.out.versions ).match() } ) } @@ -86,7 +91,12 @@ nextflow_process { when { process { """ - input[0] = UNTAR_GBK.out.untar + // Flatten the 
GBK directory into a list of individual GBK files with meta + input[0] = UNTAR_GBK.out.untar.map { meta, dir -> + def gbk_files = [] + dir.eachFileRecurse { if (it.name.endsWith('.gbk')) gbk_files << it } + [ meta, gbk_files ] + } input[1] = UNTAR_HMMDB.out.untar.map{ it -> it[1] } """ } @@ -97,11 +107,11 @@ nextflow_process { assertAll( { assert snapshot( process.out, - process.out.findAll { key, val -> key.startsWith("versions")} + process.out.versions ).match() } ) } } -} +} \ No newline at end of file diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 1b8e3213..82a80eb3 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -153,7 +153,6 @@ workflow BGC { } BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) - ch_versions = ch_versions.mix(BIGSLICE.out.versions) } // HMMSEARCH if (params.bgc_run_hmmsearch) { From 4f4f63002c23219ab63893bff829520217fb47d5 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Mon, 9 Mar 2026 09:46:31 +0200 Subject: [PATCH 09/16] Updating bigslice after the modules updates --- modules/nf-core/bigslice/{run => }/environment.yml | 0 modules/nf-core/bigslice/{run => }/main.nf | 5 ++--- modules/nf-core/bigslice/{run => }/meta.yml | 0 modules/nf-core/bigslice/{run => }/tests/main.nf.test | 6 +++--- modules/nf-core/bigslice/{run => }/tests/main.nf.test.snap | 4 ++-- modules/nf-core/bigslice/{run => }/tests/nextflow.config | 0 subworkflows/local/bgc.nf | 3 ++- 7 files changed, 9 insertions(+), 9 deletions(-) rename modules/nf-core/bigslice/{run => }/environment.yml (100%) rename modules/nf-core/bigslice/{run => }/main.nf (96%) rename modules/nf-core/bigslice/{run => }/meta.yml (100%) rename modules/nf-core/bigslice/{run => }/tests/main.nf.test (95%) rename modules/nf-core/bigslice/{run => }/tests/main.nf.test.snap (95%) rename modules/nf-core/bigslice/{run => }/tests/nextflow.config (100%) diff --git a/modules/nf-core/bigslice/run/environment.yml b/modules/nf-core/bigslice/environment.yml similarity index 100% 
rename from modules/nf-core/bigslice/run/environment.yml rename to modules/nf-core/bigslice/environment.yml diff --git a/modules/nf-core/bigslice/run/main.nf b/modules/nf-core/bigslice/main.nf similarity index 96% rename from modules/nf-core/bigslice/run/main.nf rename to modules/nf-core/bigslice/main.nf index f067986c..dc88bcc5 100644 --- a/modules/nf-core/bigslice/run/main.nf +++ b/modules/nf-core/bigslice/main.nf @@ -15,7 +15,7 @@ process BIGSLICE { output: tuple val(meta), path("${prefix}/result/data.db") , emit: db tuple val(meta), path("${prefix}/result/tmp/**/*.fa"), emit: fa - tuple val("${task.process}"), val('bigslice'), eval("echo 2.0.2"), topic: versions + tuple val("${task.process}"), val('bigslice'), eval("echo 2.0.2"), topic: versions, emit: versions_bigslice when: task.ext.when == null || task.ext.when @@ -25,7 +25,6 @@ process BIGSLICE { prefix = task.ext.prefix ?: "${meta.id}" def sample = meta.id """ - # Prepare BiG-SLiCE required input structure mkdir -p input/dataset/${sample} input/taxonomy cp bgc_files/* input/dataset/${sample}/ @@ -52,4 +51,4 @@ process BIGSLICE { touch ${prefix}/result/data.db touch ${prefix}/result/tmp/2e555308dfc411186cf012334262f127/test.fa """ -} \ No newline at end of file +} diff --git a/modules/nf-core/bigslice/run/meta.yml b/modules/nf-core/bigslice/meta.yml similarity index 100% rename from modules/nf-core/bigslice/run/meta.yml rename to modules/nf-core/bigslice/meta.yml diff --git a/modules/nf-core/bigslice/run/tests/main.nf.test b/modules/nf-core/bigslice/tests/main.nf.test similarity index 95% rename from modules/nf-core/bigslice/run/tests/main.nf.test rename to modules/nf-core/bigslice/tests/main.nf.test index e137e99b..19bcb011 100644 --- a/modules/nf-core/bigslice/run/tests/main.nf.test +++ b/modules/nf-core/bigslice/tests/main.nf.test @@ -77,7 +77,7 @@ nextflow_process { { assert snapshot( file(process.out.db[0][1]).name, process.out.fa[0][1].size(), - process.out.versions + process.out.findAll { key, val -> 
key.startsWith("versions")} ).match() } ) } @@ -107,11 +107,11 @@ nextflow_process { assertAll( { assert snapshot( process.out, - process.out.versions + process.out.findAll { key, val -> key.startsWith("versions")} ).match() } ) } } -} \ No newline at end of file +} diff --git a/modules/nf-core/bigslice/run/tests/main.nf.test.snap b/modules/nf-core/bigslice/tests/main.nf.test.snap similarity index 95% rename from modules/nf-core/bigslice/run/tests/main.nf.test.snap rename to modules/nf-core/bigslice/tests/main.nf.test.snap index cc26a103..c678d7f8 100644 --- a/modules/nf-core/bigslice/run/tests/main.nf.test.snap +++ b/modules/nf-core/bigslice/tests/main.nf.test.snap @@ -63,7 +63,7 @@ "nf-test": "0.9.3", "nextflow": "25.10.3" }, - "timestamp": "2026-02-06T15:13:53.99145463" + "timestamp": "2026-03-04T09:47:43.387153103" }, "streptomyces_coelicolor - bigslice - gbk": { "content": [ @@ -83,6 +83,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.3" }, - "timestamp": "2026-02-06T15:26:29.769543832" + "timestamp": "2026-03-04T09:47:30.918713387" } } \ No newline at end of file diff --git a/modules/nf-core/bigslice/run/tests/nextflow.config b/modules/nf-core/bigslice/tests/nextflow.config similarity index 100% rename from modules/nf-core/bigslice/run/tests/nextflow.config rename to modules/nf-core/bigslice/tests/nextflow.config diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 82a80eb3..3d10da87 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -13,7 +13,7 @@ include { COMBGC } from '../../modules/local/com include { TABIX_BGZIP as BGC_TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip' include { MERGE_TAXONOMY_COMBGC } from '../../modules/local/merge_taxonomy_combgc' include { GECCO_CONVERT } from '../../modules/nf-core/gecco/convert' -include { BIGSLICE } from '../../modules/nf-core/bigslice/run' +include { BIGSLICE } from '../../modules/nf-core/bigslice' workflow BGC { take: @@ -153,6 +153,7 @@ workflow BGC { } 
BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) + ch_versions = ch_versions.mix( BIGSLICE.out.versions_bigslice ) } // HMMSEARCH if (params.bgc_run_hmmsearch) { From 93223942ef14a394c5f9bcd44ccde8f08ce1160c Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Mon, 9 Mar 2026 23:02:48 +0200 Subject: [PATCH 10/16] Updating bgc versioning for bigslice --- subworkflows/local/bgc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 3d10da87..4bffcc10 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -153,7 +153,7 @@ workflow BGC { } BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) - ch_versions = ch_versions.mix( BIGSLICE.out.versions_bigslice ) + //ch_versions = ch_versions.mix( BIGSLICE.out.versions_bigslice ) } // HMMSEARCH if (params.bgc_run_hmmsearch) { From 48e6043922c39258cd992b8c1b164fc64b44b2d5 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Tue, 10 Mar 2026 12:49:09 +0200 Subject: [PATCH 11/16] Updating bgc versioning for bigslice --- subworkflows/local/bgc.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 4bffcc10..6b47b386 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -153,7 +153,6 @@ workflow BGC { } BIGSLICE(ch_bigslice_grouped, ch_bigslice_hmmdb) - //ch_versions = ch_versions.mix( BIGSLICE.out.versions_bigslice ) } // HMMSEARCH if (params.bgc_run_hmmsearch) { From 879f1c656ebf6c702226c8977835100df850de9a Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Tue, 10 Mar 2026 12:59:17 +0200 Subject: [PATCH 12/16] linting --- modules.json | 5 +++++ ro-crate-metadata.json | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/modules.json b/modules.json index 711a97d7..449abee9 100644 --- a/modules.json +++ b/modules.json @@ -70,6 +70,11 @@ "git_sha": "72c983560c9b9c2a02ff636451a5e5008f7d020b", "installed_by": ["modules"] }, + "bigslice": { + "branch": "master", + "git_sha": 
"875cf13d1c974d62483fddd55a02456880363b5c", + "installed_by": ["modules"] + }, "deeparg/downloaddata": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 74992aaa..78cab049 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-10-04T19:03:46+00:00", - "description": "

\n \n \n \"nf-core/funcscan\"\n \n

\n\n[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/codespaces/new/nf-core/funcscan)\n[![GitHub Actions CI Status](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/funcscan)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23funcscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/funcscan)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?style=plastic&label=HiRSE&labelColor=005aa0&link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/funcscan** is a bioinformatics best-practice analysis pipeline for the screening of nucleotide sequences such as assembled contigs for functional genes. It currently features mining for antimicrobial peptides, antibiotic resistance genes and biosynthetic gene clusters.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/funcscan/results).\n\nThe nf-core/funcscan AWS full test dataset are contigs generated by the MGnify service from the ENA. We used contigs generated from assemblies of chicken cecum shotgun metagenomes (study accession: MGYS00005631).\n\n## Pipeline summary\n\n1. Quality control of input sequences with [`SeqKit`](https://bioinf.shenwei.me/seqkit/)\n2. Taxonomic classification of contigs of **prokaryotic origin** with [`MMseqs2`](https://github.com/soedinglab/MMseqs2)\n3. Annotation of assembled prokaryotic contigs with [`Prodigal`](https://github.com/hyattpd/Prodigal), [`Pyrodigal`](https://github.com/althonos/pyrodigal), [`Prokka`](https://github.com/tseemann/prokka), or [`Bakta`](https://github.com/oschwengers/bakta)\n4. Annotation of coding sequences from 3. to obtain general protein families and domains with [`InterProScan`](https://github.com/ebi-pf-team/interproscan)\n5. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify)\n6. Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg). 
[`argNorm`](https://github.com/BigDataBiology/argNorm) is used to map the outputs of `DeepARG`, `AMRFinderPlus`, and `ABRicate` to the [`Antibiotic Resistance Ontology`](https://www.ebi.ac.uk/ols4/ontologies/aro) for consistent ARG classification terms.\n7. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/)\n8. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs\n9. Software version and methods text reporting with [`MultiQC`](http://multiqc.info/)\n\n![funcscan metro workflow](docs/images/funcscan_metro_workflow.png)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fasta\nCONTROL_REP1,AEG588A1_001.fasta\nCONTROL_REP2,AEG588A1_002.fasta\nCONTROL_REP3,AEG588A1_003.fasta\n```\n\nEach row represents a (multi-)fasta file of assembled contig sequences.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/funcscan \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --run_amp_screening \\\n --run_arg_screening \\\n --run_bgc_screening\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/funcscan/usage) and the [parameter documentation](https://nf-co.re/funcscan/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/funcscan/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/funcscan/output).\n\n## Credits\n\nnf-core/funcscan was originally written by Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. Fellows Yates.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\nAdam Talbot, Alexandru Mizeranschi, Hugo Tavares, J\u00falia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#funcscan` channel](https://nfcore.slack.com/channels/funcscan) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/funcscan for your analysis, please cite it using the following doi: [10.5281/zenodo.7643099](https://doi.org/10.5281/zenodo.7643099)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander 
Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/funcscan\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/funcscan)\n[![GitHub Actions CI Status](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/funcscan)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23funcscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/funcscan)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?style=plastic&label=HiRSE&labelColor=005aa0&link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/funcscan** is a bioinformatics best-practice analysis pipeline for the screening of nucleotide sequences such as assembled contigs for functional genes. It currently features mining for antimicrobial peptides, antibiotic resistance genes and biosynthetic gene clusters.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/funcscan/results).\n\nThe nf-core/funcscan AWS full test dataset are contigs generated by the MGnify service from the ENA. We used contigs generated from assemblies of chicken cecum shotgun metagenomes (study accession: MGYS00005631).\n\n## Pipeline summary\n\n1. Quality control of input sequences with [`SeqKit`](https://bioinf.shenwei.me/seqkit/)\n2. Taxonomic classification of contigs of **prokaryotic origin** with [`MMseqs2`](https://github.com/soedinglab/MMseqs2)\n3. Annotation of assembled prokaryotic contigs with [`Prodigal`](https://github.com/hyattpd/Prodigal), [`Pyrodigal`](https://github.com/althonos/pyrodigal), [`Prokka`](https://github.com/tseemann/prokka), or [`Bakta`](https://github.com/oschwengers/bakta)\n4. Annotation of coding sequences from 3. to obtain general protein families and domains with [`InterProScan`](https://github.com/ebi-pf-team/interproscan)\n5. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify)\n6. Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg). 
[`argNorm`](https://github.com/BigDataBiology/argNorm) is used to map the outputs of `DeepARG`, `AMRFinderPlus`, and `ABRicate` to the [`Antibiotic Resistance Ontology`](https://www.ebi.ac.uk/ols4/ontologies/aro) for consistent ARG classification terms.\n7. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/)\n8. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs\n9. Software version and methods text reporting with [`MultiQC`](http://multiqc.info/)\n\n![funcscan metro workflow](docs/images/funcscan_metro_workflow.png)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fasta\nCONTROL_REP1,AEG588A1_001.fasta\nCONTROL_REP2,AEG588A1_002.fasta\nCONTROL_REP3,AEG588A1_003.fasta\n```\n\nEach row represents a (multi-)fasta file of assembled contig sequences.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/funcscan \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --run_amp_screening \\\n --run_arg_screening \\\n --run_bgc_screening\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/funcscan/usage) and the [parameter documentation](https://nf-co.re/funcscan/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/funcscan/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/funcscan/output).\n\n## Credits\n\nnf-core/funcscan was originally written by Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. Fellows Yates.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\nAdam Talbot, Alexandru Mizeranschi, Hugo Tavares, J\u00falia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion, Dediu Octavian-Codrin.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#funcscan` channel](https://nfcore.slack.com/channels/funcscan) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/funcscan for your analysis, please cite it using the following doi: [10.5281/zenodo.7643099](https://doi.org/10.5281/zenodo.7643099)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> 
Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" From 534dab53b4b498061ba2be54458fd99939a58771 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Tue, 10 Mar 2026 14:28:16 +0200 Subject: [PATCH 13/16] Modification of the md files --- CHANGELOG.md | 1 + README.md | 2 +- docs/output.md | 23 +++++++++++++++++++++-- docs/usage.md | 28 ++++++++++++++++++++++++++++ ro-crate-metadata.json | 2 +- 5 files changed, 52 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4d641ff..fffeafd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#500](https://github.com/nf-core/funcscan/pull/500) Updated pipeline template to nf-core/tools version 3.4.1 (by @jfy133) - [#508](https://github.com/nf-core/funcscan/pull/508) Added support for antiSMASH's --clusterhmmer, --fullhmmer, and --tigrfam options (❤️ to @yusukepockyby for requesting, @jfy133) - [#506](https://github.com/nf-core/funcscan/pull/506) Added support GECCO convert for generation of additional files useful for downstream analysis (by @SkyLexS) +- Added BiG-SLiCE (`bigslice`) as a new BGC clustering tool in the BGC subworkflow. BiG-SLiCE clusters BGC sequences detected by antiSMASH and/or GECCO into Gene Cluster Families (GCFs) using an HMM-based approach. Activated with `--bgc_bigslice_run` and requires `--bgc_bigslice_db`. (by @SkyLexS) ### `Fixed` diff --git a/README.md b/README.md index 013fd0b7..ae8e4adf 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ The nf-core/funcscan AWS full test dataset are contigs generated by the MGnify s 4. Annotation of coding sequences from 3. 
to obtain general protein families and domains with [`InterProScan`](https://github.com/ebi-pf-team/interproscan) 5. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify) 6. Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg). [`argNorm`](https://github.com/BigDataBiology/argNorm) is used to map the outputs of `DeepARG`, `AMRFinderPlus`, and `ABRicate` to the [`Antibiotic Resistance Ontology`](https://www.ebi.ac.uk/ols4/ontologies/aro) for consistent ARG classification terms. -7. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/) +7. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`BiG-SLiCE`](https://github.com/medema-group/bigslice), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/) 8. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs 9. 
Software version and methods text reporting with [`MultiQC`](http://multiqc.info/) diff --git a/docs/output.md b/docs/output.md index bc36e94e..af6260c0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,7 +6,7 @@ The output of nf-core/funcscan provides reports for each of the functional group - **antibiotic resistance genes** (tools: [ABRicate](https://github.com/tseemann/abricate), [AMRFinderPlus](https://www.ncbi.nlm.nih.gov/pathogens/antimicrobial-resistance/AMRFinder), [DeepARG](https://bitbucket.org/gusphdproj/deeparg-ss/src/master), [fARGene](https://github.com/fannyhb/fargene), [RGI](https://card.mcmaster.ca/analyze/rgi) – summarised by [hAMRonization](https://github.com/pha4ge/hAMRonization). Results from ABRicate, AMRFinderPlus, and DeepARG are normalised to [ARO](https://obofoundry.org/ontology/aro.html) by [argNorm](https://github.com/BigDataBiology/argNorm).) - **antimicrobial peptides** (tools: [Macrel](https://github.com/BigDataBiology/macrel), [AMPlify](https://github.com/bcgsc/AMPlify), [ampir](https://ampir.marine-omics.net), [hmmsearch](http://hmmer.org) – summarised by [AMPcombi](https://github.com/Darcy220606/AMPcombi)) -- **biosynthetic gene clusters** (tools: [antiSMASH](https://docs.antismash.secondarymetabolites.org), [DeepBGC](https://github.com/Merck/deepbgc), [GECCO](https://gecco.embl.de), [hmmsearch](http://hmmer.org) – summarised by [comBGC](#combgc)) +- **biosynthetic gene clusters** (tools: [antiSMASH](https://docs.antismash.secondarymetabolites.org), [BiGSLiCE](https://github.com/medema-group/bigslice), [DeepBGC](https://github.com/Merck/deepbgc), [GECCO](https://gecco.embl.de), [hmmsearch](http://hmmer.org) – summarised by [comBGC](#combgc)) As a general workflow, we recommend to first look at the summary reports ([ARGs](#hamronization), [AMPs](#ampcombi), [BGCs](#combgc)), to get a general overview of what hits have been found across all the tools of each functional group. 
After which, you can explore the specific output directories of each tool to get more detailed information about each result. The tool-specific output directories also includes the output from the functional annotation steps of either [prokka](https://github.com/tseemann/prokka), [pyrodigal](https://github.com/althonos/pyrodigal), [prodigal](https://github.com/hyattpd/Prodigal), or [Bakta](https://github.com/oschwengers/bakta) if the `--save_annotations` flag was set. Additionally, taxonomic classifications from [MMseqs2](https://github.com/soedinglab/MMseqs2) are saved if the `--taxa_classification_mmseqs_db_savetmp` and `--taxa_classification_mmseqs_taxonomy_savetmp` flags are set. @@ -38,6 +38,7 @@ results/ | └── rgi/ ├── bgc/ | ├── antismash/ +| ├── bigslice/ | ├── deepbgc/ | ├── gecco/ | └── hmmsearch/ @@ -98,6 +99,7 @@ Antimicrobial Peptides (AMPs): Biosynthetic Gene Clusters (BGCs): - [antiSMASH](#antismash) – biosynthetic gene cluster detection. +- [BiGSLiCE](#bigslice) – biosynthetic gene cluster super-linear clustering engine. - [deepBGC](#deepbgc) - biosynthetic gene cluster detection, using a deep learning model. - [GECCO](#gecco) – biosynthetic gene cluster detection, using Conditional Random Fields (CRFs). - [hmmsearch](#hmmsearch) – biosynthetic gene cluster detection, based on hidden Markov models. @@ -386,7 +388,7 @@ Output Summaries: ### BGC detection tools -[antiSMASH](#antismash), [deepBGC](#deepbgc), [GECCO](#gecco), [hmmsearch](#hmmsearch). +[antiSMASH](#antismash), [BiGSLiCE](#bigslice), [deepBGC](#deepbgc), [GECCO](#gecco), [hmmsearch](#hmmsearch). Note that the BGC tools are run on a set of annotations generated on only long contigs (3000 bp or longer) by default. These specific filtered FASTA files are under `bgc/seqkit/`, and annotations files are under `annotation//long/`, if the corresponding saving flags are specified (see [parameter docs](https://nf-co.re/funcscan/parameters)). 
However the same annotations _should_ also be annotation files in the sister `all/` directory. @@ -428,6 +430,23 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation [antiSMASH](https://docs.antismash.secondarymetabolites.org) (**anti**biotics & **S**econdary **M**etabolite **A**nalysis **SH**ell) is a tool for rapid genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters in bacterial genomes. It identifies biosynthetic loci covering all currently known secondary metabolite compound classes in a rule-based fashion using profile HMMs and aligns the identified regions at the gene cluster level to their nearest relatives from a database containing experimentally verified gene clusters (MIBiG). +#### BiGSLiCE + +
+<summary>Output files</summary>
+
+- `bigslice/`
+  - `<run_name>/`
+    - `result/`
+      - `data.db`: SQLite database containing results for BGCs, CDSs, Gene Cluster Families (GCFs), HMMs and HSPs.
+    - `tmp/`
+      - `<bgc_id>/`
+        - `*.fa`: predicted biosynthetic features as FASTA files, one file per hit HMM.
+
+</details>
+
+[BiG-SLiCE](https://github.com/medema-group/bigslice) (**Bi**osynthetic **G**ene cluster **S**uper-**Li**near **C**lustering **E**ngine) is a highly scalable tool for the large-scale analysis and clustering of Biosynthetic Gene Clusters (BGCs) into Gene Cluster Families (GCFs). It takes BGC regions in GenBank format (e.g. output from antiSMASH or GECCO) along with an HMM database and produces an SQLite database of predicted BGC features and GCF assignments. BiG-SLiCE requires the HMM database to be supplied via `--bgc_bigslice_db` and is activated with `--bgc_bigslice_run`. It requires at least one of antiSMASH or GECCO (the latter run with GECCO `convert` in BiG-SLiCE output format) to be enabled.
+
 #### deepBGC
diff --git a/docs/usage.md b/docs/usage.md
index 4fe1028c..700f23b6 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -155,6 +155,15 @@ When the annotation is run with Prokka, the resulting `.gbk` file passed to anti
 If antiSMASH is run for BGC detection, we recommend to **not** run Prokka for annotation but instead use the default annotation tool (Pyrodigal), or switch to Prodigal or (for bacteria only!) Bakta.
 :::
 
+### BiGSLiCE
+
+[BiG-SLiCE](https://github.com/medema-group/bigslice) clusters BGC sequences into Gene Cluster Families (GCFs). It is activated with `--bgc_bigslice_run` and requires at least one BGC source to be enabled:
+
+- antiSMASH (default BGC tool), **or**
+- GECCO with `--bgc_gecco_runconvert --bgc_gecco_convertmode gbk --bgc_gecco_convertformat bigslice`
+
+BiG-SLiCE does **not** discover BGCs itself — it takes GenBank-format BGC regions produced by antiSMASH and/or GECCO convert as input. The HMM database must be provided explicitly via `--bgc_bigslice_db` (see [BiGSLiCE database](#bigslice-1) for details); it is not auto-downloaded by the pipeline.
+
 ## Databases and reference files
 
 Various tools of nf-core/funcscan use databases and reference files to operate.
@@ -513,6 +522,25 @@ deepbgc_db/
 └── myDetectors*.pkl
 ```
 
+### BiGSLiCE
+
+BiG-SLiCE requires its own HMM database. Unlike most other tools, the pipeline does **not** auto-download this database — it **must** be supplied manually with `--bgc_bigslice_db`.
+
+Download the pre-built database archive from the BiG-SLiCE GitHub releases page:
+
+```bash
+wget https://github.com/medema-group/bigslice/releases/download/v2.0.0rc/bigslice-models.2022-11-30.tar.gz
+tar -xzf bigslice-models.2022-11-30.tar.gz
+```
+
+Then supply the extracted directory to the pipeline:
+
+```bash
+--bgc_bigslice_db '/<path>/<to>/<bigslice_db>/'
+```
+
+The database directory should contain subdirectories such as `biosynthetic_pfams/` and `sub_pfams/` at the top level.
+ ### InterProScan [InterProScan](https://github.com/ebi-pf-team/interproscan) is used to provide more information about the proteins annotated on the contigs. By default, turning on this subworkflow with `--run_protein_annotation` will download and unzip the [InterPro database](http://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/5.72-103.0/) version 5.72-103.0. The database can be saved in the output directory `/databases/interproscan/` if the `--save_db` is turned on. diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 78cab049..190957b0 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-10-04T19:03:46+00:00", - "description": "

\n \n \n \"nf-core/funcscan\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/funcscan)\n[![GitHub Actions CI Status](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/funcscan)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23funcscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/funcscan)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?style=plastic&label=HiRSE&labelColor=005aa0&link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/funcscan** is a bioinformatics best-practice analysis pipeline for the screening of nucleotide sequences such as assembled contigs for functional genes. It currently features mining for antimicrobial peptides, antibiotic resistance genes and biosynthetic gene clusters.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/funcscan/results).\n\nThe nf-core/funcscan AWS full test dataset are contigs generated by the MGnify service from the ENA. We used contigs generated from assemblies of chicken cecum shotgun metagenomes (study accession: MGYS00005631).\n\n## Pipeline summary\n\n1. Quality control of input sequences with [`SeqKit`](https://bioinf.shenwei.me/seqkit/)\n2. Taxonomic classification of contigs of **prokaryotic origin** with [`MMseqs2`](https://github.com/soedinglab/MMseqs2)\n3. Annotation of assembled prokaryotic contigs with [`Prodigal`](https://github.com/hyattpd/Prodigal), [`Pyrodigal`](https://github.com/althonos/pyrodigal), [`Prokka`](https://github.com/tseemann/prokka), or [`Bakta`](https://github.com/oschwengers/bakta)\n4. Annotation of coding sequences from 3. to obtain general protein families and domains with [`InterProScan`](https://github.com/ebi-pf-team/interproscan)\n5. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify)\n6. Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg). 
[`argNorm`](https://github.com/BigDataBiology/argNorm) is used to map the outputs of `DeepARG`, `AMRFinderPlus`, and `ABRicate` to the [`Antibiotic Resistance Ontology`](https://www.ebi.ac.uk/ols4/ontologies/aro) for consistent ARG classification terms.\n7. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/)\n8. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs\n9. Software version and methods text reporting with [`MultiQC`](http://multiqc.info/)\n\n![funcscan metro workflow](docs/images/funcscan_metro_workflow.png)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fasta\nCONTROL_REP1,AEG588A1_001.fasta\nCONTROL_REP2,AEG588A1_002.fasta\nCONTROL_REP3,AEG588A1_003.fasta\n```\n\nEach row represents a (multi-)fasta file of assembled contig sequences.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/funcscan \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --run_amp_screening \\\n --run_arg_screening \\\n --run_bgc_screening\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/funcscan/usage) and the [parameter documentation](https://nf-co.re/funcscan/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/funcscan/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/funcscan/output).\n\n## Credits\n\nnf-core/funcscan was originally written by Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. Fellows Yates.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\nAdam Talbot, Alexandru Mizeranschi, Hugo Tavares, J\u00falia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion, Dediu Octavian-Codrin.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#funcscan` channel](https://nfcore.slack.com/channels/funcscan) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/funcscan for your analysis, please cite it using the following doi: [10.5281/zenodo.7643099](https://doi.org/10.5281/zenodo.7643099)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> 
Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/funcscan\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/funcscan)\n[![GitHub Actions CI Status](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/funcscan/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/funcscan/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/funcscan/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7643099-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7643099)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/funcscan)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23funcscan-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/funcscan)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n![HiRSE Code Promo Badge](https://img.shields.io/badge/Promo-8db427?style=plastic&label=HiRSE&labelColor=005aa0&link=https%3A%2F%2Fgo.fzj.de%2FCodePromo)\n\n## Introduction\n\n**nf-core/funcscan** is a bioinformatics best-practice analysis pipeline for the screening of nucleotide sequences such as assembled contigs for functional genes. It currently features mining for antimicrobial peptides, antibiotic resistance genes and biosynthetic gene clusters.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. 
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/funcscan/results).\n\nThe nf-core/funcscan AWS full test dataset are contigs generated by the MGnify service from the ENA. We used contigs generated from assemblies of chicken cecum shotgun metagenomes (study accession: MGYS00005631).\n\n## Pipeline summary\n\n1. Quality control of input sequences with [`SeqKit`](https://bioinf.shenwei.me/seqkit/)\n2. Taxonomic classification of contigs of **prokaryotic origin** with [`MMseqs2`](https://github.com/soedinglab/MMseqs2)\n3. Annotation of assembled prokaryotic contigs with [`Prodigal`](https://github.com/hyattpd/Prodigal), [`Pyrodigal`](https://github.com/althonos/pyrodigal), [`Prokka`](https://github.com/tseemann/prokka), or [`Bakta`](https://github.com/oschwengers/bakta)\n4. Annotation of coding sequences from 3. to obtain general protein families and domains with [`InterProScan`](https://github.com/ebi-pf-team/interproscan)\n5. Screening contigs for antimicrobial peptide-like sequences with [`ampir`](https://cran.r-project.org/web/packages/ampir/index.html), [`Macrel`](https://github.com/BigDataBiology/macrel), [`HMMER`](http://hmmer.org/), [`AMPlify`](https://github.com/bcgsc/AMPlify)\n6. Screening contigs for antibiotic resistant gene-like sequences with [`ABRicate`](https://github.com/tseemann/abricate), [`AMRFinderPlus`](https://github.com/ncbi/amr), [`fARGene`](https://github.com/fannyhb/fargene), [`RGI`](https://card.mcmaster.ca/analyze/rgi), [`DeepARG`](https://bench.cs.vt.edu/deeparg). 
[`argNorm`](https://github.com/BigDataBiology/argNorm) is used to map the outputs of `DeepARG`, `AMRFinderPlus`, and `ABRicate` to the [`Antibiotic Resistance Ontology`](https://www.ebi.ac.uk/ols4/ontologies/aro) for consistent ARG classification terms.\n7. Screening contigs for biosynthetic gene cluster-like sequences with [`antiSMASH`](https://antismash.secondarymetabolites.org), [`BiG-SLiCE`](https://github.com/medema-group/bigslice), [`DeepBGC`](https://github.com/Merck/deepbgc), [`GECCO`](https://gecco.embl.de/), [`HMMER`](http://hmmer.org/)\n8. Creating aggregated reports for all samples across the workflows with [`AMPcombi`](https://github.com/Darcy220606/AMPcombi) for AMPs, [`hAMRonization`](https://github.com/pha4ge/hAMRonization) for ARGs, and [`comBGC`](https://raw.githubusercontent.com/nf-core/funcscan/master/bin/comBGC.py) for BGCs\n9. Software version and methods text reporting with [`MultiQC`](http://multiqc.info/)\n\n![funcscan metro workflow](docs/images/funcscan_metro_workflow.png)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,fasta\nCONTROL_REP1,AEG588A1_001.fasta\nCONTROL_REP2,AEG588A1_002.fasta\nCONTROL_REP3,AEG588A1_003.fasta\n```\n\nEach row represents a (multi-)fasta file of assembled contig sequences.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/funcscan \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --run_amp_screening \\\n --run_arg_screening \\\n --run_bgc_screening\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/funcscan/usage) and the [parameter documentation](https://nf-co.re/funcscan/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/funcscan/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/funcscan/output).\n\n## Credits\n\nnf-core/funcscan was originally written by Jasmin Frangenberg, Anan Ibrahim, Louisa Perelo, Moritz E. Beber, James A. Fellows Yates.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\nAdam Talbot, Alexandru Mizeranschi, Hugo Tavares, J\u00falia Mir Pedrol, Martin Klapper, Mehrdad Jaberi, Robert Syme, Rosa Herbst, Vedanth Ramji, @Microbion, Dediu Octavian-Codrin.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#funcscan` channel](https://nfcore.slack.com/channels/funcscan) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/funcscan for your analysis, please cite it using the following doi: [10.5281/zenodo.7643099](https://doi.org/10.5281/zenodo.7643099)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> 
Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" From 057d14282e4425a7348e3a584eeda63689a3e5c7 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Tue, 10 Mar 2026 14:44:01 +0200 Subject: [PATCH 14/16] clearing conflicts --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fffeafd5..cd56cb70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#500](https://github.com/nf-core/funcscan/pull/500) Updated pipeline template to nf-core/tools version 3.4.1 (by @jfy133) - [#508](https://github.com/nf-core/funcscan/pull/508) Added support for antiSMASH's --clusterhmmer, --fullhmmer, and --tigrfam options (❤️ to @yusukepockyby for requesting, @jfy133) - [#506](https://github.com/nf-core/funcscan/pull/506) Added support GECCO convert for generation of additional files useful for downstream analysis (by @SkyLexS) -- Added BiG-SLiCE (`bigslice`) as a new BGC clustering tool in the BGC subworkflow. BiG-SLiCE clusters BGC sequences detected by antiSMASH and/or GECCO into Gene Cluster Families (GCFs) using an HMM-based approach. Activated with `--bgc_bigslice_run` and requires `--bgc_bigslice_db`. (by @SkyLexS) +- [#507](https://github.com/nf-core/funcscan/pull/507) Updated to nf-core template v3.5.1 (by @jfy133) +- [#510](https://github.com/nf-core/funcscan/pull/510) Fixed code to make Nextflow strict-syntax compliant (by @jfy133) +- [#519](https://github.com/nf-core/funcscan/pull/519) Added BiG-SLiCE (`bigslice`) as a new BGC clustering tool in the BGC subworkflow. 
BiG-SLiCE clusters BGC sequences detected by antiSMASH and/or GECCO into Gene Cluster Families (GCFs) using an HMM-based approach. Activated with `--bgc_bigslice_run` and requires `--bgc_bigslice_db`. (by @SkyLexS) ### `Fixed` From 925ef7e52864fcfacb6123f694d8d4f3eab1c6d3 Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Tue, 10 Mar 2026 14:57:08 +0200 Subject: [PATCH 15/16] trailing whitespaces --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index b4b6ba94..0b1bc491 100644 --- a/nextflow.config +++ b/nextflow.config @@ -257,7 +257,7 @@ params { bgc_gecco_convertmode = 'clusters' bgc_gecco_convertformat = 'gff' - + bgc_bigslice_run = false bgc_bigslice_db = null From 59c07338e9561379193572072ea6c336b35d1f8c Mon Sep 17 00:00:00 2001 From: SkyLexS Date: Fri, 20 Mar 2026 11:39:09 +0200 Subject: [PATCH 16/16] Move BigSLICE input validation to pipeline initialisation --- docs/output.md | 4 ++- docs/usage.md | 8 +++-- subworkflows/local/bgc.nf | 31 ++++++------------- .../utils_nfcore_funcscan_pipeline/main.nf | 8 +++++ 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/docs/output.md b/docs/output.md index af6260c0..8993c021 100644 --- a/docs/output.md +++ b/docs/output.md @@ -445,7 +445,9 @@ Note that filtered FASTA is only used for BGC workflow for run-time optimisation
-[BiG-SLiCE](https://github.com/medema-group/bigslice) (**Bi**osynthetic **G**ene cluster **S**uper-**Li**near **C**lustering **E**ngine) is a highly scalable tool for the large-scale analysis and clustering of Biosynthetic Gene Clusters (BGCs) into Gene Cluster Families (GCFs). It takes BGC regions in GenBank format (e.g. output from antiSMASH or GECCO) along with an HMM database and produces an SQLite database of predicted BGC features and GCF assignments. BiG-SLiCE requires the HMM database to be supplied via `--bgc_bigslice_db` and is activated with `--bgc_bigslice_run`. It requires at least one of antiSMASH or GECCO (with convert in bigslice format) to be enabled. +[BiG-SLiCE](https://github.com/medema-group/bigslice) (**Bi**osynthetic **G**ene cluster **S**uper-**Li**near **C**lustering **E**ngine) is a highly scalable tool for the large-scale analysis and clustering of Biosynthetic Gene Clusters (BGCs) into Gene Cluster Families (GCFs). +It takes BGC regions in GenBank format (e.g. output from antiSMASH or GECCO) along with an HMM database and produces an SQLite database of predicted BGC features and GCF assignments. +BiG-SLiCE requires the HMM database to be supplied via `--bgc_bigslice_db` and is activated with `--bgc_bigslice_run`. It requires at least one of antiSMASH or GECCO (with convert in bigslice format) to be enabled. #### deepBGC diff --git a/docs/usage.md b/docs/usage.md index 700f23b6..6359fe52 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -157,12 +157,14 @@ If antiSMASH is run for BGC detection, we recommend to **not** run Prokka for an ### BiGSLiCE -[BiG-SLiCE](https://github.com/medema-group/bigslice) clusters BGC sequences into Gene Cluster Families (GCFs). It is activated with `--bgc_bigslice_run` and requires at least one BGC source to be enabled: +[BiG-SLiCE](https://github.com/medema-group/bigslice) clusters BGC sequences into Gene Cluster Families (GCFs). 
+It is activated with `--bgc_bigslice_run` and requires at least one BGC source to be enabled: -- antiSMASH (default BGC tool), **or** +- antiSMASH (default BGC tool). - GECCO with `--bgc_gecco_runconvert --bgc_gecco_convertmode gbk --bgc_gecco_convertformat bigslice` -BiG-SLiCE does **not** discover BGCs itself — it takes GenBank-format BGC regions produced by antiSMASH and/or GECCO convert as input. The HMM database must be provided explicitly via `--bgc_bigslice_db` (see [BiGSLiCE database](#bigslice-1) for details); it is not auto-downloaded by the pipeline. +BiG-SLiCE does **not** discover BGCs itself — it takes GenBank-format BGC regions produced by antiSMASH and/or GECCO convert as input. +The HMM database must be provided explicitly via `--bgc_bigslice_db` (see [BiGSLiCE database](#bigslice-1) for details); it is not auto-downloaded by the pipeline. ## Databases and reference files diff --git a/subworkflows/local/bgc.nf b/subworkflows/local/bgc.nf index 6b47b386..fd839459 100644 --- a/subworkflows/local/bgc.nf +++ b/subworkflows/local/bgc.nf @@ -120,30 +120,17 @@ workflow BGC { // BIGSLICE if (params.bgc_bigslice_run) { - if (params.bgc_skip_antismash && (params.bgc_skip_gecco || !params.bgc_gecco_runconvert || params.bgc_gecco_convertformat != 'bigslice')) { - error('[nf-core/funcscan] error: BigSLICE requires at least one of: (1) antiSMASH enabled, or (2) GECCO enabled with GECCO convert in bigslice format. Please check your parameters.') - } - - if (params.bgc_bigslice_db) { - ch_bigslice_hmmdb = Channel.fromPath(params.bgc_bigslice_db, checkIfExists: true) - .first() - } - else { - error('[nf-core/funcscan] error: BigSLICE HMM database not found for --bgc_bigslice_db! 
Please check input.') - } + ch_bigslice_hmmdb = Channel.fromPath(params.bgc_bigslice_db, checkIfExists: true) + .first() - ch_bigslice_input = Channel.empty() - - if (!params.bgc_skip_antismash) { - ch_bigslice_input = ch_bigslice_input.mix( - ANTISMASH_ANTISMASH.out.gbk_results - ) - } + def gecco_bigslice = !params.bgc_skip_gecco && params.bgc_gecco_runconvert && params.bgc_gecco_convertformat == 'bigslice' - if (!params.bgc_skip_gecco && params.bgc_gecco_runconvert && params.bgc_gecco_convertformat == 'bigslice') { - ch_bigslice_input = ch_bigslice_input.mix( - GECCO_CONVERT.out.bigslice - ) + if (!params.bgc_skip_antismash && gecco_bigslice) { + ch_bigslice_input = ANTISMASH_ANTISMASH.out.gbk_results.mix(GECCO_CONVERT.out.bigslice) + } else if (!params.bgc_skip_antismash) { + ch_bigslice_input = ANTISMASH_ANTISMASH.out.gbk_results + } else { + ch_bigslice_input = GECCO_CONVERT.out.bigslice } ch_bigslice_grouped = ch_bigslice_input diff --git a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf index d0a4fa2f..55e8347a 100644 --- a/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_funcscan_pipeline/main.nf @@ -173,6 +173,14 @@ def validateInputParameters() { error("[nf-core/funcscan] ERROR: when specifying --bgc_gecco_convertmode 'clusters', --bgc_gecco_convertformat can only be set to 'gff'. You specified --bgc_gecco_convertformat '${params.bgc_gecco_convertformat}'. Check input!") } } + if (params.run_bgc_screening && params.bgc_bigslice_run) { + if (params.bgc_skip_antismash && (params.bgc_skip_gecco || !params.bgc_gecco_runconvert || params.bgc_gecco_convertformat != 'bigslice')) { + error('[nf-core/funcscan] ERROR: BigSLICE requires at least one of: (1) antiSMASH enabled, or (2) GECCO enabled with GECCO convert in bigslice format. 
Please check your parameters.') + } + if (!params.bgc_bigslice_db) { + error('[nf-core/funcscan] ERROR: BigSLICE HMM database not found for --bgc_bigslice_db! Please check input.') + } + } } //