diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 5b6dfbc..ed09b3b 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -13,3 +13,50 @@ report_section_order: export_plots: true disable_version_detection: true + +custom_content: + order: + - sample_registration + - submission_results + +# Custom data configuration +custom_data: + + sample_registration: + id: 'sample_registration' + section_name: 'genomes_samples' + description: 'Sample accessions assigned during registration process' + file_format: 'tsv' + plot_type: 'table' + pconfig: + id: 'sample_registration' + title: 'Sample registration assigned accessions' + col1_header: 'ID' + col2_header: 'ENA sample accession' + headers: + alias: + title: 'Uploaded fasta' + description: 'FASTA file for upload' + accession: + title: 'ENA sample accession' + description: 'Assigned sample accession after registration' + + submission_results: + id: 'submission_results' + section_name: 'Submission results' + description: 'Accessions assigned during upload process' + file_format: 'tsv' + plot_type: 'table' + pconfig: + id: 'custom_metrics_table' + title: 'Submission results' + col1_header: 'ID' + col2_header: 'ENA accession' + headers: + alias: + title: 'Uploaded fasta' + description: 'FASTA file for upload' + accession: + title: 'ENA accession' + description: 'Assigned accession after submission' + diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf index 7918ce1..be053b1 100644 --- a/modules/local/ena_webin_cli_wrapper/main.nf +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -5,7 +5,7 @@ process ENA_WEBIN_CLI_WRAPPER { label 'process_low' tag "${meta.id}" - container "quay.io/microbiome-informatics/java_mgnify-pipelines-toolkit:1.4.21" + container "community.wave.seqera.io/library/ena-webin-cli_mgnify-pipelines-toolkit:0fd318932c5ba88e" stageInMode 'copy' input: @@ -14,6 +14,7 @@ process ENA_WEBIN_CLI_WRAPPER { output: tuple val(meta), 
path("*_accessions.tsv"), emit: accessions + path("*_accessions_mqc.tsv"), emit: accessions_multiqc path "versions.yml", emit: versions script: @@ -35,6 +36,8 @@ process ENA_WEBIN_CLI_WRAPPER { ${test_flag} \\ ${args} + cp ${prefix}_accessions.tsv ${prefix}_accessions_mqc.tsv + cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version 2>&1 | sed 's/Python //g') diff --git a/subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf b/subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf index 8a19c8a..ae85a5b 100644 --- a/subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_seqsubmit_pipeline/main.nf @@ -183,23 +183,65 @@ def validateInputSamplesheet(input) { // Generate methods description for MultiQC // def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report // fastafalidator + + def mgnify_tools = [ + "assembly_uploader (Richardson et al. 2023),", + "genome_uploader (Gurbich et al. 2023)," + ].join(' ').trim() + + def ena_tools = [ + "webin-cli (European Nucleotide Archive)," + ].join(' ').trim() + + def preprocessing_tools = [ + "py_fasta_validator (Edwards et al. 2023)," + ].join(' ').trim() + + def stats_tools = [ + "CheckM2 (Chklovski et al. 2023),", + "CoverM (Aroney et al. 2025)," + ].join(' ').trim() + + def taxonomy_tools = [ + "CAT_pack (Von Meijenfeldt et al. 2019)," + ].join(' ').trim() + + def rna_tools = [ + "barrnap (Author: Torsten Seemann),", + "tRNAscan-SE (Chan et al. 2021)," + ].join(' ').trim() + + def postprocessing_text = "MultiQC (Ewels et al. 2016)." + def citation_text = [ - "Tools used in the workflow included:", - "MultiQC (Ewels et al. 2016)", - "." 
+ mgnify_tools, + ena_tools, + preprocessing_tools, + stats_tools, + taxonomy_tools, + rna_tools, + postprocessing_text ].join(' ').trim() return citation_text } def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ + "
  • Richardson, L., Allen, B., Baldi, G., Beracochea, M., Bileschi, M. L., Burdett, T., ... & Finn, R. D. (2023). MGnify: the microbiome sequence data analysis resource in 2023. Nucleic acids research, 51(D1), D753-D759. doi: /10.1093/nar/gkac1080
  • ", + "
  • Gurbich, T. A., Almeida, A., Beracochea, M., Burdett, T., Burgin, J., Cochrane, G., ... & Finn, R. D. (2023). MGnify genomes: a resource for biome-specific microbial genome catalogues. Journal of molecular biology, 435(14), 168016. doi: /10.1016/j.jmb.2023.168016
  • ", + "
  • webin-cli: GitHub: https://github.com/enasequence/webin-cli. Docs: https://ena-docs.readthedocs.io/en/latest/submit/general-guide/webin-cli.html
  • ", + "
  • Edwards, R. (2023). linsalrob/py_fasta_validator: Compressy. doi: /10.5281/zenodo.5002710
  • ", + "
  • Chklovski, A., Parks, D. H., Woodcroft, B. J., & Tyson, G. W. (2023). CheckM2: a rapid, scalable and accurate tool for assessing microbial genome quality using machine learning. Nature Methods, 20(8), 1203-1212. doi: 10.1038/s41592-023-01940-w
  • ", + "
  • Aroney, S. T., Newell, R. J., Nissen, J. N., Camargo, A. P., Tyson, G. W., & Woodcroft, B. J. (2025). CoverM: read alignment statistics for metagenomics. Bioinformatics, 41(4), btaf147. doi: /10.1093/bioinformatics/btaf147
  • ", + "
  • Von Meijenfeldt, F. B., Arkhipova, K., Cambuy, D. D., Coutinho, F. H., & Dutilh, B. E. (2019). Robust taxonomic classification of uncharted microbial sequences and bins with CAT and BAT. Genome biology, 20(1), 217. doi: /10.1186/s13059-019-1817-x
  • ", + "
  • barrnap: GitHub: https://github.com/tseemann/barrnap
  • ", + "
  • Chan, P. P., Lin, B. Y., Mak, A. J., & Lowe, T. M. (2021). tRNAscan-SE 2.0: improved detection and functional classification of transfer RNA genes. Nucleic acids research, 49(16), 9077-9096. doi: /10.1093/nar/gkab688
  • ", "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() @@ -230,9 +272,8 @@ def methodsDescriptionText(mqc_methods_yaml) { meta["tool_citations"] = "" meta["tool_bibliography"] = "" - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - // meta["tool_bibliography"] = toolBibliographyText() + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() def methods_text = mqc_methods_yaml.text diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index a17299c..13a5d47 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -352,6 +352,13 @@ workflow GENOMESUBMIT { ch_methods_description = channel.value( methodsDescriptionText(ch_multiqc_custom_methods_description)) + samples_mqc = CREATE_MANIFESTS.out.upload_registered_mags + .map { meta, file -> + def newFile = file.copyTo(file.parent.resolve("${file.baseName}_mqc.tsv")) + return newFile + } + ch_multiqc_files = ch_multiqc_files.mix(samples_mqc) + ch_multiqc_files = ch_multiqc_files.mix(SUBMIT.out.accessions_multiqc) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile(