Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,22 @@ process {
// SUBMISSION AND MANIFEST GENERATION
//

// Publish the assembly metadata CSV under the per-mode results folder.
withName: 'CREATE_ASSEMBLY_METADATA_CSV' {
    publishDir = [
        path: { "${params.outdir}/${params.mode}" },
        mode: params.publish_dir_mode,
        // Returning null from saveAs skips publishing; versions.yml is the only file held back.
        saveAs: { filename -> filename == 'versions.yml' ? null : filename }
    ]
}

// Publish the genome metadata TSV into its own `genome_metadata` folder.
withName: 'CREATE_GENOME_METADATA_TSV' {
    publishDir = [
        path: { "${params.outdir}/genome_metadata" },
        mode: params.publish_dir_mode,
        // Returning null from saveAs skips publishing; versions.yml is the only file held back.
        saveAs: { filename -> filename == 'versions.yml' ? null : filename }
    ]
}

withName: 'GENOME_UPLOAD' {
publishDir = [
path: { "${params.outdir}/${params.mode}/upload/manifests" },
Expand Down
52 changes: 52 additions & 0 deletions modules/local/create_assembly_metadata_csv/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
 * CREATE_ASSEMBLY_METADATA_CSV
 *
 * Writes `<meta.id>_assembly_metadata.csv`: a header line plus one data row with the columns
 * Runs, Coverage, Assembler, Version, Filepath, Sample.
 *
 * Input : tuple val(meta), path(fasta)
 *           meta.id is used for the tag and the output file name;
 *           meta.run_accession, meta.coverage, meta.assembler and meta.assembler_version
 *           are optional and become empty cells when absent.
 * Output: csv      - tuple val(meta), path("<meta.id>_assembly_metadata.csv")
 *         versions - versions.yml recording the bash version
 */
process CREATE_ASSEMBLY_METADATA_CSV {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/31/31f1c42a25a80ebc296a0aa07d83b3f0e408f9f3c240f9375c55d9790576c1de/data' :
        'community.wave.seqera.io/library/pip_pygments:37b2b421ce07e516' }"

    input:
    tuple val(meta), path(fasta)

    output:
    tuple val(meta), path("${meta.id}_assembly_metadata.csv"), emit: csv
    path "versions.yml"                                      , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // A missing key becomes an empty cell. An explicit null check is used instead of the
    // Elvis operator so that falsy-but-valid values (e.g. a coverage of 0) are not blanked.
    def cell = { key -> meta[key] != null ? meta[key] : '' }
    def sample = '' // Sample column left empty because co-assemblies are not supported
    def row = [
        cell('run_accession'),
        cell('coverage'),
        cell('assembler'),
        cell('assembler_version'),
        fasta.name,
        sample
    ].join(',')

    // The CSV heredoc delimiter is quoted so the shell writes the interpolated metadata
    // verbatim instead of expanding `$` or backticks that may occur inside the values.
    // NOTE(review): values are not CSV-escaped; a comma inside a value would shift columns.
    """
    cat > ${meta.id}_assembly_metadata.csv << 'EOF'
    Runs,Coverage,Assembler,Version,Filepath,Sample
    ${row}
    EOF

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bash: \$(bash --version | head -n1 | sed 's/.*version //; s/ .*//')
    END_VERSIONS
    """

    stub:
    // Variables defined under `script:` are not visible here, so the row is rebuilt
    // with the same rules to keep stub and real output identical.
    def cell = { key -> meta[key] != null ? meta[key] : '' }
    def row = [
        cell('run_accession'),
        cell('coverage'),
        cell('assembler'),
        cell('assembler_version'),
        fasta.name,
        ''
    ].join(',')
    """
    cat > ${meta.id}_assembly_metadata.csv << 'EOF'
    Runs,Coverage,Assembler,Version,Filepath,Sample
    ${row}
    EOF

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bash: \$(bash --version | head -n1 | sed 's/.*version //; s/ .*//')
    END_VERSIONS
    """
}
48 changes: 48 additions & 0 deletions modules/local/create_assembly_metadata_csv/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module description for CREATE_ASSEMBLY_METADATA_CSV (see main.nf in this directory).
name: "create_assembly_metadata_csv"
description: Create input CSV metadata file for assembly_uploader tool
keywords:
  - assembly
  - metadata
  - ena
  - submission
tools:
  - bash:
      description: Bash shell scripting
      homepage: https://www.gnu.org/software/bash/
      documentation: https://www.gnu.org/software/bash/manual/
      licence: ["GPL-3.0-or-later"]
      identifier: ""

input:
  - - meta:
        type: map
        # Only `id` is needed by the process; the remaining keys fall back to an
        # empty CSV cell when they are not set.
        description: |
          Groovy Map containing sample information:
          - id: Sample identifier (required; used for the output file name)
          - run_accession: ENA run accession (optional)
          - coverage: Assembly coverage value (optional)
          - assembler: Name of the assembler used (optional)
          - assembler_version: Version of the assembler (optional)
    - fasta:
        type: file
        description: Assembly FASTA file
        pattern: "*.{fasta,fa,fna}"

output:
  - csv:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information (same as input)
      - "*.csv":
          type: file
          description: CSV file containing assembly metadata to be used as input for the assembly_uploader
          pattern: "*_assembly_metadata.csv"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"

authors:
  - "@ochkalova"
maintainers:
  - "@ochkalova"
93 changes: 93 additions & 0 deletions modules/local/create_assembly_metadata_csv/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// nf-test suite for the local CREATE_ASSEMBLY_METADATA_CSV module.
// Every case feeds one [meta, fasta] tuple and snapshots all process outputs.
nextflow_process {

    name "Test Process CREATE_ASSEMBLY_METADATA_CSV"
    script "../main.nf"
    process "CREATE_ASSEMBLY_METADATA_CSV"

    tag "modules"
    tag "modules_local"
    tag "create_assembly_metadata_csv"

    // Case 1: every metadata key the module reads is supplied.
    test("test_create_assembly_metadata_csv - complete metadata") {

        when {
            process {
                """
                input[0] = [
                    [ id: 'test_sample', run_accession: 'ERR123456', coverage: 50.5, assembler: 'metaspades', assembler_version: '3.15.5' ],
                    file('https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz')
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Case 2: only `id` is supplied.
    test("test_create_assembly_metadata_csv - minimal metadata") {

        when {
            process {
                """
                input[0] = [
                    [ id: 'minimal_sample' ],
                    file('https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz')
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }

    // Case 3: same shape as case 1 but executed with -stub, so the `stub:` section runs.
    test("test_create_assembly_metadata_csv - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id: 'stub_sample', run_accession: 'ERR999999', coverage: 75.0, assembler: 'megahit', assembler_version: '1.2.9' ],
                    file('https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz')
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
48 changes: 48 additions & 0 deletions modules/local/create_genome_metadata_tsv/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
 * CREATE_GENOME_METADATA_TSV
 *
 * Writes `<prefix>.tsv` (prefix = task.ext.prefix, default 'genomes_metadata'): a header line
 * plus one tab-separated data row built from `meta` and the FASTA file name.
 *
 * Input : tuple val(meta), path(fasta)
 * Output: tsv      - the generated *.tsv
 *         versions - versions.yml recording the bash version
 *
 * NOTE(review): the default prefix does not include meta.id, so every task produces a file
 * with the same name - confirm downstream collection/publishing does not collide.
 */
process CREATE_GENOME_METADATA_TSV {
    tag "${meta.id}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'oras://community.wave.seqera.io/library/bash:5.2.26' :
        'community.wave.seqera.io/library/bash:5.2.26'}"

    input:
    tuple val(meta), path(fasta)

    output:
    path "*.tsv"       , emit: tsv
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def prefix = task.ext.prefix ?: 'genomes_metadata'
    // Only the exact string "Yes" maps to "True"; anything else (including a missing key) is "False".
    def co_assembly_value  = meta.co_assembly == "Yes" ? "True" : "False"
    def rna_presence_value = meta.RNA_presence == "Yes" ? "True" : "False"

    // Single ordered column -> value map, so the header and the data row cannot drift apart.
    // A meta key that is not set is rendered as the text "null", exactly as plain
    // string interpolation would render it.
    def columns = [
        'genome_name'              : meta.id,
        'genome_path'              : fasta.getName(),
        'accessions'               : meta.accession,
        'assembly_software'        : meta.assembly_software,
        'binning_software'         : meta.binning_software,
        'binning_parameters'       : meta.binning_parameters,
        'stats_generation_software': meta.stats_generation_software,
        'completeness'             : meta.completeness,
        'contamination'            : meta.contamination,
        'genome_coverage'          : meta.genome_coverage,
        'metagenome'               : meta.metagenome,
        'co-assembly'              : co_assembly_value,
        'broad_environment'        : meta.broad_environment,
        'local_environment'        : meta.local_environment,
        'environmental_medium'     : meta.environmental_medium,
        'rRNA_presence'            : rna_presence_value,
        'NCBI_lineage'             : meta.NCBI_lineage
    ]
    def header = columns.keySet().join('\t')
    def row    = columns.values().join('\t')

    // Quoted heredoc: the shell copies the lines verbatim, so quotes, `$`, backticks or
    // backslashes inside metadata values can neither break the command nor be expanded.
    """
    cat > ${prefix}.tsv << 'EOF'
    ${header}
    ${row}
    EOF

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bash: \$(bash --version | head -n 1 | sed 's/GNU bash, version //; s/ .*//')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: 'genomes_metadata'
    // Same layout as the real script, except that the co-assembly and rRNA_presence
    // columns are the fixed placeholder "True".
    def columns = [
        'genome_name'              : meta.id,
        'genome_path'              : fasta.getName(),
        'accessions'               : meta.accession,
        'assembly_software'        : meta.assembly_software,
        'binning_software'         : meta.binning_software,
        'binning_parameters'       : meta.binning_parameters,
        'stats_generation_software': meta.stats_generation_software,
        'completeness'             : meta.completeness,
        'contamination'            : meta.contamination,
        'genome_coverage'          : meta.genome_coverage,
        'metagenome'               : meta.metagenome,
        'co-assembly'              : 'True',
        'broad_environment'        : meta.broad_environment,
        'local_environment'        : meta.local_environment,
        'environmental_medium'     : meta.environmental_medium,
        'rRNA_presence'            : 'True',
        'NCBI_lineage'             : meta.NCBI_lineage
    ]
    def header = columns.keySet().join('\t')
    def row    = columns.values().join('\t')
    """
    cat > ${prefix}.tsv << 'EOF'
    ${header}
    ${row}
    EOF

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bash: \$(bash --version | head -n 1 | sed 's/GNU bash, version //; s/ .*//')
    END_VERSIONS
    """
}
60 changes: 60 additions & 0 deletions modules/local/create_genome_metadata_tsv/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module description for CREATE_GENOME_METADATA_TSV (see main.nf in this directory).
name: create_genome_metadata_tsv
description: Creates a tab-separated TSV file containing metadata for genome MAGs/bins submissions
keywords:
  - genomics
  - metadata
  - submission
  - metagenomics
  - MAG
  - bins
tools:
  - bash:
      description: GNU Bourne-Again SHell
      homepage: https://www.gnu.org/software/bash/
      documentation: https://www.gnu.org/software/bash/manual/
      licence:
        - GPL-3.0-or-later
      identifier: ''

# One input channel carrying a [meta, fasta] tuple.
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing genome metadata. Required fields:
          - id: Genome identifier
          - accession: Sample accession
          - assembly_software: Software used for assembly
          - binning_software: Software used for binning
          - binning_parameters: Parameters used for binning
          - stats_generation_software: Software used for stats generation
          - completeness: Genome completeness percentage
          - contamination: Genome contamination percentage
          - genome_coverage: Genome coverage value
          - metagenome: Metagenome identifier
          - co_assembly: Co-assembly status ("Yes" or other)
          - broad_environment: Broad environmental context
          - local_environment: Local environmental context
          - environmental_medium: Environmental medium description
          - RNA_presence: RNA presence status ("Yes" or other)
          - NCBI_lineage: NCBI taxonomic lineage
    - fasta:
        type: file
        description: Genome FASTA file
        pattern: '*.{fasta,fa,fna}'

# Neither output channel carries the meta map.
output:
  - tsv:
      - '*.tsv':
          type: file
          description: Tab-separated TSV file with genome metadata
          pattern: '*.tsv'
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: versions.yml

authors:
  - '@ochkalova'
maintainers:
  - '@ochkalova'
Loading
Loading