diff --git a/modules/nf-core/dorado/aligner/environment.yml b/modules/nf-core/dorado/aligner/environment.yml
new file mode 100644
index 00000000000..6ea28e8c8af
--- /dev/null
+++ b/modules/nf-core/dorado/aligner/environment.yml
@@ -0,0 +1,5 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+# dorado is distributed under ONTPL and not on bioconda; use the vendor container.
+channels: []
+dependencies: []
diff --git a/modules/nf-core/dorado/aligner/main.nf b/modules/nf-core/dorado/aligner/main.nf
new file mode 100644
index 00000000000..59643365578
--- /dev/null
+++ b/modules/nf-core/dorado/aligner/main.nf
@@ -0,0 +1,46 @@
+process DORADO_ALIGNER {
+    tag "$meta.id"
+    label 'process_high'
+
+    // dorado is not on bioconda (ONTPL licence), so conda is disabled and the
+    // Docker Hub image is used directly. The SHA tag pins to v1.4.0; a semver tag
+    // is tracked in nanoporetech/dorado#1584. Same pattern as the
+    // nf-core/parabricks modules.
+    conda null
+    container "docker.io/nanoporetech/dorado:shac8f356489fa8b44b31beba841b84d2879de2088e"
+
+    input:
+    tuple val(meta), path(bam)                   // unaligned BAM from dorado basecaller
+    tuple val(meta2), path(reference), path(fai) // reference FASTA (or .mmi index) and .fai; fai is only staged, it is not passed on the command line
+
+    output:
+    tuple val(meta), path("*.bam")        , emit: bam
+    tuple val(meta), path("*_summary.tsv"), emit: summary, optional: true
+    tuple val("${task.process}"), val('dorado'), eval("dorado --version 2>&1 | head -1"), emit: versions_dorado, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args   = task.ext.args   ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // stdout is redirected to <prefix>.bam below, so an input with that exact name would be overwritten
+    if ("${bam}" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    dorado \\
+        aligner \\
+        ${args} \\
+        --threads ${task.cpus} \\
+        ${reference} \\
+        ${bam} \\
+        > ${prefix}.bam
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    if ("${bam}" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    touch ${prefix}.bam
+    touch ${prefix}_summary.tsv
+    """
+}
diff --git a/modules/nf-core/dorado/aligner/meta.yml b/modules/nf-core/dorado/aligner/meta.yml
new file mode 100644
index 00000000000..46d6c9aa892
--- /dev/null
+++ b/modules/nf-core/dorado/aligner/meta.yml
@@ -0,0 +1,111 @@
+name: dorado_aligner
+description: |
+  Align unaligned Oxford Nanopore BAM files (typically produced by dorado basecaller)
+  to a reference genome using the dorado aligner, which wraps minimap2 and
+  preserves modified base tags (MM/ML) and other BAM auxiliary tags.
+keywords:
+  - alignment
+  - minimap2
+  - ont
+  - long-read
+  - methylation
+  - modified-bases
+  - nanopore
+tools:
+  - "dorado":
+      description: Oxford Nanopore's aligner that wraps minimap2 and preserves
+        modification tags (MM/ML) from basecaller output.
+      homepage: https://github.com/nanoporetech/dorado
+      documentation: https://software-docs.nanoporetech.com/dorado/latest/
+      tool_dev_url: https://github.com/nanoporetech/dorado
+      licence:
+        - "Oxford Nanopore Technologies PLC. Public License Version 1.0"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'test', single_end:false ]`
+    - bam:
+        type: file
+        description: |
+          Unaligned BAM file produced by dorado basecaller. Modified base tags
+          (MM/ML) and other BAM tags are preserved through alignment.
+        pattern: "*.bam"
+        ontologies:
+          - edam: http://edamontology.org/format_2572
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference genome information
+          e.g. `[ id:'hg38' ]`
+    - reference:
+        type: file
+        description: |
+          Reference FASTA (or pre-built minimap2 .mmi index) to align to.
+        pattern: "*.{fa,fasta,fa.gz,fasta.gz,mmi}"
+        ontologies:
+          - edam: http://edamontology.org/format_1929
+    - fai:
+        type: file
+        description: |
+          FASTA index (.fai) for the reference. Required when reference is a FASTA.
+          Pass `[]` when reference is a minimap2 .mmi index.
+        pattern: "*.fai"
+        ontologies:
+          - edam: http://edamontology.org/format_3003
+output:
+  bam:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test', single_end:false ]`
+      - "*.bam":
+          type: file
+          description: |
+            Aligned BAM with modification tags preserved. Output sort order is
+            SO:unknown — pipe to SAMTOOLS_SORT + SAMTOOLS_INDEX for coordinate-sorted
+            indexed BAMs.
+          pattern: "*.bam"
+          ontologies:
+            - edam: http://edamontology.org/format_2572
+  summary:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'test', single_end:false ]`
+      - "*_summary.tsv":
+          type: file
+          description: Per-read summary TSV with alignment statistics. Present when
+            `--emit-summary` is set in task.ext.args.
+          pattern: "*_summary.tsv"
+          ontologies:
+            - edam: http://edamontology.org/format_3475
+  versions_dorado:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - dorado:
+          type: string
+          description: The name of the tool
+      - dorado --version 2>&1 | head -1:
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - dorado:
+          type: string
+          description: The name of the tool
+      - dorado --version 2>&1 | head -1:
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@sahuno"
+maintainers:
+  - "@sahuno"
diff --git a/modules/nf-core/dorado/aligner/tests/main.nf.test b/modules/nf-core/dorado/aligner/tests/main.nf.test
new file mode 100644
index 00000000000..7169cbbef1f
--- /dev/null
+++ b/modules/nf-core/dorado/aligner/tests/main.nf.test
@@ -0,0 +1,86 @@
+nextflow_process {
+
+    name "Test Process DORADO_ALIGNER"
+    script "../main.nf"
+    process "DORADO_ALIGNER"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "dorado"
+    tag "dorado/aligner"
+
+    // -------------------------------------------------------------------------
+    // Stub test — no real alignment; the snapshot still records the dorado version
+    // -------------------------------------------------------------------------
+
+    test("homo sapiens - GIAB HG002 unaligned BAM, hg38 reference - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'HG002' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/HG002_PAW70337_giab_10reads.unaligned.bam',
+                        checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id: 'genome' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta',
+                        checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai',
+                        checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    // -------------------------------------------------------------------------
+    // Real test — exercises minimap2 via dorado aligner (CPU-only, no GPU needed)
+    // Snapshots only the output file name and the tool version; BAM content is not
+    // hashed because header paths (@SQ UR:, @PG CL:) vary between test environments.
+    // -------------------------------------------------------------------------
+
+    test("homo sapiens - GIAB HG002 unaligned BAM, hg38 reference") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'HG002' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/HG002_PAW70337_giab_10reads.unaligned.bam',
+                        checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id: 'genome' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta',
+                        checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai',
+                        checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert path(process.out.bam[0][1]).exists() },
+                { assert path(process.out.bam[0][1]).size() > 0 },
+                { assert snapshot(
+                    file(process.out.bam[0][1]).name,
+                    process.out.versions_dorado
+                ).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/dorado/aligner/tests/main.nf.test.snap b/modules/nf-core/dorado/aligner/tests/main.nf.test.snap
new file mode 100644
index 00000000000..f87c88e1707
--- /dev/null
+++ b/modules/nf-core/dorado/aligner/tests/main.nf.test.snap
@@ -0,0 +1,76 @@
+{
+    "homo sapiens - GIAB HG002 unaligned BAM, hg38 reference": {
+        "content": [
+            "HG002.bam",
+            [
+                [
+                    "DORADO_ALIGNER",
+                    "dorado",
+                    "1.4.0+ba44a013"
+                ]
+            ]
+        ],
+        "timestamp": "2026-04-18T15:42:17.310250004",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.04.6"
+        }
+    },
+    "homo sapiens - GIAB HG002 unaligned BAM, hg38 reference - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "HG002"
+                        },
+                        "HG002.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "HG002"
+                        },
+                        "HG002_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        "DORADO_ALIGNER",
+                        "dorado",
+                        "1.4.0+ba44a013"
+                    ]
+                ],
+                "bam": [
+                    [
+                        {
+                            "id": "HG002"
+                        },
+                        "HG002.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "summary": [
+                    [
+                        {
+                            "id": "HG002"
+                        },
+                        "HG002_summary.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_dorado": [
+                    [
+                        "DORADO_ALIGNER",
+                        "dorado",
+                        "1.4.0+ba44a013"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-18T15:42:09.954814026",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.04.6"
+        }
+    }
+}
\ No newline at end of file