diff --git a/modules/nf-core/modkit/extract/full/environment.yml b/modules/nf-core/modkit/extract/full/environment.yml
new file mode 100644
index 00000000000..dac7b506e89
--- /dev/null
+++ b/modules/nf-core/modkit/extract/full/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::ont-modkit=0.6.1"
diff --git a/modules/nf-core/modkit/extract/full/main.nf b/modules/nf-core/modkit/extract/full/main.nf
new file mode 100644
index 00000000000..92f5a88039b
--- /dev/null
+++ b/modules/nf-core/modkit/extract/full/main.nf
@@ -0,0 +1,52 @@
+// Runs `modkit extract full` on a modBAM and writes a per-read, per-position
+// table to `<prefix>.tsv` (or `<prefix>.tsv.gz` when `--bgzf` is in `ext.args`).
+process MODKIT_EXTRACT_FULL {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ont-modkit:0.6.1--hcdda2d0_0':
+        'biocontainers/ont-modkit:0.6.1--hcdda2d0_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    tuple val(meta2), path(fasta), path(fai)
+
+    output:
+    tuple val(meta), path("*.tsv{,.gz}"), emit: tsv
+    tuple val(meta), path("*.log")      , emit: log, optional: true
+    tuple val("${task.process}"), val('modkit'), eval("modkit --version | sed 's/modkit //'"), emit: versions_modkit, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // The reference is optional: an empty `fasta` drops the `--reference` flag entirely
+    def reference = fasta ? "--reference ${fasta}" : ''
+    // BGZF output is detected automatically from --bgzf in ext.args; suffix accordingly
+    def out_suffix = args.tokenize().contains('--bgzf') ? 'tsv.gz' : 'tsv'
+    """
+    modkit \\
+        extract \\
+        full \\
+        $args \\
+        --threads ${task.cpus} \\
+        ${reference} \\
+        ${bam} \\
+        ${prefix}.${out_suffix}
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def out_suffix = args.tokenize().contains('--bgzf') ? 'tsv.gz' : 'tsv'
+    // A compressed stub must be a valid gzip stream, so pipe through gzip
+    // instead of touching an empty `.gz` file; the plain stub stays empty
+    def create_cmd = out_suffix.endsWith('.gz') ? "echo '' | gzip >" : 'touch'
+    """
+    ${create_cmd} ${prefix}.${out_suffix}
+    """
+}
diff --git a/modules/nf-core/modkit/extract/full/meta.yml b/modules/nf-core/modkit/extract/full/meta.yml
new file mode 100644
index 00000000000..0a2d9fee403
--- /dev/null
+++ b/modules/nf-core/modkit/extract/full/meta.yml
@@ -0,0 +1,115 @@
+name: modkit_extract_full
+description: |
+  Transform the probabilities from the MM/ML tags in a modBAM into a
+  tab-separated per-read-per-position table. Emits one row for every
+  modified-base probability call in every read — useful for downstream
+  custom filtering, plotting, and ML training. Optionally BGZF-compressed
+  via `--bgzf` in `ext.args`.
+keywords:
+  - modkit
+  - methylation
+  - extract
+  - read-level
+  - modbam
+  - nanopore
+  - ont
+tools:
+  - "modkit":
+      description: A bioinformatics tool for working with modified bases in Oxford Nanopore
+        sequencing data.
+      homepage: https://github.com/nanoporetech/modkit
+      documentation: https://nanoporetech.github.io/modkit/
+      tool_dev_url: https://github.com/nanoporetech/modkit
+      licence:
+        - "Oxford Nanopore Technologies PLC. Public License Version 1.0"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`. The output inherits this meta.
+    - bam:
+        type: file
+        description: Input modBAM with MM/ML tags.
+        pattern: "*.{bam,cram}"
+        ontologies:
+          - edam: http://edamontology.org/format_2572
+    - bai:
+        type: file
+        description: BAM index (`.bai` or `.csi`). Optional — if absent, modkit does
+          a serial scan of the BAM (see `--ignore-index`).
+        pattern: "*.{bai,csi}"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. `[ id:'mm10' ]`. May be `[[], [], []]` to skip (modkit works
+          unaligned, but reference context columns will be missing).
+    - fasta:
+        type: file
+        description: Reference FASTA. Required to populate reference-context columns
+          in the output (`ref_kmer`, strand, etc.).
+        pattern: "*.{fa,fasta,fna}"
+        ontologies:
+          - edam: http://edamontology.org/format_1929
+    - fai:
+        type: file
+        description: Samtools FASTA index for `fasta`.
+        pattern: "*.fai"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
+output:
+  tsv:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`.
+      - "*.tsv{,.gz}":
+          type: file
+          description: |
+            Per-read-per-position probability table. BGZF-compressed when
+            `--bgzf` is passed via `ext.args`.
+          pattern: "*.{tsv,tsv.gz}"
+          ontologies:
+            - edam: http://edamontology.org/format_3475
+  log:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`.
+      - "*.log":
+          type: file
+          description: |
+            Optional modkit debug log (only emitted when
+            `--log-filepath <prefix>.log` is passed via `ext.args`).
+          pattern: "*.log"
+          ontologies: []
+  versions_modkit:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - modkit:
+          type: string
+          description: The name of the tool
+      - modkit --version | sed 's/modkit //':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - modkit:
+          type: string
+          description: The name of the tool
+      - modkit --version | sed 's/modkit //':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@sahuno"
+maintainers:
+  - "@sahuno"
diff --git a/modules/nf-core/modkit/extract/full/tests/main.nf.test b/modules/nf-core/modkit/extract/full/tests/main.nf.test
new file mode 100644
index 00000000000..d6a041fc47f
--- /dev/null
+++ b/modules/nf-core/modkit/extract/full/tests/main.nf.test
@@ -0,0 +1,72 @@
+nextflow_process {
+
+    name "Test Process MODKIT_EXTRACT_FULL"
+    script "../main.nf"
+    process "MODKIT_EXTRACT_FULL"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "modkit"
+    tag "modkit/extract"
+    tag "modkit/extract/full"
+
+    test("homo sapiens - nanopore modbam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id: 'genome' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("homo sapiens - nanopore modbam") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id: 'genome' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.tsv },
+                { assert path(process.out.tsv[0][1]).exists() },
+                { assert path(process.out.tsv[0][1]).size() > 0 },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/modkit/extract/full/tests/main.nf.test.snap b/modules/nf-core/modkit/extract/full/tests/main.nf.test.snap
new file mode 100644
index 00000000000..3d0bea22e28
--- /dev/null
+++ b/modules/nf-core/modkit/extract/full/tests/main.nf.test.snap
@@ -0,0 +1,96 @@
+{
+    "homo sapiens - nanopore modbam": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,8300986c98d1b67973439ced4e7f233c"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    [
+                        "MODKIT_EXTRACT_FULL",
+                        "modkit",
+                        "0.6.1"
+                    ]
+                ],
+                "log": [
+                    
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,8300986c98d1b67973439ced4e7f233c"
+                    ]
+                ],
+                "versions_modkit": [
+                    [
+                        "MODKIT_EXTRACT_FULL",
+                        "modkit",
+                        "0.6.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-23T22:03:49.629847264",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.04.6"
+        }
+    },
+    "homo sapiens - nanopore modbam - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    
+                ],
+                "2": [
+                    [
+                        "MODKIT_EXTRACT_FULL",
+                        "modkit",
+                        "0.6.1"
+                    ]
+                ],
+                "log": [
+                    
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_modkit": [
+                    [
+                        "MODKIT_EXTRACT_FULL",
+                        "modkit",
+                        "0.6.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-23T22:03:35.292337916",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.04.6"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/modkit/extract/full/tests/nextflow.config b/modules/nf-core/modkit/extract/full/tests/nextflow.config
new file mode 100644
index 00000000000..aa9b6c33b5d
--- /dev/null
+++ b/modules/nf-core/modkit/extract/full/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'MODKIT_EXTRACT_FULL' {
+        ext.args = ''
+    }
+}