diff --git a/modules/nf-core/modkit/extract/calls/environment.yml b/modules/nf-core/modkit/extract/calls/environment.yml
new file mode 100644
index 00000000000..dac7b506e89
--- /dev/null
+++ b/modules/nf-core/modkit/extract/calls/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::ont-modkit=0.6.1"
diff --git a/modules/nf-core/modkit/extract/calls/main.nf b/modules/nf-core/modkit/extract/calls/main.nf
new file mode 100644
index 00000000000..1539d827f19
--- /dev/null
+++ b/modules/nf-core/modkit/extract/calls/main.nf
@@ -0,0 +1,49 @@
+process MODKIT_EXTRACT_CALLS {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/ont-modkit:0.6.1--hcdda2d0_0':
+        'quay.io/biocontainers/ont-modkit:0.6.1--hcdda2d0_0' }"
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    // fasta/fai may be empty; --reference is then omitted from the command
+    tuple val(meta2), path(fasta), path(fai)
+
+    output:
+    tuple val(meta), path("*.tsv{,.gz}"), emit: tsv
+    tuple val(meta), path("*.log")      , emit: log, optional: true
+    tuple val("${task.process}"), val('modkit'), eval("modkit --version | sed 's/modkit //'"), emit: versions_modkit, topic: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def reference = fasta ? "--reference ${fasta}" : ''
+    // Name the output .tsv.gz only when --bgzf is among the user-supplied args
+    def out_suffix = args.tokenize().contains('--bgzf') ? 'tsv.gz' : 'tsv'
+    """
+    modkit \\
+        extract \\
+        calls \\
+        $args \\
+        --threads ${task.cpus} \\
+        ${reference} \\
+        ${bam} \\
+        ${prefix}.${out_suffix}
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def bgzf = args.tokenize().contains('--bgzf')
+    // A .gz stub must be a valid gzip stream, not an empty file created by touch
+    def create_cmd = bgzf ? "echo '' | gzip > ${prefix}.tsv.gz" : "touch ${prefix}.tsv"
+    """
+    ${create_cmd}
+    """
+}
diff --git a/modules/nf-core/modkit/extract/calls/meta.yml b/modules/nf-core/modkit/extract/calls/meta.yml
new file mode 100644
index 00000000000..7b86e4d56c0
--- /dev/null
+++ b/modules/nf-core/modkit/extract/calls/meta.yml
@@ -0,0 +1,115 @@
+name: modkit_extract_calls
+description: |
+  Produce a per-read per-position table of base modification **calls**
+  (pass/fail/filtered, with the called base) from a modBAM using the same
+  thresholding algorithm as `modkit pileup`. Complementary to `modkit
+  extract full`, which emits raw probabilities: `extract calls` emits the
+  thresholded categorical decision per site per read.
+keywords:
+  - modkit
+  - methylation
+  - extract
+  - calls
+  - read-level
+  - modbam
+  - nanopore
+  - ont
+tools:
+  - "modkit":
+      description: A bioinformatics tool for working with modified bases in Oxford Nanopore
+        sequencing data.
+      homepage: https://github.com/nanoporetech/modkit
+      documentation: https://nanoporetech.github.io/modkit/
+      tool_dev_url: https://github.com/nanoporetech/modkit
+      licence:
+        - "Oxford Nanopore Technologies PLC. Public License Version 1.0"
+      identifier: ""
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`. The output inherits this meta.
+    - bam:
+        type: file
+        description: Input modBAM with MM/ML tags.
+        pattern: "*.{bam,cram}"
+        ontologies:
+          - edam: http://edamontology.org/format_2572
+    - bai:
+        type: file
+        description: BAM index (`.bai` or `.csi`).
+        pattern: "*.{bai,csi}"
+        ontologies: []
+  - - meta2:
+        type: map
+        description: |
+          Groovy Map containing reference information
+          e.g. `[ id:'mm10' ]`. May be `[[], [], []]` to skip — reference
+          context columns will then be "." in the output.
+    - fasta:
+        type: file
+        description: Optional reference FASTA. Required only to populate reference-context
+          columns in the output.
+        pattern: "*.{fa,fasta,fna}"
+        ontologies:
+          - edam: http://edamontology.org/format_1929
+    - fai:
+        type: file
+        description: Samtools FASTA index for `fasta`.
+        pattern: "*.fai"
+        ontologies:
+          - edam: http://edamontology.org/format_3475
+output:
+  tsv:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`.
+      - "*.tsv{,.gz}":
+          type: file
+          description: |
+            Per-read per-position call table. BGZF-compressed when `--bgzf`
+            is passed via `ext.args`.
+          pattern: "*.{tsv,tsv.gz}"
+          ontologies:
+            - edam: http://edamontology.org/format_3475
+  log:
+    - - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`.
+      - "*.log":
+          type: file
+          description: |
+            Optional modkit debug log, only emitted when
+            `--log-filepath <name>.log` is passed via `ext.args`.
+          pattern: "*.log"
+          ontologies: []
+  versions_modkit:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - modkit:
+          type: string
+          description: The name of the tool
+      - modkit --version | sed 's/modkit //':
+          type: eval
+          description: The expression to obtain the version of the tool
+topics:
+  versions:
+    - - ${task.process}:
+          type: string
+          description: The name of the process
+      - modkit:
+          type: string
+          description: The name of the tool
+      - modkit --version | sed 's/modkit //':
+          type: eval
+          description: The expression to obtain the version of the tool
+authors:
+  - "@sahuno"
+maintainers:
+  - "@sahuno"
diff --git a/modules/nf-core/modkit/extract/calls/tests/main.nf.test b/modules/nf-core/modkit/extract/calls/tests/main.nf.test
new file mode 100644
index 00000000000..3aadd7fc0bf
--- /dev/null
+++ b/modules/nf-core/modkit/extract/calls/tests/main.nf.test
@@ -0,0 +1,72 @@
+nextflow_process {
+
+    name "Test Process MODKIT_EXTRACT_CALLS"
+    script "../main.nf"
+    process "MODKIT_EXTRACT_CALLS"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "modkit"
+    tag "modkit/extract"
+    tag "modkit/extract/calls"
+
+    test("homo sapiens - nanopore modbam - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id: 'genome' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+
+    test("homo sapiens - nanopore modbam") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id: 'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.phased.bam.bai', checkIfExists: true)
+                ]
+                input[1] = [
+                    [ id: 'genome' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert process.out.tsv },
+                { assert path(process.out.tsv[0][1]).exists() },
+                { assert path(process.out.tsv[0][1]).size() > 0 },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/modkit/extract/calls/tests/main.nf.test.snap b/modules/nf-core/modkit/extract/calls/tests/main.nf.test.snap
new file mode 100644
index 00000000000..6d86deffa5b
--- /dev/null
+++ b/modules/nf-core/modkit/extract/calls/tests/main.nf.test.snap
@@ -0,0 +1,96 @@
+{
+    "homo sapiens - nanopore modbam": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,0ce2e5a6bf0889aaf8cbd682e2b17acb"
+                    ]
+                ],
+                "1": [
+
+                ],
+                "2": [
+                    [
+                        "MODKIT_EXTRACT_CALLS",
+                        "modkit",
+                        "0.6.1"
+                    ]
+                ],
+                "log": [
+
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,0ce2e5a6bf0889aaf8cbd682e2b17acb"
+                    ]
+                ],
+                "versions_modkit": [
+                    [
+                        "MODKIT_EXTRACT_CALLS",
+                        "modkit",
+                        "0.6.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-23T22:05:37.027585203",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.04.6"
+        }
+    },
+    "homo sapiens - nanopore modbam - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+
+                ],
+                "2": [
+                    [
+                        "MODKIT_EXTRACT_CALLS",
+                        "modkit",
+                        "0.6.1"
+                    ]
+                ],
+                "log": [
+
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions_modkit": [
+                    [
+                        "MODKIT_EXTRACT_CALLS",
+                        "modkit",
+                        "0.6.1"
+                    ]
+                ]
+            }
+        ],
+        "timestamp": "2026-04-23T22:05:22.616980777",
+        "meta": {
+            "nf-test": "0.9.5",
+            "nextflow": "25.04.6"
+        }
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/modkit/extract/calls/tests/nextflow.config b/modules/nf-core/modkit/extract/calls/tests/nextflow.config
new file mode 100644
index 00000000000..4c3353e86e4
--- /dev/null
+++ b/modules/nf-core/modkit/extract/calls/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: 'MODKIT_EXTRACT_CALLS' {
+        ext.args = ''
+    }
+}