From 5f3baa4b4ac013d5a735f796803dd1171e276e25 Mon Sep 17 00:00:00 2001 From: Anabella Trigila <18577080+atrigila@users.noreply.github.com> Date: Wed, 15 Apr 2026 18:45:15 -0300 Subject: [PATCH] add quilt2 sbwf --- .../nf-core/bam_impute_quilt2/main.nf | 80 +++++++++ .../nf-core/bam_impute_quilt2/meta.yml | 131 +++++++++++++++ .../bam_impute_quilt2/tests/main.nf.test | 155 ++++++++++++++++++ .../bam_impute_quilt2/tests/main.nf.test.snap | 105 ++++++++++++ .../bam_impute_quilt2/tests/nextflow.config | 16 ++ 5 files changed, 487 insertions(+) create mode 100644 subworkflows/nf-core/bam_impute_quilt2/main.nf create mode 100644 subworkflows/nf-core/bam_impute_quilt2/meta.yml create mode 100644 subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test create mode 100644 subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/bam_impute_quilt2/tests/nextflow.config diff --git a/subworkflows/nf-core/bam_impute_quilt2/main.nf b/subworkflows/nf-core/bam_impute_quilt2/main.nf new file mode 100644 index 00000000000..0053051180b --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/main.nf @@ -0,0 +1,80 @@ +include { QUILT_QUILT2 } from '../../../modules/nf-core/quilt/quilt2/main' +include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' + +workflow BAM_IMPUTE_QUILT2 { + take: + ch_input // channel (mandatory): [ meta, bam/cram, bai/crai, bampaths, bamnames ] + ch_reference_panel // channel (mandatory): [ meta, reference_vcf, reference_index ] + ch_chunks // channel (optional): [ meta, chr, start, end ] + ch_map // channel (optional): [ meta, genetic_map ] + ch_fasta // channel (optional): [ meta, fasta, fai ] + n_gen // integer: Number of generations since founding or mixing + buffer // integer: Buffer of region to perform imputation over + + main: + + ch_versions = channel.empty() + + ch_parameters = ch_reference_panel + .combine(ch_map, by: 0) + .combine(ch_chunks, by: 0) + + ch_parameters.ifEmpty { + error("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.") + } + + ch_bam_params = ch_input + .combine(ch_parameters) + .map { meta_input, bam, bai, bampath, bamname, meta_panel, reference_vcf, reference_index, genetic_map, chr, start, end -> + def regionout = "${chr}" + if (start != [] && end != []) { + regionout = "${chr}:${start}-${end}" + } + + [ + meta_panel + meta_input + [regionout: regionout], + bam, + bai, + bampath, + bamname, + reference_vcf, + reference_index, + [], + [], + [], + chr, + start, + end, + n_gen, + buffer, + genetic_map, + ] + } + + QUILT_QUILT2(ch_bam_params, ch_fasta) + ch_versions = ch_versions.mix(QUILT_QUILT2.out.versions_r_quilt) + ch_versions = ch_versions.mix(QUILT_QUILT2.out.versions_r_base) + + ligate_input = QUILT_QUILT2.out.vcf + .join(QUILT_QUILT2.out.tbi) + .map { meta, vcf, index -> + def keys_to_keep = meta.keySet() - ['regionout'] + [meta.subMap(keys_to_keep), vcf, index] + } + .groupTuple() + + GLIMPSE2_LIGATE(ligate_input) + + BCFTOOLS_INDEX(GLIMPSE2_LIGATE.out.merged_variants) + + ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join( + BCFTOOLS_INDEX.out.tbi.mix(BCFTOOLS_INDEX.out.csi), + failOnMismatch: true, + failOnDuplicate: true, + ) + + emit: + vcf_index = ch_vcf_index // channel: [ meta, vcf, tbi/csi ] + versions = ch_versions // channel: [ versions.yml ] or topic-based version tuples +} diff --git a/subworkflows/nf-core/bam_impute_quilt2/meta.yml b/subworkflows/nf-core/bam_impute_quilt2/meta.yml new file mode 100644 index 00000000000..73b470ceb88 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/meta.yml @@ -0,0 +1,131 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_impute_quilt2" +description: | + Impute low-coverage BAM or CRAM inputs with QUILT2 and ligate chunked outputs per chromosome. +keywords: + - bam + - cram + - imputation + - quilt + - quilt2 + - vcf +components: + - quilt/quilt2 + - glimpse2/ligate + - bcftools/index +input: + - ch_input: + description: | + Channel with target sequencing data and optional rename files. + structure: + - meta: + type: map + description: Metadata map for the input batch. + - bam_or_cram: + type: file + description: Input BAM or CRAM file, or a list of BAM or CRAM files. + pattern: "*.{bam,cram}" + - index: + type: file + description: Index for the BAM or CRAM input. + pattern: "*.{bai,crai}" + - bampaths: + type: file + description: | + Optional text file listing BAM or CRAM paths to impute. + One file per line. + pattern: "*.{txt,tsv}" + - bamnames: + type: file + description: | + Optional text file listing sample names in the same order as `bampaths`. + One file per line. + pattern: "*.{txt,tsv}" + - ch_reference_panel: + description: | + Channel with phased reference panel VCFs per chromosome. + structure: + - meta: + type: map + description: | + Metadata map that will be combined with the input metadata. + Must include `id` for the panel name and `chr` for the chromosome. + - vcf: + type: file + description: Reference panel VCF or BCF. + pattern: "*.{vcf,vcf.gz,bcf}" + - index: + type: file + description: Index for the reference panel VCF or BCF. + pattern: "*.{tbi,csi}" + - ch_chunks: + description: | + Channel with optional imputation chunks per chromosome. + structure: + - meta: + type: map + description: Metadata map with the panel id and chromosome. + - chr: + type: string + description: Chromosome name. + - start: + type: integer + description: Start position of the chunk. + - end: + type: integer + description: End position of the chunk. + - ch_map: + description: | + Channel with optional genetic maps per chromosome. + structure: + - meta: + type: map + description: Metadata map with the panel id and chromosome. + - map: + type: file + description: Genetic map used by QUILT2. + pattern: "*.{txt,map}{,gz}" + - ch_fasta: + description: | + Channel with optional reference FASTA data, required for CRAM inputs. + structure: + - meta: + type: map + description: Metadata map for the reference genome. + - fasta: + type: file + description: Reference genome FASTA. + pattern: "*.{fa,fasta}" + - fai: + type: file + description: FASTA index file. + pattern: "*.fai" + - n_gen: + type: integer + description: Number of generations since founding or mixing. + - buffer: + type: integer + description: Buffer of region to perform imputation over. +output: + - vcf_index: + description: | + Imputed and indexed VCFs after ligating chunked outputs per chromosome. + structure: + - meta: + type: map + description: Metadata map combined from the input batch and panel metadata. + - vcf: + type: file + description: Imputed VCF file. + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - index: + type: file + description: Index for the imputed VCF or BCF file. + pattern: "*.{tbi,csi}" + - versions: + description: | + Collected software version information emitted by the constituent modules. +authors: + - "@atrigila" +maintainers: + - "@atrigila" diff --git a/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test b/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test new file mode 100644 index 00000000000..d66881ddd47 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test @@ -0,0 +1,155 @@ +nextflow_workflow { + + name "Test Subworkflow BAM_IMPUTE_QUILT2" + script "../main.nf" + config "./nextflow.config" + + workflow "BAM_IMPUTE_QUILT2" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_impute_quilt2" + + tag "quilt" + tag "quilt/quilt2" + tag "bcftools" + tag "bcftools/index" + tag "glimpse2" + tag "glimpse2/ligate" + + test("Impute with quilt2 one individual, one region, map and fasta") { + when { + params { + quilt_args = "--save_prepared_reference=TRUE --seed=1" + } + workflow { + """ + bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) + input[0] = channel.of([ + [id: "allid"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) + ]) + .combine(bampath) + .combine(channel.of([[]])) + input[1] = channel.of([ + [id: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true) + ]) + input[2] = channel.of( + [[id: "1000GP", chr: "chr22"], "chr22", "16570000", "16610000"] + ) + input[3] = channel.of([ + [id: "1000GP", chr: "chr22"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists: true) + ]) + input[4] = channel.of([ + [id: "GRCh38"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true) + ]).collect() + input[5] = 100 + input[6] = 10000 + """ + } + } + then { + assert workflow.success + assert snapshot( + workflow.out.vcf_index.collect { meta, vcf, index -> [ + path(vcf).getFileName().toString(), + path(index).getFileName().toString(), + path(vcf).vcf.summary, + path(vcf).vcf.header.getGenotypeSamples().sort(), + path(vcf).vcf.variantsMD5 + ]}, + workflow.out.versions.collect { it instanceof String ? path(it).yaml : it } + ).match() + } + } + + test("homo_sapiens - empty channels - stub") { + options "-stub" + when { + params { + quilt_args = "--seed=1" + } + workflow { + """ + bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) + bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true) + ch_samples = channel.of([ + [id: "allid"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) + ]) + input[0] = ch_samples.combine(bampath).combine(bamname) + input[1] = channel.of( + [[id: "1000GP", chr: "chr22"], [], []], + [[id: "1000GP", chr: "chr21"], [], []] + ) + input[2] = channel.of( + [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216], + [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999], + [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216], + [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999] + ) + input[3] = channel.of( + [[id: "1000GP", chr: "chr22"], []], + [[id: "1000GP", chr: "chr21"], []] + ) + input[4] = channel.of([[id: "GRCh38"], [], []]).collect() + input[5] = 100 + input[6] = 10000 + """ + } + } + then { + assert workflow.success + assert snapshot(sanitizeOutput(workflow.out)).match() + } + } + + test("homo_sapiens - error empty join - stub") { + options "-stub" + when { + params { + quilt_args = "--seed=1" + } + workflow { + """ + bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) + bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true) + ch_samples = channel.of([ + [id: "allid"], + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) + ]) + input[0] = ch_samples.combine(bampath).combine(bamname) + input[1] = channel.of( + [[id: "otherpanel", chr: "chr22"], [], []], + [[id: "1000GP", chr: "chr21"], [], []] + ) + input[2] = channel.of( + [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216], + [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999], + [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216], + [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999] + ) + input[3] = channel.of( + [[id: "1000GP", chr: "chr22"], []], + [[id: "otherpanel", chr: "chr21"], []] + ) + input[4] = channel.of([[id: "GRCh38"], [], []]).collect() + input[5] = 100 + input[6] = 10000 + """ + } + } + then { + assert workflow.failed + assert workflow.errorMessage.contains("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.") + } + } +} diff --git a/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test.snap b/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test.snap new file mode 100644 index 00000000000..0d6a47e7cd7 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test.snap @@ -0,0 +1,105 @@ +{ + "homo_sapiens - empty channels - stub": { + "content": [ + { + "vcf_index": [ + [ + { + "id": "allid", + "chr": "chr21" + }, + "allid_chr21.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "allid_chr21.ligate.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "allid", + "chr": "chr22" + }, + "allid_chr22.ligate.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "allid_chr22.ligate.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-base", + "4.4.3" + ], + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-base", + "4.4.3" + ], + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-base", + "4.4.3" + ], + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-base", + "4.4.3" + ], + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-quilt", + "2.0.4" + ], + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-quilt", + "2.0.4" + ], + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-quilt", + "2.0.4" + ], + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-quilt", + "2.0.4" + ] + ] + } + ], + "timestamp": "2026-04-15T18:38:49.393022606", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "Impute with quilt2 one individual, one region, map and fasta": { + "content": [ + [ + [ + "allid_chr22.ligate.vcf.gz", + "allid_chr22.ligate.vcf.gz.tbi", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=903, phased=true, phasedAutodetect=false]", + [ + "NA12878" + ], + "de865f8128d121d8ae4b5e4c3aa662e2" + ] + ], + [ + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-base", + "4.4.3" + ], + [ + "BAM_IMPUTE_QUILT2:QUILT_QUILT2", + "r-quilt", + "2.0.4" + ] + ] + ], + "timestamp": "2026-04-15T18:41:23.30801371", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_impute_quilt2/tests/nextflow.config b/subworkflows/nf-core/bam_impute_quilt2/tests/nextflow.config new file mode 100644 index 00000000000..30cbb8ab8f0 --- /dev/null +++ b/subworkflows/nf-core/bam_impute_quilt2/tests/nextflow.config @@ -0,0 +1,16 @@ +process { + withName: QUILT_QUILT2 { + cpus = 1 + ext.args = { "${params.quilt_args}" } + ext.prefix = { "${meta.id}_${meta.chr}_${meta.regionout}.quilt2" } + } + + withName: GLIMPSE2_LIGATE { + ext.prefix = { "${meta.id}_${meta.chr}.ligate" } + } + + withName: BCFTOOLS_INDEX { + ext.args = "--tbi" + publishDir = [enabled: false] + } +}