Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions subworkflows/nf-core/bam_impute_quilt2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
include { QUILT_QUILT2 } from '../../../modules/nf-core/quilt/quilt2/main'
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main'
include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main'

// Impute low-coverage BAM/CRAM inputs with QUILT2 per chunk, then ligate the
// chunked outputs into one indexed VCF per chromosome.
//
// NOTE: a transient `regionout` key is added to the meta map to keep per-chunk
// outputs distinct and is removed again before ligation — it must NOT be used
// in the incoming meta maps.
workflow BAM_IMPUTE_QUILT2 {
    take:
    ch_input           // channel (mandatory): [ meta, bam/cram, bai/crai, bampaths, bamnames ]
    ch_reference_panel // channel (mandatory): [ meta, reference_vcf, reference_index ]
    ch_chunks          // channel (optional): [ meta, chr, start, end ]
    ch_map             // channel (optional): [ meta, genetic_map ]
    ch_fasta           // channel (optional): [ meta, fasta, fai ]
    n_gen              // integer: Number of generations since founding or mixing
    buffer             // integer: Buffer of region to perform imputation over

    main:

    ch_versions = channel.empty()

    // Match the reference panel with its genetic map and imputation chunks on
    // the shared meta key (panel id + chromosome).
    ch_parameters = ch_reference_panel
        .combine(ch_map, by: 0)
        .combine(ch_chunks, by: 0)

    // Fail loudly when the meta keys of the three parameter channels do not
    // line up — an empty channel would otherwise silently skip imputation.
    ch_parameters.ifEmpty {
        error("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.")
    }

    // Pair every input with every (panel, map, chunk) combination and assemble
    // the QUILT2 input tuple.
    ch_bam_params = ch_input
        .combine(ch_parameters)
        .map { meta_input, bam, bai, bampath, bamname, meta_panel, reference_vcf, reference_index, genetic_map, chr, start, end ->
            // Without chunk coordinates ([] placeholders), impute over the
            // whole chromosome; otherwise restrict to chr:start-end.
            def regionout = "${chr}"
            if (start != [] && end != []) {
                regionout = "${chr}:${start}-${end}"
            }

            [
                meta_panel + meta_input + [regionout: regionout],
                bam,
                bai,
                bampath,
                bamname,
                reference_vcf,
                reference_index,
                [],
                [],
                [],
                chr,
                start,
                end,
                n_gen,
                buffer,
                genetic_map,
            ]
        }

    QUILT_QUILT2(ch_bam_params, ch_fasta)
    ch_versions = ch_versions.mix(QUILT_QUILT2.out.versions_r_quilt)
    ch_versions = ch_versions.mix(QUILT_QUILT2.out.versions_r_base)

    // Drop the transient `regionout` key so per-chunk VCFs of the same sample
    // and chromosome group back together, then ligate them into one VCF.
    ligate_input = QUILT_QUILT2.out.vcf
        .join(QUILT_QUILT2.out.tbi)
        .map { meta, vcf, index ->
            def keys_to_keep = meta.keySet() - ['regionout']
            [meta.subMap(keys_to_keep), vcf, index]
        }
        .groupTuple()

    GLIMPSE2_LIGATE(ligate_input)

    BCFTOOLS_INDEX(GLIMPSE2_LIGATE.out.merged_variants)

    // Join each ligated VCF with its index (tbi or csi, whichever was made);
    // strict join flags surface any missing or duplicated index early.
    ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join(
        BCFTOOLS_INDEX.out.tbi.mix(BCFTOOLS_INDEX.out.csi),
        failOnMismatch: true,
        failOnDuplicate: true,
    )

    emit:
    vcf_index = ch_vcf_index // channel: [ meta, vcf, tbi/csi ]
    versions  = ch_versions  // channel: [ versions.yml ] or topic-based version tuples
}
131 changes: 131 additions & 0 deletions subworkflows/nf-core/bam_impute_quilt2/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "bam_impute_quilt2"
description: |
  Impute low-coverage BAM or CRAM inputs with QUILT2 and ligate chunked outputs per chromosome.
  The subworkflow temporarily stores the imputed region in a `regionout` meta key, which is
  removed again before ligation; this key must therefore not be used in the input meta maps.
keywords:
- bam
- cram
- imputation
- quilt
- quilt2
- vcf
components:
- quilt/quilt2
- glimpse2/ligate
- bcftools/index
input:
- ch_input:
description: |
Channel with target sequencing data and optional rename files.
structure:
- meta:
type: map
description: Metadata map for the input batch.
- bam_or_cram:
type: file
description: Input BAM or CRAM file, or a list of BAM or CRAM files.
pattern: "*.{bam,cram}"
- index:
type: file
description: Index for the BAM or CRAM input.
pattern: "*.{bai,crai}"
- bampaths:
type: file
description: |
Optional text file listing BAM or CRAM paths to impute.
One file per line.
pattern: "*.{txt,tsv}"
- bamnames:
type: file
description: |
Optional text file listing sample names in the same order as `bampaths`.
One file per line.
pattern: "*.{txt,tsv}"
- ch_reference_panel:
description: |
Channel with phased reference panel VCFs per chromosome.
structure:
- meta:
type: map
description: |
Metadata map that will be combined with the input metadata.
Must include `id` for the panel name and `chr` for the chromosome.
- vcf:
type: file
description: Reference panel VCF or BCF.
pattern: "*.{vcf,vcf.gz,bcf}"
- index:
type: file
description: Index for the reference panel VCF or BCF.
pattern: "*.{tbi,csi}"
- ch_chunks:
description: |
Channel with optional imputation chunks per chromosome.
structure:
- meta:
type: map
description: Metadata map with the panel id and chromosome.
- chr:
type: string
description: Chromosome name.
- start:
type: integer
description: Start position of the chunk.
- end:
type: integer
description: End position of the chunk.
- ch_map:
description: |
Channel with optional genetic maps per chromosome.
structure:
- meta:
type: map
description: Metadata map with the panel id and chromosome.
- map:
type: file
description: Genetic map used by QUILT2.
pattern: "*.{txt,map}{,gz}"
- ch_fasta:
description: |
Channel with optional reference FASTA data, required for CRAM inputs.
structure:
- meta:
type: map
description: Metadata map for the reference genome.
- fasta:
type: file
description: Reference genome FASTA.
pattern: "*.{fa,fasta}"
- fai:
type: file
description: FASTA index file.
pattern: "*.fai"
- n_gen:
type: integer
description: Number of generations since founding or mixing.
- buffer:
type: integer
description: Buffer of region to perform imputation over.
output:
- vcf_index:
description: |
Imputed and indexed VCFs after ligating chunked outputs per chromosome.
structure:
- meta:
type: map
description: Metadata map combined from the input batch and panel metadata.
- vcf:
type: file
description: Imputed VCF file.
pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
- index:
type: file
description: Index for the imputed VCF or BCF file.
pattern: "*.{tbi,csi}"
- versions:
description: |
Collected software version information emitted by the constituent modules.
authors:
- "@atrigila"
maintainers:
- "@atrigila"
155 changes: 155 additions & 0 deletions subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// nf-test specification for the BAM_IMPUTE_QUILT2 subworkflow: one real run
// (single sample, single chunk, map + fasta), one stub run with two
// chromosomes and optional inputs, and one expected-failure run where the
// parameter channels' meta keys do not match.
nextflow_workflow {

    name "Test Subworkflow BAM_IMPUTE_QUILT2"
    script "../main.nf"
    config "./nextflow.config"

    workflow "BAM_IMPUTE_QUILT2"

    tag "subworkflows"
    tag "subworkflows_nfcore"
    tag "subworkflows/bam_impute_quilt2"

    tag "quilt"
    tag "quilt/quilt2"
    tag "bcftools"
    tag "bcftools/index"
    tag "glimpse2"
    tag "glimpse2/ligate"

    test("Impute with quilt2 one individual, one region, map and fasta") {
        when {
            params {
                quilt_args = "--save_prepared_reference=TRUE --seed=1"
            }
            workflow {
                """
                bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true)
                input[0] = channel.of([
                    [id: "allid"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true)
                ])
                    .combine(bampath)
                    .combine(channel.of([[]]))
                input[1] = channel.of([
                    [id: "1000GP", chr: "chr22"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true)
                ])
                input[2] = channel.of(
                    [[id: "1000GP", chr: "chr22"], "chr22", "16570000", "16610000"]
                )
                input[3] = channel.of([
                    [id: "1000GP", chr: "chr22"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists: true)
                ])
                input[4] = channel.of([
                    [id: "GRCh38"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true)
                ]).collect()
                input[5] = 100
                input[6] = 10000
                """
            }
        }
        then {
            assert workflow.success
            // Snapshot file names, VCF summary, sample names and variant MD5
            // rather than raw paths, so the snapshot is stable across runs.
            assert snapshot(
                workflow.out.vcf_index.collect { meta, vcf, index -> [
                    path(vcf).getFileName().toString(),
                    path(index).getFileName().toString(),
                    path(vcf).vcf.summary,
                    path(vcf).vcf.header.getGenotypeSamples().sort(),
                    path(vcf).vcf.variantsMD5
                ]},
                workflow.out.versions.collect { it instanceof String ? path(it).yaml : it }
            ).match()
        }
    }

    test("homo_sapiens - empty channels - stub") {
        options "-stub"
        when {
            params {
                quilt_args = "--seed=1"
            }
            workflow {
                """
                bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true)
                bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true)
                ch_samples = channel.of([
                    [id: "allid"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true)
                ])
                input[0] = ch_samples.combine(bampath).combine(bamname)
                input[1] = channel.of(
                    [[id: "1000GP", chr: "chr22"], [], []],
                    [[id: "1000GP", chr: "chr21"], [], []]
                )
                input[2] = channel.of(
                    [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216],
                    [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999],
                    [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216],
                    [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999]
                )
                input[3] = channel.of(
                    [[id: "1000GP", chr: "chr22"], []],
                    [[id: "1000GP", chr: "chr21"], []]
                )
                input[4] = channel.of([[id: "GRCh38"], [], []]).collect()
                input[5] = 100
                input[6] = 10000
                """
            }
        }
        then {
            assert workflow.success
            assert snapshot(sanitizeOutput(workflow.out)).match()
        }
    }

    test("homo_sapiens - error empty join - stub") {
        options "-stub"
        when {
            params {
                quilt_args = "--seed=1"
            }
            workflow {
                """
                bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true)
                bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true)
                ch_samples = channel.of([
                    [id: "allid"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true)
                ])
                input[0] = ch_samples.combine(bampath).combine(bamname)
                // Panel ids deliberately disagree with chunks/map ids so the
                // keyed combine produces an empty channel and the subworkflow
                // raises its explicit error.
                input[1] = channel.of(
                    [[id: "otherpanel", chr: "chr22"], [], []],
                    [[id: "1000GP", chr: "chr21"], [], []]
                )
                input[2] = channel.of(
                    [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216],
                    [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999],
                    [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216],
                    [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999]
                )
                input[3] = channel.of(
                    [[id: "1000GP", chr: "chr22"], []],
                    [[id: "otherpanel", chr: "chr21"], []]
                )
                input[4] = channel.of([[id: "GRCh38"], [], []]).collect()
                input[5] = 100
                input[6] = 10000
                """
            }
        }
        then {
            assert workflow.failed
            assert workflow.errorMessage.contains("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.")
        }
    }
}
Loading
Loading