Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions subworkflows/nf-core/bam_impute_quilt2/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
include { QUILT_QUILT2 } from '../../../modules/nf-core/quilt/quilt2/main'
include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main'
include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main'

// Impute low-coverage BAM/CRAM inputs with QUILT2 per chunk, then ligate the
// chunked outputs into one indexed VCF per chromosome.
//
// NOTE: a transient `regionout` key is added to the meta map to keep per-chunk
// outputs distinct and is removed again before ligation — it must NOT be used
// in the incoming meta maps.
workflow BAM_IMPUTE_QUILT2 {
    take:
    ch_input           // channel (mandatory): [ meta, bam/cram, bai/crai, bampaths, bamnames ]
    ch_reference_panel // channel (mandatory): [ meta, reference_vcf, reference_index ]
    ch_chunks          // channel (optional): [ meta, chr, start, end ]
    ch_map             // channel (optional): [ meta, genetic_map ]
    ch_fasta           // channel (optional): [ meta, fasta, fai ]
    n_gen              // integer: Number of generations since founding or mixing
    buffer             // integer: Buffer of region to perform imputation over

    main:

    ch_versions = channel.empty()

    // Match the reference panel with its genetic map and imputation chunks on
    // the shared meta key (panel id + chromosome).
    ch_parameters = ch_reference_panel
        .combine(ch_map, by: 0)
        .combine(ch_chunks, by: 0)

    // Fail loudly when the meta keys of the three parameter channels do not
    // line up — an empty channel would otherwise silently skip imputation.
    ch_parameters.ifEmpty {
        error("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.")
    }

    // Pair every input with every (panel, map, chunk) combination and assemble
    // the QUILT2 input tuple.
    ch_bam_params = ch_input
        .combine(ch_parameters)
        .map { meta_input, bam, bai, bampath, bamname, meta_panel, reference_vcf, reference_index, genetic_map, chr, start, end ->
            // Without chunk coordinates ([] placeholders), impute over the
            // whole chromosome; otherwise restrict to chr:start-end.
            def regionout = "${chr}"
            if (start != [] && end != []) {
                regionout = "${chr}:${start}-${end}"
            }

            [
                meta_panel + meta_input + [regionout: regionout],
                bam,
                bai,
                bampath,
                bamname,
                reference_vcf,
                reference_index,
                [],
                [],
                [],
                chr,
                start,
                end,
                n_gen,
                buffer,
                genetic_map,
            ]
        }

    QUILT_QUILT2(ch_bam_params, ch_fasta)
    ch_versions = ch_versions.mix(QUILT_QUILT2.out.versions_r_quilt)
    ch_versions = ch_versions.mix(QUILT_QUILT2.out.versions_r_base)

    // Drop the transient `regionout` key so per-chunk VCFs of the same sample
    // and chromosome group back together, then ligate them into one VCF.
    ligate_input = QUILT_QUILT2.out.vcf
        .join(QUILT_QUILT2.out.tbi)
        .map { meta, vcf, index ->
            def keys_to_keep = meta.keySet() - ['regionout']
            [meta.subMap(keys_to_keep), vcf, index]
        }
        .groupTuple()

    GLIMPSE2_LIGATE(ligate_input)

    BCFTOOLS_INDEX(GLIMPSE2_LIGATE.out.merged_variants)

    // Join each ligated VCF with its index (tbi or csi, whichever was made);
    // strict join flags surface any missing or duplicated index early.
    ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join(
        BCFTOOLS_INDEX.out.tbi.mix(BCFTOOLS_INDEX.out.csi),
        failOnMismatch: true,
        failOnDuplicate: true,
    )

    emit:
    vcf_index = ch_vcf_index // channel: [ meta, vcf, tbi/csi ]
    versions  = ch_versions  // channel: [ versions.yml ] or topic-based version tuples
}
131 changes: 131 additions & 0 deletions subworkflows/nf-core/bam_impute_quilt2/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "bam_impute_quilt2"
description: |
  Impute low-coverage BAM or CRAM inputs with QUILT2 and ligate chunked outputs per chromosome.
  The subworkflow temporarily stores the imputed region in a `regionout` meta key, which is
  removed again before ligation; this key must therefore not be used in the input meta maps.
keywords:
- bam
- cram
- imputation
- quilt
- quilt2
- vcf
components:
- quilt/quilt2
- glimpse2/ligate
- bcftools/index
input:
- ch_input:
description: |
Channel with target sequencing data and optional rename files.
structure:
- meta:
type: map
description: Metadata map for the input batch.
- bam_or_cram:
type: file
description: Input BAM or CRAM file, or a list of BAM or CRAM files.
pattern: "*.{bam,cram}"
- index:
type: file
description: Index for the BAM or CRAM input.
pattern: "*.{bai,crai}"
- bampaths:
type: file
description: |
Optional text file listing BAM or CRAM paths to impute.
One file per line.
pattern: "*.{txt,tsv}"
- bamnames:
type: file
description: |
Optional text file listing sample names in the same order as `bampaths`.
One file per line.
pattern: "*.{txt,tsv}"
- ch_reference_panel:
description: |
Channel with phased reference panel VCFs per chromosome.
structure:
- meta:
type: map
description: |
Metadata map that will be combined with the input metadata.
Must include `id` for the panel name and `chr` for the chromosome.
- vcf:
type: file
description: Reference panel VCF or BCF.
pattern: "*.{vcf,vcf.gz,bcf}"
- index:
type: file
description: Index for the reference panel VCF or BCF.
pattern: "*.{tbi,csi}"
- ch_chunks:
description: |
Channel with optional imputation chunks per chromosome.
structure:
- meta:
type: map
description: Metadata map with the panel id and chromosome.
- chr:
type: string
description: Chromosome name.
- start:
type: integer
description: Start position of the chunk.
- end:
type: integer
description: End position of the chunk.
- ch_map:
description: |
Channel with optional genetic maps per chromosome.
structure:
- meta:
type: map
description: Metadata map with the panel id and chromosome.
- map:
type: file
description: Genetic map used by QUILT2.
pattern: "*.{txt,map}{,gz}"
- ch_fasta:
description: |
Channel with optional reference FASTA data, required for CRAM inputs.
structure:
- meta:
type: map
description: Metadata map for the reference genome.
- fasta:
type: file
description: Reference genome FASTA.
pattern: "*.{fa,fasta}"
- fai:
type: file
description: FASTA index file.
pattern: "*.fai"
- n_gen:
type: integer
description: Number of generations since founding or mixing.
- buffer:
type: integer
description: Buffer of region to perform imputation over.
output:
- vcf_index:
description: |
Imputed and indexed VCFs after ligating chunked outputs per chromosome.
structure:
- meta:
type: map
description: Metadata map combined from the input batch and panel metadata.
- vcf:
type: file
description: Imputed VCF file.
pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
- index:
type: file
description: Index for the imputed VCF or BCF file.
pattern: "*.{tbi,csi}"
- versions:
description: |
Collected software version information emitted by the constituent modules.
authors:
- "@atrigila"
maintainers:
- "@atrigila"
155 changes: 155 additions & 0 deletions subworkflows/nf-core/bam_impute_quilt2/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
// nf-test specification for the BAM_IMPUTE_QUILT2 subworkflow: one real run
// (single sample, single chunk, map + fasta), one stub run with two
// chromosomes and optional inputs, and one expected-failure run where the
// parameter channels' meta keys do not match.
nextflow_workflow {

    name "Test Subworkflow BAM_IMPUTE_QUILT2"
    script "../main.nf"
    config "./nextflow.config"

    workflow "BAM_IMPUTE_QUILT2"

    tag "subworkflows"
    tag "subworkflows_nfcore"
    tag "subworkflows/bam_impute_quilt2"

    tag "quilt"
    tag "quilt/quilt2"
    tag "bcftools"
    tag "bcftools/index"
    tag "glimpse2"
    tag "glimpse2/ligate"

    test("Impute with quilt2 one individual, one region, map and fasta") {
        when {
            params {
                quilt_args = "--save_prepared_reference=TRUE --seed=1"
            }
            workflow {
                """
                bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true)
                input[0] = channel.of([
                    [id: "allid"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true)
                ])
                    .combine(bampath)
                    .combine(channel.of([[]]))
                input[1] = channel.of([
                    [id: "1000GP", chr: "chr22"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true)
                ])
                input[2] = channel.of(
                    [[id: "1000GP", chr: "chr22"], "chr22", "16570000", "16610000"]
                )
                input[3] = channel.of([
                    [id: "1000GP", chr: "chr22"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists: true)
                ])
                input[4] = channel.of([
                    [id: "GRCh38"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true)
                ]).collect()
                input[5] = 100
                input[6] = 10000
                """
            }
        }
        then {
            assert workflow.success
            // Snapshot file names, VCF summary, sample names and variant MD5
            // rather than raw paths, so the snapshot is stable across runs.
            assert snapshot(
                workflow.out.vcf_index.collect { meta, vcf, index -> [
                    path(vcf).getFileName().toString(),
                    path(index).getFileName().toString(),
                    path(vcf).vcf.summary,
                    path(vcf).vcf.header.getGenotypeSamples().sort(),
                    path(vcf).vcf.variantsMD5
                ]},
                workflow.out.versions.collect { it instanceof String ? path(it).yaml : it }
            ).match()
        }
    }

    test("homo_sapiens - empty channels - stub") {
        options "-stub"
        when {
            params {
                quilt_args = "--seed=1"
            }
            workflow {
                """
                bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true)
                bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true)
                ch_samples = channel.of([
                    [id: "allid"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true)
                ])
                input[0] = ch_samples.combine(bampath).combine(bamname)
                input[1] = channel.of(
                    [[id: "1000GP", chr: "chr22"], [], []],
                    [[id: "1000GP", chr: "chr21"], [], []]
                )
                input[2] = channel.of(
                    [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216],
                    [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999],
                    [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216],
                    [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999]
                )
                input[3] = channel.of(
                    [[id: "1000GP", chr: "chr22"], []],
                    [[id: "1000GP", chr: "chr21"], []]
                )
                input[4] = channel.of([[id: "GRCh38"], [], []]).collect()
                input[5] = 100
                input[6] = 10000
                """
            }
        }
        then {
            assert workflow.success
            assert snapshot(sanitizeOutput(workflow.out)).match()
        }
    }

    test("homo_sapiens - error empty join - stub") {
        options "-stub"
        when {
            params {
                quilt_args = "--seed=1"
            }
            workflow {
                """
                bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true)
                bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true)
                ch_samples = channel.of([
                    [id: "allid"],
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true),
                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true)
                ])
                input[0] = ch_samples.combine(bampath).combine(bamname)
                // Panel ids deliberately disagree with chunks/map ids so the
                // keyed combine produces an empty channel and the subworkflow
                // raises its explicit error.
                input[1] = channel.of(
                    [[id: "otherpanel", chr: "chr22"], [], []],
                    [[id: "1000GP", chr: "chr21"], [], []]
                )
                input[2] = channel.of(
                    [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216],
                    [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999],
                    [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216],
                    [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999]
                )
                input[3] = channel.of(
                    [[id: "1000GP", chr: "chr22"], []],
                    [[id: "otherpanel", chr: "chr21"], []]
                )
                input[4] = channel.of([[id: "GRCh38"], [], []]).collect()
                input[5] = 100
                input[6] = 10000
                """
            }
        }
        then {
            assert workflow.failed
            assert workflow.errorMessage.contains("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.")
        }
    }
}
Loading
Loading