-
Notifications
You must be signed in to change notification settings - Fork 1k
add quilt2 sbwf #11223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
add quilt2 sbwf #11223
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| include { QUILT_QUILT2 } from '../../../modules/nf-core/quilt/quilt2/main' | ||
| include { GLIMPSE2_LIGATE } from '../../../modules/nf-core/glimpse2/ligate/main' | ||
| include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' | ||
|
|
||
| workflow BAM_IMPUTE_QUILT2 { | ||
| take: | ||
| ch_input // channel (mandatory): [ meta, bam/cram, bai/crai, bampaths, bamnames ] | ||
| ch_reference_panel // channel (mandatory): [ meta, reference_vcf, reference_index ] | ||
| ch_chunks // channel (optional): [ meta, chr, start, end ] | ||
| ch_map // channel (optional): [ meta, genetic_map ] | ||
| ch_fasta // channel (optional): [ meta, fasta, fai ] | ||
| n_gen // integer: Number of generations since founding or mixing | ||
| buffer // integer: Buffer of region to perform imputation over | ||
|
|
||
| main: | ||
|
|
||
| ch_versions = channel.empty() | ||
|
|
||
| ch_parameters = ch_reference_panel | ||
| .combine(ch_map, by: 0) | ||
| .combine(ch_chunks, by: 0) | ||
|
|
||
| ch_parameters.ifEmpty { | ||
| error("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.") | ||
| } | ||
|
|
||
| ch_bam_params = ch_input | ||
| .combine(ch_parameters) | ||
| .map { meta_input, bam, bai, bampath, bamname, meta_panel, reference_vcf, reference_index, genetic_map, chr, start, end -> | ||
| def regionout = "${chr}" | ||
| if (start != [] && end != []) { | ||
| regionout = "${chr}:${start}-${end}" | ||
| } | ||
|
|
||
| [ | ||
| meta_panel + meta_input + [regionout: regionout], | ||
| bam, | ||
| bai, | ||
| bampath, | ||
| bamname, | ||
| reference_vcf, | ||
| reference_index, | ||
| [], | ||
| [], | ||
| [], | ||
| chr, | ||
| start, | ||
| end, | ||
| n_gen, | ||
| buffer, | ||
| genetic_map, | ||
| ] | ||
| } | ||
|
|
||
| QUILT_QUILT2(ch_bam_params, ch_fasta) | ||
| ch_versions = ch_versions.mix(QUILT_QUILT2.out.versions_r_quilt) | ||
| ch_versions = ch_versions.mix(QUILT_QUILT2.out.versions_r_base) | ||
|
|
||
| ligate_input = QUILT_QUILT2.out.vcf | ||
| .join(QUILT_QUILT2.out.tbi) | ||
| .map { meta, vcf, index -> | ||
| def keys_to_keep = meta.keySet() - ['regionout'] | ||
| [meta.subMap(keys_to_keep), vcf, index] | ||
| } | ||
| .groupTuple() | ||
|
|
||
| GLIMPSE2_LIGATE(ligate_input) | ||
|
|
||
| BCFTOOLS_INDEX(GLIMPSE2_LIGATE.out.merged_variants) | ||
|
|
||
| ch_vcf_index = GLIMPSE2_LIGATE.out.merged_variants.join( | ||
| BCFTOOLS_INDEX.out.tbi.mix(BCFTOOLS_INDEX.out.csi), | ||
| failOnMismatch: true, | ||
| failOnDuplicate: true, | ||
| ) | ||
|
|
||
| emit: | ||
| vcf_index = ch_vcf_index // channel: [ meta, vcf, tbi/csi ] | ||
| versions = ch_versions // channel: [ versions.yml ] or topic-based version tuples | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json | ||
| name: "bam_impute_quilt2" | ||
| description: | | ||
| Impute low-coverage BAM or CRAM inputs with QUILT2 and ligate chunked outputs per chromosome. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you just add a line to explain that the regionout key will be used to store temporarily the region and therefore shouldn't be used in the meta maps ? |
||
| keywords: | ||
| - bam | ||
| - cram | ||
| - imputation | ||
| - quilt | ||
| - quilt2 | ||
| - vcf | ||
| components: | ||
| - quilt/quilt2 | ||
| - glimpse2/ligate | ||
| - bcftools/index | ||
| input: | ||
| - ch_input: | ||
| description: | | ||
| Channel with target sequencing data and optional rename files. | ||
| structure: | ||
| - meta: | ||
| type: map | ||
| description: Metadata map for the input batch. | ||
| - bam_or_cram: | ||
| type: file | ||
| description: Input BAM or CRAM file, or a list of BAM or CRAM files. | ||
| pattern: "*.{bam,cram}" | ||
| - index: | ||
| type: file | ||
| description: Index for the BAM or CRAM input. | ||
| pattern: "*.{bai,crai}" | ||
| - bampaths: | ||
| type: file | ||
| description: | | ||
| Optional text file listing BAM or CRAM paths to impute. | ||
| One file per line. | ||
| pattern: "*.{txt,tsv}" | ||
| - bamnames: | ||
| type: file | ||
| description: | | ||
| Optional text file listing sample names in the same order as `bampaths`. | ||
| One file per line. | ||
| pattern: "*.{txt,tsv}" | ||
| - ch_reference_panel: | ||
| description: | | ||
| Channel with phased reference panel VCFs per chromosome. | ||
| structure: | ||
| - meta: | ||
| type: map | ||
| description: | | ||
| Metadata map that will be combined with the input metadata. | ||
| Must include `id` for the panel name and `chr` for the chromosome. | ||
| - vcf: | ||
| type: file | ||
| description: Reference panel VCF or BCF. | ||
| pattern: "*.{vcf,vcf.gz,bcf}" | ||
| - index: | ||
| type: file | ||
| description: Index for the reference panel VCF or BCF. | ||
| pattern: "*.{tbi,csi}" | ||
| - ch_chunks: | ||
| description: | | ||
| Channel with optional imputation chunks per chromosome. | ||
| structure: | ||
| - meta: | ||
| type: map | ||
| description: Metadata map with the panel id and chromosome. | ||
| - chr: | ||
| type: string | ||
| description: Chromosome name. | ||
| - start: | ||
| type: integer | ||
| description: Start position of the chunk. | ||
| - end: | ||
| type: integer | ||
| description: End position of the chunk. | ||
| - ch_map: | ||
| description: | | ||
| Channel with optional genetic maps per chromosome. | ||
| structure: | ||
| - meta: | ||
| type: map | ||
| description: Metadata map with the panel id and chromosome. | ||
| - map: | ||
| type: file | ||
| description: Genetic map used by QUILT2. | ||
| pattern: "*.{txt,map}{,gz}" | ||
| - ch_fasta: | ||
| description: | | ||
| Channel with optional reference FASTA data, required for CRAM inputs. | ||
| structure: | ||
| - meta: | ||
| type: map | ||
| description: Metadata map for the reference genome. | ||
| - fasta: | ||
| type: file | ||
| description: Reference genome FASTA. | ||
| pattern: "*.{fa,fasta}" | ||
| - fai: | ||
| type: file | ||
| description: FASTA index file. | ||
| pattern: "*.fai" | ||
| - n_gen: | ||
| type: integer | ||
| description: Number of generations since founding or mixing. | ||
| - buffer: | ||
| type: integer | ||
| description: Buffer of region to perform imputation over. | ||
| output: | ||
| - vcf_index: | ||
| description: | | ||
| Imputed and indexed VCFs after ligating chunked outputs per chromosome. | ||
| structure: | ||
| - meta: | ||
| type: map | ||
| description: Metadata map combined from the input batch and panel metadata. | ||
| - vcf: | ||
| type: file | ||
| description: Imputed VCF file. | ||
| pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" | ||
| - index: | ||
| type: file | ||
| description: Index for the imputed VCF or BCF file. | ||
| pattern: "*.{tbi,csi}" | ||
| - versions: | ||
| description: | | ||
| Collected software version information emitted by the constituent modules. | ||
| authors: | ||
| - "@atrigila" | ||
| maintainers: | ||
| - "@atrigila" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,155 @@ | ||
| nextflow_workflow { | ||
|
|
||
| name "Test Subworkflow BAM_IMPUTE_QUILT2" | ||
| script "../main.nf" | ||
| config "./nextflow.config" | ||
|
|
||
| workflow "BAM_IMPUTE_QUILT2" | ||
|
|
||
| tag "subworkflows" | ||
| tag "subworkflows_nfcore" | ||
| tag "subworkflows/bam_impute_quilt2" | ||
|
|
||
| tag "quilt" | ||
| tag "quilt/quilt2" | ||
| tag "bcftools" | ||
| tag "bcftools/index" | ||
| tag "glimpse2" | ||
| tag "glimpse2/ligate" | ||
|
|
||
| test("Impute with quilt2 one individual, one region, map and fasta") { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add the optional bamnames input file to ensure it does work properly. |
||
| when { | ||
| params { | ||
| quilt_args = "--save_prepared_reference=TRUE --seed=1" | ||
| } | ||
| workflow { | ||
| """ | ||
| bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) | ||
| input[0] = channel.of([ | ||
| [id: "allid"], | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) | ||
| ]) | ||
| .combine(bampath) | ||
| .combine(channel.of([[]])) | ||
| input[1] = channel.of([ | ||
| [id: "1000GP", chr: "chr22"], | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz", checkIfExists: true), | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/popgen/1000GP.chr22.vcf.gz.csi", checkIfExists: true) | ||
| ]) | ||
| input[2] = channel.of( | ||
| [[id: "1000GP", chr: "chr22"], "chr22", "16570000", "16610000"] | ||
| ) | ||
| input[3] = channel.of([ | ||
| [id: "1000GP", chr: "chr22"], | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genetic_map/genome.GRCh38.chr22.stitch.map", checkIfExists: true) | ||
| ]) | ||
| input[4] = channel.of([ | ||
| [id: "GRCh38"], | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta", checkIfExists: true), | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/genome/genome.fasta.fai", checkIfExists: true) | ||
| ]).collect() | ||
| input[5] = 100 | ||
| input[6] = 10000 | ||
| """ | ||
| } | ||
| } | ||
| then { | ||
| assert workflow.success | ||
| assert snapshot( | ||
| workflow.out.vcf_index.collect { meta, vcf, index -> [ | ||
| path(vcf).getFileName().toString(), | ||
| path(index).getFileName().toString(), | ||
| path(vcf).vcf.summary, | ||
| path(vcf).vcf.header.getGenotypeSamples().sort(), | ||
| path(vcf).vcf.variantsMD5 | ||
| ]}, | ||
| workflow.out.versions.collect { it instanceof String ? path(it).yaml : it } | ||
| ).match() | ||
| } | ||
| } | ||
|
|
||
| test("homo_sapiens - empty channels - stub") { | ||
| options "-stub" | ||
| when { | ||
| params { | ||
| quilt_args = "--seed=1" | ||
| } | ||
| workflow { | ||
| """ | ||
| bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) | ||
| bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true) | ||
| ch_samples = channel.of([ | ||
| [id: "allid"], | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) | ||
| ]) | ||
| input[0] = ch_samples.combine(bampath).combine(bamname) | ||
| input[1] = channel.of( | ||
| [[id: "1000GP", chr: "chr22"], [], []], | ||
| [[id: "1000GP", chr: "chr21"], [], []] | ||
| ) | ||
| input[2] = channel.of( | ||
| [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216], | ||
| [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999], | ||
| [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216], | ||
| [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999] | ||
| ) | ||
| input[3] = channel.of( | ||
| [[id: "1000GP", chr: "chr22"], []], | ||
| [[id: "1000GP", chr: "chr21"], []] | ||
| ) | ||
| input[4] = channel.of([[id: "GRCh38"], [], []]).collect() | ||
| input[5] = 100 | ||
| input[6] = 10000 | ||
| """ | ||
| } | ||
| } | ||
| then { | ||
| assert workflow.success | ||
| assert snapshot(sanitizeOutput(workflow.out)).match() | ||
| } | ||
| } | ||
|
|
||
| test("homo_sapiens - error empty join - stub") { | ||
| options "-stub" | ||
| when { | ||
| params { | ||
| quilt_args = "--seed=1" | ||
| } | ||
| workflow { | ||
| """ | ||
| bampath = channel.of("NA12878.chr21_22.1X.bam").collectFile(name: "bampath.txt", newLine: true) | ||
| bamname = channel.of("MySample1").collectFile(name: "bamname.txt", newLine: true) | ||
| ch_samples = channel.of([ | ||
| [id: "allid"], | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam", checkIfExists: true), | ||
| file(params.modules_testdata_base_path + "genomics/homo_sapiens/illumina/bam/NA12878.chr21_22.1X.bam.bai", checkIfExists: true) | ||
| ]) | ||
| input[0] = ch_samples.combine(bampath).combine(bamname) | ||
| input[1] = channel.of( | ||
| [[id: "otherpanel", chr: "chr22"], [], []], | ||
| [[id: "1000GP", chr: "chr21"], [], []] | ||
| ) | ||
| input[2] = channel.of( | ||
| [[id: "1000GP", chr: "chr22"], "chr22", 16570065, 16592216], | ||
| [[id: "1000GP", chr: "chr22"], "chr22", 16592229, 16609999], | ||
| [[id: "1000GP", chr: "chr21"], "chr21", 16570065, 16592216], | ||
| [[id: "1000GP", chr: "chr21"], "chr21", 16592229, 16609999] | ||
| ) | ||
| input[3] = channel.of( | ||
| [[id: "1000GP", chr: "chr22"], []], | ||
| [[id: "otherpanel", chr: "chr21"], []] | ||
| ) | ||
| input[4] = channel.of([[id: "GRCh38"], [], []]).collect() | ||
| input[5] = 100 | ||
| input[6] = 10000 | ||
| """ | ||
| } | ||
| } | ||
| then { | ||
| assert workflow.failed | ||
| assert workflow.errorMessage.contains("ERROR: join operation resulted in an empty channel. Please provide a valid ch_chunks and ch_map channel as input.") | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't QUILT2 use topics for that ?