Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions modules/nf-core/gatk4/cleansam/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the gatk4/cleansam module; main.nf points at this file
# through its `conda "${moduleDir}/environment.yml"` directive.
channels:
- conda-forge
- bioconda
dependencies:
# NOTE(review): the container image referenced in main.nf is named
# "gatk4_gcnvkernel", while only gatk4 is pinned here - confirm whether
# gcnvkernel should be listed as well so conda and container runs match.
# The "renovate:" comment must stay directly above the dependency it describes.
# renovate: datasource=conda depName=bioconda/gatk4
- "bioconda::gatk4=4.6.2.0"
69 changes: 69 additions & 0 deletions modules/nf-core/gatk4/cleansam/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
 * GATK4_CLEANSAM: runs `gatk CleanSam` on one BAM and writes `<prefix>.bam`.
 *
 * Inputs
 *   meta,  bam          - sample map and the BAM file to clean
 *   meta2, fasta        - optional reference; passed as --REFERENCE_SEQUENCE when non-empty
 *   meta3, fasta_index  - index of the reference; staged next to it, not named on the command line
 *   create_index        - when truthy, adds `--CREATE_INDEX true`
 *   create_md5          - when truthy, adds `--CREATE_MD5_FILE true`
 *
 * Outputs
 *   bam            - the cleaned BAM (`*.bam`)
 *   bai            - optional index (`*.bai`)
 *   md5            - optional checksum (`*.md5`)
 *   versions_gatk  - (process name, 'gatk', version string) published on the `versions` topic
 *
 * Fails early when the output name would equal the input name; set
 * `task.ext.prefix` to avoid that.
 */
process GATK4_CLEANSAM {
    tag "${meta.id}"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/ce/ced519873646379e287bc28738bdf88e975edd39a92e7bc6a34bccd37153d9d0/data'
        : 'community.wave.seqera.io/library/gatk4_gcnvkernel:edb12e4f0bf02cd3'}"

    input:
    tuple val(meta), path(bam)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fasta_index)
    // input file must be sorted for index to be created
    val create_index
    val create_md5

    output:
    tuple val(meta), path("*.bam"), emit: bam
    tuple val(meta), path("*.bai"), emit: bai, optional: true
    tuple val(meta), path("*.md5"), emit: md5, optional: true
    tuple val("${task.process}"), val('gatk'), eval("gatk CleanSam --version | grep GATK | sed 's/.*(GATK) v//'"), topic: versions, emit: versions_gatk

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Optional flags collapse to an empty string so they vanish from the command line.
    def index = create_index ? "--CREATE_INDEX true" : ""
    def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
    def md5 = create_md5 ? "--CREATE_MD5_FILE true" : ""
    // JVM heap in MB: 3072 when the task memory is unknown, otherwise 80% of it.
    def avail_mem = 3072
    if (!task.memory) {
        log.info('[GATK CleanSam] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.')
    }
    else {
        avail_mem = (task.memory.mega * 0.8).intValue()
    }

    // Refuse to write the output over the staged input file.
    if ("${bam}" == "${prefix}.bam") {
        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
    }

    // `\\` renders as a single backslash (shell line continuation); the space in
    // front of it keeps the quoted java options separate from the next word
    // regardless of how the following line is indented.
    """
    gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\
        CleanSam \\
        ${args} \\
        ${reference} \\
        ${index} \\
        ${md5} \\
        --INPUT ${bam} \\
        --OUTPUT ${prefix}.bam
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    def touch_index = create_index ? "touch ${prefix}.bam.bai" : ""
    def touch_md5 = create_md5 ? "touch ${prefix}.md5" : ""
    if ("${bam}" == "${prefix}.bam") {
        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
    }
    // Each optional file gets its own command line. Joining them onto the first
    // `touch` with line continuations would hand the word "touch" to touch as a
    // file name and leave a stray file called `touch` in the work directory.
    """
    touch ${prefix}.bam
    ${touch_index}
    ${touch_md5}
    """
}
123 changes: 123 additions & 0 deletions modules/nf-core/gatk4/cleansam/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Module metadata for gatk4/cleansam. The input and output entries below follow
# the order of the `input:` / `output:` declarations in main.nf.
name: "gatk4_cleansam"
description: Cleans the provided BAM, soft-clipping beyond-end-of-reference
  alignments and setting MAPQ to 0 for unmapped reads
keywords:
  - clean
  - bam
  - gatk
  - sam
  - clipping
tools:
  - "gatk":
      description: |
        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
        with a primary focus on variant discovery and genotyping. Its powerful processing engine
        and high-performance computing features make it capable of taking on projects of any size.
      homepage: https://gatk.broadinstitute.org/hc/en-us
      documentation: https://gatk.broadinstitute.org/hc/en-us/categories/360002369672
      doi: 10.1158/1538-7445.AM2017-3590
      licence:
        - "Apache-2.0"
      identifier: ""
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - bam:
        type: file
        description: BAM file
        pattern: "*.{bam}"
        ontologies:
          - edam: "http://edamontology.org/format_2572"
  - - meta2:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - fasta:
        type: file
        description: Reference FASTA file
        pattern: "*.{fasta}"
        ontologies:
          - edam: "http://edamontology.org/format_1929"
  - - meta3:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. [ id:'test', single_end:false ]
    - fasta_index:
        type: file
        description: Index file for the reference FASTA
        pattern: "*.{fai}"
        ontologies:
          - edam: "http://edamontology.org/format_3703"
  - create_index:
      type: boolean
      description: |
        Whether to create an index file for the output BAM.
        The input file must be sorted for the index to be created.
  - create_md5:
      type: boolean
      description: Whether to create an MD5 checksum for the output BAM
output:
  bam:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "*.bam":
          type: file
          description: Cleaned BAM file
          pattern: "*.{bam}"
          ontologies:
            - edam: "http://edamontology.org/format_2572"
  bai:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "*.bai":
          type: file
          description: Index file for the cleaned BAM file
          pattern: "*.{bai}"
          ontologies:
            - edam: "http://edamontology.org/format_3327"
  md5:
    - - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. [ id:'test', single_end:false ]
      - "*.md5":
          type: file
          description: MD5 checksum for the cleaned BAM file
          pattern: "*.{md5}"
          ontologies:
            # NOTE(review): confirm this EDAM term really denotes an MD5 checksum file.
            - edam: "http://edamontology.org/format_3823"
  versions_gatk:
    - - ${task.process}:
          type: string
          description: The process the versions were collected from
      - gatk:
          type: string
          description: The tool name
      - "gatk CleanSam --version | grep GATK | sed 's/.*(GATK) v//'":
          type: eval
          description: The command used to generate the version of the tool
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The process the versions were collected from
      - gatk:
          type: string
          description: The tool name
      - "gatk CleanSam --version | grep GATK | sed 's/.*(GATK) v//'":
          type: eval
          description: The command used to generate the version of the tool
authors:
  - "@sofiademmou"
maintainers:
  - "@sofiademmou"
197 changes: 197 additions & 0 deletions modules/nf-core/gatk4/cleansam/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@

// nf-test suite for the GATK4_CLEANSAM process defined in ../main.nf.
// Three real runs and three matching stub runs cover: BAM only, BAM plus
// reference, and BAM plus reference with index and MD5 creation switched on.
// Input slots follow main.nf's `input:` order:
//   input[0] = [meta, bam], input[1] = [meta2, fasta], input[2] = [meta3, fasta_index],
//   input[3] = create_index, input[4] = create_md5
nextflow_process {

name "Test Process GATK4_CLEANSAM"
script "../main.nf"
process "GATK4_CLEANSAM"
config "./nextflow.config"

tag "modules"
tag "modules_nfcore"
tag "gatk4"
tag "gatk4/cleansam"

// BAM only: empty reference tuples, no index, no MD5.
test("sarscov2 - bam") {

when {
process {
"""
input[0] = [
[ id:'test', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true)
]
input[1] = [[], []]
input[2] = [[], []]
input[3] = false
input[4] = false
"""
}
}

then {
assertAll(
{ assert process.success },
// Snapshot the reads checksum of the output BAM plus every output channel
// whose name starts with "versions".
// NOTE(review): presumably the reads MD5 is used because the raw BAM bytes
// are not stable between runs - confirm.
{ assert snapshot(
bam(process.out.bam[0][1]).getReadsMD5(),
process.out.findAll { key, val -> key.startsWith("versions") }
).match()}
)
}
}

// BAM plus reference FASTA and its .fai index; still no index or MD5 output.
test("sarscov2 - bam + fasta") {

when {
process {
"""
input[0] = [
[ id:'test', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true)
]
input[1] = [
[ id:'test', single_end:true ], // meta2 map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
input[2] = [
[ id:'test', single_end:true ], // meta3 map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = false
input[4] = false
"""
}
}

then {
assertAll(
{ assert process.success },
// Same snapshot content as the BAM-only test: reads checksum and versions.
{ assert snapshot(
bam(process.out.bam[0][1]).getReadsMD5(),
process.out.findAll { key, val -> key.startsWith("versions") }
).match()}
)
}
}

// BAM plus reference with create_index and create_md5 both true.
test("sarscov2 - bam + fasta - bai + md5") {

when {
process {
"""
input[0] = [
[ id:'test', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true)
]
input[1] = [
[ id:'test', single_end:true ], // meta2 map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
input[2] = [
[ id:'test', single_end:true ], // meta3 map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = true
input[4] = true
"""
}
}

then {
assertAll(
{ assert process.success },
// The index is snapshotted by file name only, the md5 channel as emitted,
// alongside the reads checksum and versions.
{ assert snapshot(
bam(process.out.bam[0][1]).getReadsMD5(),
file(process.out.bai[0][1]).name,
process.out.md5,
process.out.findAll { key, val -> key.startsWith("versions") }
).match()}
)
}
}

// Stub counterpart of the BAM-only test. Runs with the `-stub` option, so
// main.nf's `stub:` section is expected to run instead of the real command.
test("sarscov2 - stub") {
options "-stub"
when {
process {
"""
input[0] = [
[ id:'test', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true)
]
input[1] = [[], []]
input[2] = [[], []]
input[3] = false
input[4] = false
"""
}
}

then {
assertAll(
{ assert process.success },
// Stub runs snapshot the whole output map after passing it through sanitizeOutput.
{ assert snapshot(sanitizeOutput(process.out)).match() }
)
}
}
// Stub counterpart of the BAM plus reference test.
test("sarscov2 - bam + fasta - stub") {
options "-stub"
when {
process {
"""
input[0] = [
[ id:'test', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true)
]
input[1] = [
[ id:'test', single_end:true ], // meta2 map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
input[2] = [
[ id:'test', single_end:true ], // meta3 map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = false
input[4] = false
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(sanitizeOutput(process.out)).match() }
)
}
}

// Stub counterpart of the index + MD5 test (create_index and create_md5 true).
test("sarscov2 - bam + fasta - bai + md5 - stub") {
options "-stub"
when {
process {
"""
input[0] = [
[ id:'test', single_end:true ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true)
]
input[1] = [
[ id:'test', single_end:true ], // meta2 map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
input[2] = [
[ id:'test', single_end:true ], // meta3 map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = true
input[4] = true
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(sanitizeOutput(process.out)).match() }
)
}
}
}
Loading
Loading