-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMultiAssemblerAssembly.snakefile
More file actions
68 lines (55 loc) · 2.51 KB
/
MultiAssemblerAssembly.snakefile
File metadata and controls
68 lines (55 loc) · 2.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os
import tempfile
import subprocess
import os.path
env = os.environ
import sys
# Directory that holds this Snakefile.
SD = os.path.dirname(workflow.snakefile)

# Config
configfile: "multi_asm.json"

ref = config["ref"]

# Collect the first whitespace-delimited field of every line in the
# reference's ".fai" file.  The context manager closes the handle as soon as
# the names have been read, and blank lines are skipped so that a stray empty
# line does not raise IndexError.
with open(ref + ".fai") as fai:
    allChroms = [l.split()[0] for l in fai if l.strip()]

# Values substituted for the {hap} placeholder in the assembly output names.
haps = ["1", "2"]
assemblers = config["assemblers"]
# Dataset identifiers are the keys of the "datasets" section of the config.
datasets = list(config["datasets"].keys())
# Default target: one partitioned_assembly.json per dataset/assembler pair and
# one consensus FASTA per dataset/assembler pair for every entry of `haps`.
rule all:
    input:
        # This pattern has no {hap} placeholder, so it is expanded over
        # datasets and assemblers only; passing hap here would merely repeat
        # the same paths.
        config=expand("{dataset}/{assembler}/partitioned_assembly.json", dataset=datasets, assembler=assemblers),
        asm=expand("{dataset}/{assembler}/assembly.{hap}.consensus.fasta", dataset=datasets, assembler=assemblers, hap=haps)
# Write the JSON file for one dataset/assembler pair by running
# CreateAssemblyJSON.py (located next to this Snakefile) and capturing its
# standard output.
rule GenerateJSON:
    input:
        # Both inputs are looked up in the config entry of the dataset wildcard.
        bams=lambda wildcards: config["datasets"][wildcards.datasetID]["bams"],
        vcf=lambda wildcards: config["datasets"][wildcards.datasetID]["vcf"]
    output:
        json="{datasetID}/{assembler}/partitioned_assembly.json",
    params:
        sd=SD,
        sample=lambda wildcards: config["datasets"][wildcards.datasetID]["sample"],
        # NOTE: `wd` is not referenced by the shell command below; it is kept
        # so the rule's parameter set stays unchanged.
        wd=lambda wildcards: config["workingDir"] + "/" + wildcards.datasetID,
        ref=config["ref"],
        readtype=lambda wildcards: config["datasets"][wildcards.datasetID]["datatype"],
        workingDir=config["workingDir"]
    # The script's stdout is redirected to the declared output file, so the
    # path is spelled only once (in `output:`) and cannot drift from it.
    shell:"""
        mkdir -p {wildcards.datasetID}/{wildcards.assembler}
        {params.sd}/CreateAssemblyJSON.py --bams {input.bams} --workingDir {params.workingDir}/{wildcards.datasetID}/{wildcards.assembler}/run --sample {params.sample} --ref {params.ref} --vcf {input.vcf} --readtype {params.readtype} --assembler {wildcards.assembler} > {output.json}
    """
# Run the nested PartitionedAssembly workflow for one dataset/assembler pair
# inside a dedicated working directory, copy the resulting consensus FASTA
# files next to the JSON file, then remove the working directory.
rule RunAssembly:
    input:
        json="{dataset}/{assembler}/partitioned_assembly.json",
    output:
        # Doubled braces keep {dataset}/{assembler} as wildcards of this rule;
        # only {hap} is expanded here.
        asms=expand("{{dataset}}/{{assembler}}/assembly.{hap}.consensus.fasta",hap=haps)
    params:
        wd=lambda wildcards: config["workingDir"] + "/" + wildcards.dataset + "/" + wildcards.assembler,
        pd=config["pd"],
        jobs_per_run=config["jobs_per_run"]
    # The nested snakemake runs in a subshell: the directory change cannot
    # leak into the commands that follow, and a failure of the nested run is
    # the subshell's exit status, which stops the script before the copy and
    # the cleanup.  (In a "pushd && snakemake && popd" chain the failing
    # snakemake is a non-final member of an && list, which `set -e` ignores.)
    # The doubled braces in the --cluster string are escapes, so the nested
    # workflow receives them as single-brace placeholders to fill in itself.
    shell:"""
        mkdir -p {params.wd}
        cp {input.json} {params.wd}/
        ( cd {params.wd} && snakemake -p -s {params.pd}/PartitionedAssembly.snakefile -j {params.jobs_per_run} --cluster " {{params.grid_opts}} -c {{resources.threads}} --mem={{resources.mem_gb}}G {{params.node_constraint}} " --restart-times 4 )
        cp {params.wd}/assembly.*.consensus.fasta {wildcards.dataset}/{wildcards.assembler}/
        rm -rf {params.wd}/
    """