-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
59 lines (53 loc) · 2.04 KB
/
config.py
File metadata and controls
59 lines (53 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Configuration file for the allelic-MPRA design pipeline.
# It has to be located in the same folder as the shell.
# INPUT
# Variant input, given as a file in VCF format.
in_vcf = "./example_files/02_input_vcf.txt"
# Input file that holds finished sequences.
in_sequence = "./example_files/03_input_seq.txt"
# File providing the barcodes to be used.
in_barcode = "./barcodes/barcodes.nt12.filtered.dist_3.json"
# Type of the barcode file: 'txt' (list/tsv) or 'json'.
in_barcode_type = "json"
# Genome file; it is meant to be used with pyfaidx.Fasta().
db_genome = "./hs37d5.fa"
# DESIGN
# Order of the design.
# NOTE(review): what each letter stands for is not defined in this file -
# confirm against the pipeline code before changing it.
de_order = "abcde"
# Added sequence 1 (by default: the five prime sequence).
de_seq_1 = "AGGACCGGATCAACT"
# Added sequence 2 (by default: the spacer). The default value is CCTGCAGG
# directly followed by GAATTC.
de_seq_2 = "CCTGCAGGGAATTC"
# Added sequence 3 (by default: the three prime sequence).
de_seq_3 = "CATTGCGTGAACCGA"
# SETTINGS
# Size of a feature on each side.
set_feature_size = 85
# 1: create all allelic features; 0: create reference features only.
set_all_features = 1
# Indels larger than this value are discarded. Setting it to 1 can be used to
# remove all indels.
set_indel_max_length = 10
# 1: only create full length features.
# 0: account for the different lengths of indels by creating multiple
#    features (two ref for insertions, two alt for deletions).
set_indel_features = 0
# Number of barcodes added to each distinct feature (e.g. the reference and
# the alternative allele feature each get this many).
set_barcodes_per_feature = 100
# Create reverse complementary versions of all features?
# NOTE(review): presumably 1 = yes and 0 = no, like the flags above - confirm.
set_rev_comp = 0
# ENZYMES
# Enzyme file in its processed form.
enz_file_processed = "./enzymes_processed.csv"
# Enzyme file in NEB REBASE format.
enz_file = "./enzymes_new.txt"
# All enzymes used, written as one comma-separated string.
enz_used = "EcoRI,SbfI"
# Alternative to enz_used: all relevant cut sites. Only used if
# 'enz_used = None'. Should be a list of regexes, e.g. ['AGTACT', 'CC[AT]GG'];
# note that the list itself is written as a string here.
enz_sites = "['GAATTC', 'CCTGCAGG']"
# Total number of restriction sites expected in the final feature.
enz_cumul_cuts = 2
# Total number of restriction sites expected in the barcode plus the sequence
# before and after it.
enz_cumul_cuts_bc = 2
# OUTPUT
# Format of the output: json or tsv.
out_format = "json"
# Path to the output.
out_output = "./mpra_design"