-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdownsample_methylKit.py
More file actions
executable file
·81 lines (57 loc) · 2.25 KB
/
downsample_methylKit.py
File metadata and controls
executable file
·81 lines (57 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
import argparse
import sys
import random
if len(sys.argv) == 1:
print("""Usage: cat <methylkit file> | grep -v chrBase | downsample_methylKit.py --fraction <fraction of reads to keep> > <output file>
OR
cat <bedGraph file> | grep -v track | downsample_methylKit.py --fraction <fraction of reads to keep> --bedGraph > <output file>
""")
sys.exit()
def argparser():
parser = argparse.ArgumentParser()
parser.add_argument("--fraction", required = True, help = "Fraction of reads to keep")
parser.add_argument("--bedGraph", required = False, action = "store_true", help = "Maximum coverage to include")
args = parser.parse_args()
return args
args = argparser()
downsample_len = len(args.fraction.split(".")[1]) # Counts number of decimal places, for downsampling precision
downsample_number = int(args.fraction.split(".")[1])
downsample_cap = int("".join(["1" ,"".join(["0" for x in range(downsample_len)])])) # An order of magnitude larger than downsample_len
def remove_covgs(covg_list):
my_covg_list = covg_list
for x in range(len(my_covg_list)):
tmp_random = random.randint(0, downsample_cap)
if tmp_random > downsample_number:
my_covg_list[x] = 0
return my_covg_list
counter = 0
for line in sys.stdin:
counter += 1
if counter % 1000000 == 0:
print("\rProcessed {} lines".format(counter), end = "", file = sys.stderr)
line = line.strip().split("\t")
if args.bedGraph == True:
meth_covg = int(line[4])
unmeth_covg = int(line[5])
else:
covg = int(line[4])
meth_fraction = float(line[5]) / 100
unmeth_fraction = float(line[6]) / 100
meth_covg = int(round(covg * meth_fraction))
unmeth_covg = int(round(covg * unmeth_fraction))
meth = remove_covgs([1] * meth_covg)
unmeth = remove_covgs([1] * unmeth_covg)
new_covg = sum(meth) + sum(unmeth)
if new_covg > 0:
if args.bedGraph == True:
line[3] = str(int(100 * (sum(meth) / new_covg)))
line[4] = str(sum(meth))
line[5] = str(sum(unmeth))
else:
new_meth_fraction = round(float(sum(meth) / new_covg) * 100, 2)
new_unmeth_fraction = round(float(sum(unmeth) / new_covg) * 100, 2)
line[4] = str(new_covg)
line[5] = str(new_meth_fraction)
line[6] = str(new_unmeth_fraction)
print("\t".join(line))