-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathFilterTooMasked.py
More file actions
executable file
·30 lines (26 loc) · 1.21 KB
/
FilterTooMasked.py
File metadata and controls
executable file
·30 lines (26 loc) · 1.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/env python
import sys
import argparse
import pysam
ap=argparse.ArgumentParser(description="Remove sedef output if one alignment has too much masked")
ap.add_argument("--genome", help="Genome", required=True)
ap.add_argument("--pct", help="Min percent masked", required=True, type=float)
ap.add_argument("--sedef", help="Sedef output", required=True)
ap.add_argument("--maskedBed", help="Write a bed file of all the masked entries here.", required=True)
args=ap.parse_args()
sedef = open(args.sedef)
asm=pysam.FastaFile(args.genome)
mb=open(args.maskedBed, "w")
for line in sedef:
vals=line.split()
rgn1=vals[0] + ":" + vals[1] + "-" + vals[2]
rgn2=vals[3] + ":" + vals[4] + "-" + vals[5]
seq1=asm.fetch(vals[0], int(vals[1]), int(vals[2]))
seq2=asm.fetch(vals[3], int(vals[4]), int(vals[5]))
mask1=seq1.count("a") + seq1.count("c") + seq1.count("g") + seq1.count("t")
mask2=seq2.count("a") + seq2.count("c") + seq2.count("g") + seq2.count("t")
if mask1/len(seq1) > args.pct or mask2/len(seq2) > args.pct:
mb.write(vals[0] + "\t" + vals[1] + "\t" + vals[2] + "\n")
mb.write(vals[3] + "\t" + vals[4] + "\t" + vals[5] + "\n")
continue
sys.stdout.write(line)