-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgenerate_barcode_file.py
More file actions
executable file
·43 lines (35 loc) · 1.5 KB
/
generate_barcode_file.py
File metadata and controls
executable file
·43 lines (35 loc) · 1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env python3
from sys import argv
from os import walk
from os.path import basename
from os.path import join
from Bash import bash
def get_barcode(filename):
    """Return the most frequent barcode near the start of a read file.

    Builds a shell pipeline that streams the file (through ``gunzip -c``
    when the name ends in ``.gz``, otherwise through ``cat``), keeps the
    lines among the first 10000 that start with ``@``, takes the 10th
    ``:``-separated field of each, strips spaces, and selects the value
    that occurs most often. The command is printed before it is run.

    Args:
        filename: Path to the read file, plain or gzip-compressed.

    Returns:
        The first element of whatever ``bash`` returns for the pipeline.
        NOTE(review): presumably the captured stdout -- confirm against
        the ``Bash`` module.
    """
    # Imported here so the function carries its own dependency.
    from shlex import quote

    # Quote the path so spaces or shell metacharacters in it cannot split
    # or alter the command; quote() leaves simple paths unchanged.
    reader = "gunzip -c" if filename.endswith(".gz") else "cat"
    display_filename = "{} {}".format(reader, quote(filename))

    # Potentially, the barcode can be at -f3 rather than -f10
    command = ("{} | head -n 10000 | grep ^@ | cut -d':' -f10 | tr -d ' ' "
               "| sort | uniq -c | sort -nr | head -1 | sed -e "
               "'s/^[[:space:]]*//' | cut -d ' ' -f2").format(display_filename)
    print("Running: {}".format(command))
    return bash(command)[0]
def main(root, output):
    """Write a "<name> <barcode>" entry for every R1 read file under root.

    Walks ``root`` recursively. For each file whose name ends in ``.fq``,
    ``.fastq``, ``.fq.gz`` or ``.fastq.gz`` and contains ``_R1``, the entry
    name is the part of the file name before the first ``.`` with ``_R1``
    replaced by ``_pe``; the barcode comes from ``get_barcode``.

    Args:
        root: Directory to search.
        output: Path of the file to write; opened in ``"w"`` mode, so any
            existing content is replaced.
    """
    read_suffixes = (".fq.gz", ".fastq.gz", ".fq", ".fastq")
    with open(output, "w") as fh:
        # A distinct loop name keeps the ``root`` argument from being rebound.
        for dirpath, _dirnames, files in walk(root):
            for filename in files:
                if not filename.endswith(read_suffixes):
                    continue
                if "_R1" not in filename:
                    continue
                name = filename.split(".")[0].replace("_R1", "_pe")
                barcode = get_barcode(join(dirpath, filename))
                # NOTE(review): no newline is appended, so entries are only
                # line-separated if the barcode value ends with one -- confirm.
                fh.write("{} {}".format(name, barcode))
if __name__ == "__main__":
    # Exactly two arguments are expected after the script name; anything
    # else prints the usage line instead of running.
    if len(argv) != 3:
        print("generate_barcode_file.py <root directory> <output>")
    else:
        root_directory, output_path = argv[1], argv[2]
        main(root_directory, output_path)