forked from quentin0515/devas
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_features.py
More file actions
executable file
·74 lines (56 loc) · 2.26 KB
/
get_features.py
File metadata and controls
executable file
·74 lines (56 loc) · 2.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pysam
import csv
import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split, Dataset, DataLoader
import argparse
import sys
import os
###################################
# parameters
from utils import *
def main(args):
    """Extract features for the SVs in a VCF and write them to a text file.

    Steps, in the order they run:
      1. Open the alignment file in CRAM mode.
      2. Index the VCF with ``index_vcf``, restricted to ``args.type``.
      3. Round-trip the result through ``<working_dir>/sv_bed_file.bed``
         (``output_bed`` followed by ``input_bed``).
      4. Call ``get_features`` with two empty lists (``x`` and ``y``); the
         helper is expected to fill them in place -- ``x`` is read afterwards.
      5. Write four lines per entry of ``x`` to ``args.output_file``.

    Args:
        args: Parsed command-line namespace providing ``type``, ``vcf_file``,
            ``bam_file``, ``ref_file``, ``output_file`` and ``working_dir``.
            Despite its name, ``bam_file`` is opened in CRAM mode (``"rc"``)
            and its index is looked up at ``<bam_file>.crai``.

    Output format:
        For every entry of ``x``, four lines are written. Line ``k``
        (k = 0..3) holds component ``k`` of every item in that entry, each
        followed by a single space. The contents of ``y`` are not written.
    """
    cur_valid_types = args.type
    vcf_file = args.vcf_file
    x_file = args.output_file
    working_dir = args.working_dir
    reference_filename = args.ref_file

    # The input is treated as CRAM: mode "rc" plus an explicit .crai index.
    cram_file = args.bam_file
    crai_file = cram_file + '.crai'

    x, y = [], []

    # Hold the alignment handle in a context manager so it is closed even if
    # one of the steps below raises.
    with pysam.AlignmentFile(
        cram_file,
        "rc",
        reference_filename=reference_filename,
        index_filename=crai_file,
    ) as bam:
        sv_dict = index_vcf(vcf_file, cur_valid_types)

        bed_file = os.path.join(working_dir, "sv_bed_file.bed")
        output_bed(sv_dict, bed_file)
        sv_list = input_bed(bed_file)

        # NOTE(review): the literal 1 and False are passed through unchanged;
        # their meaning is defined by utils.get_features -- confirm there.
        get_features(reference_filename, x, y, sv_list, 1, bam, False)

    with open(x_file, "w") as file_x:
        for cur_x in x:
            # One output line per component index 0..3.
            for component in range(4):
                file_x.write("".join(f"{features[component]} " for features in cur_x))
                file_x.write("\n")
if __name__ == "__main__":
    # Command-line entry point: every option is required and the parsed
    # namespace is handed to main() unchanged.
    parser = argparse.ArgumentParser(description="Args Parser")
    parser.add_argument('-t', '--type', type=str, help='Type of SV', required=True)
    parser.add_argument('-v', '--vcf_file', type=str, help='Input vcf file', required=True)
    # The option keeps its historical name, but main() opens the file in CRAM
    # mode and expects the index at "<file>.crai", so the help text says so.
    parser.add_argument('-b', '--bam_file', type=str, help='Input CRAM file (index expected at <file>.crai)', required=True)
    parser.add_argument('-r', '--ref_file', type=str, help='Reference file', required=True)
    parser.add_argument('-o', '--output_file', type=str, help='Output feature file', required=True)
    parser.add_argument('-d', '--working_dir', type=str, help='Working directory', required=True)

    # Parse the arguments
    args = parser.parse_args()
    main(args)