forked from Biocomputing-Research-Group/WinnowNet
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPSM_feature.py
More file actions
executable file
·98 lines (88 loc) · 3.24 KB
/
PSM_feature.py
File metadata and controls
executable file
·98 lines (88 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import glob
import pickle
# Per-search-engine lookup tables, filled by the .pin parsing loops that
# follow. Each maps a "<specID>_<charge>_<peptide>" key to a list of eight
# feature strings.
Comet_dict = {}
MSGF_dict = {}
Myrimatch_dict = {}
# Populate Comet_dict from the Comet .pin file (one tab-separated PSM per
# line). Every line is parsed as data, so the file presumably carries no
# header row -- TODO confirm.
# NOTE(review): the column positions used below (3, 4, 12, 13, 26, -6) are
# specific to this export; verify them against the file's column layout.
with open('/media/fs0199/easystore1/Protein/DeepFilterV2/spectra_features/assembly_features_raw/train/comet.pin') as pin_file:
    for row in pin_file:
        cols = row.strip().split('\t')
        # Column 0 is the PSM id with any 'new_ms2/' text removed. Its
        # second-to-last '_' field is the charge; everything before that
        # field forms the spectrum id.
        id_parts = cols[0].replace('new_ms2/', '').split('_')
        spec_id = '_'.join(id_parts[:-2])
        z = id_parts[-2]
        # '[15.9949]' is rewritten to '~' (presumably so the key matches the
        # peptide notation of the PSM tsv files -- confirm).
        pep = cols[26].replace('[15.9949]', '~')
        exp_mass = cols[3]
        calc_mass = cols[4]
        mass = cols[12]
        delta = float(calc_mass) - float(exp_mass)
        pep_len = cols[13]
        enz_int = cols[-6]
        # One-hot charge: slot 0 for charge 1, slot 1 for charge 2, slot 2
        # for any charge above 2.
        one_hot = [0, 0, 0]
        z_int = int(z)
        one_hot[2 if z_int > 2 else z_int - 1] = 1
        Comet_dict['_'.join((spec_id, z, pep))] = (
            [mass, str(delta), str(abs(delta)), pep_len, enz_int]
            + [str(bit) for bit in one_hot]
        )
# Populate MSGF_dict from the MSGF .pin file (one tab-separated PSM per line).
# NOTE(review): column positions 3, 4, 6 and 17 are specific to this export;
# verify them against the file's column layout.
with open('/media/fs0199/easystore1/Protein/DeepFilterV2/spectra_features/assembly_features_raw/train/MSGF.pin') as pin_file:
    for row in pin_file:
        cols = row.strip().split('\t')
        # In the PSM id, the second-to-last '_' field is the charge and
        # everything before that field forms the spectrum id.
        id_parts = cols[0].split('_')
        spec_id = '_'.join(id_parts[:-2])
        z = id_parts[-2]
        # '+16' is rewritten to '~' (presumably to match the peptide
        # notation of the PSM tsv files -- confirm).
        pep = cols[17].replace('+16', '~')
        # Each mass column has charge * 1.00784 subtracted (presumably a
        # per-charge hydrogen mass correction -- confirm).
        exp_mass = float(cols[3]) - float(z) * 1.00784
        calc_mass = float(cols[4]) - float(z) * 1.00784
        mass = str(float(cols[6]) - float(z) * 1.00784)
        delta = calc_mass - exp_mass
        # The text between the first and second '.' of the peptide string is
        # the sequence itself; '~' markers do not count towards its length.
        core = pep.split('.')[1]
        pep_len = str(len(core) - core.count('~'))
        # Count K and R everywhere except the final character of the
        # sequence.
        interior = core[:-1]
        enz_int = str(interior.count('K') + interior.count('R'))
        # One-hot charge: slot 0 for charge 1, slot 1 for charge 2, slot 2
        # for any charge above 2.
        one_hot = [0, 0, 0]
        z_int = int(z)
        one_hot[2 if z_int > 2 else z_int - 1] = 1
        MSGF_dict['_'.join((spec_id, z, pep))] = (
            [mass, str(delta), str(abs(delta)), pep_len, enz_int]
            + [str(bit) for bit in one_hot]
        )
# Populate Myrimatch_dict from the MyriMatch .pin file (one tab-separated PSM
# per line).
# NOTE(review): column positions 3, 4, 9 and 18 are specific to this export;
# verify them against the file's column layout.
with open('/media/fs0199/easystore1/Protein/DeepFilterV2/spectra_features/assembly_features_raw/train/myrimatch.pin') as pin_file:
    for row in pin_file:
        cols = row.strip().split('\t')
        # In the PSM id, the second-to-last '_' field is the charge and
        # everything before that field forms the spectrum id; any '.' in
        # the spectrum id is removed.
        id_parts = cols[0].split('_')
        spec_id = '_'.join(id_parts[:-2]).replace('.', '')
        z = id_parts[-2]
        pep = cols[18]
        exp_mass = cols[3]
        calc_mass = cols[4]
        # 1.00784 is added to the experimental mass (presumably a hydrogen
        # mass offset to align with the other engines -- confirm).
        mass = str(float(exp_mass) + 1.00784)
        delta = float(calc_mass) - float(exp_mass)
        pep_len = cols[9]
        # Count K and R in the sequence between the first and second '.',
        # excluding its final character.
        interior = pep.split('.')[1][:-1]
        enz_int = str(interior.count('K') + interior.count('R'))
        # One-hot charge: slot 0 for charge 1, slot 1 for charge 2, slot 2
        # for any charge above 2.
        one_hot = [0, 0, 0]
        z_int = int(z)
        one_hot[2 if z_int > 2 else z_int - 1] = 1
        Myrimatch_dict['_'.join((spec_id, z, pep))] = (
            [mass, str(delta), str(abs(delta)), pep_len, enz_int]
            + [str(bit) for bit in one_hot]
        )
# For every PSM tsv file, look up each PSM's feature list in the three
# search-engine tables and pickle the resulting {PSM id: features} dict next
# to the .pin files.
psm_dir = '/media/fs0199/easystore1/Protein/DeepFilterV2/spectra_features/assembly_features_raw/PSMs/train/'
out_dir = '/media/fs0199/easystore1/Protein/DeepFilterV2/spectra_features/assembly_features_raw/train/'
train_files = glob.glob(psm_dir + '*tsv')
for tsv_path in train_files:
    psmfeature_dict = {}
    with open(tsv_path) as f:
        for line in f:
            s = line.strip().split('\t')
            # Column 1 is the PSM id; its second-to-last '_' field is the
            # charge and everything before that field forms the spectrum id.
            # Column 4 is the peptide string.
            psm_id = s[1]
            a = psm_id.split('_')
            key = '_'.join(a[:-2]) + '_' + a[-2] + '_' + s[4]
            # Lookup priority is Comet, then MyriMatch, then MS-GF+.
            for table in (Comet_dict, Myrimatch_dict, MSGF_dict):
                if key in table:
                    psmfeature_dict[psm_id] = table[key]
                    break
            else:
                # Still a KeyError, as before, but now it names the file
                # and key so the offending PSM can be found.
                raise KeyError(
                    'PSM %r from %s has no entry in the Comet, MyriMatch '
                    'or MS-GF+ feature tables' % (key, tsv_path)
                )
    # Move the path into the output directory and swap only the trailing
    # 'tsv' for 'pkl'; a blanket str.replace would also rewrite any 'tsv'
    # occurring earlier in the file name.
    writename = tsv_path.replace(psm_dir, out_dir)
    if writename.endswith('tsv'):
        writename = writename[:-len('tsv')] + 'pkl'
    with open(writename, 'wb') as fw:
        pickle.dump(psmfeature_dict, fw)