-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathprocessing.py
More file actions
107 lines (76 loc) · 2.66 KB
/
processing.py
File metadata and controls
107 lines (76 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from Proxy import Descriptor
from sys import path
import pandas as pd
import numpy as np
import pickle
class Predictor:
    """
    Predictor class: make a prediction for an unknown molecule.

    On construction a pickled feature transformer and a pickled model are
    loaded. ``test_data`` turns a SMILES input into a cleaned, column-reduced
    descriptor frame, and ``final_predict`` feeds such a frame to the model.

    Attributes:
        transformer_: object unpickled from the transformer file; this class
            calls ``transform`` and ``get_support(indices=True)`` on it.
        predictor_: object unpickled from the model file; this class calls
            ``predict`` on it.
        _all_prop: descriptors returned by the most recent ``smiles`` call,
            or ``None`` when ``smiles`` has not been called yet.
    """

    # Default pickle locations; relative, so they resolve against the
    # current working directory of the process.
    MODEL_PATH = "./models/Extree.pkl"
    TRANSFORMER_PATH = "./models/transformer1.pkl"

    def __init__(self, model_path=None, transformer_path=None):
        """
        Load the transformer and the model.

        Args:
            model_path: optional path of the pickled model; ``None`` uses
                ``MODEL_PATH``.
            transformer_path: optional path of the pickled transformer;
                ``None`` uses ``TRANSFORMER_PATH``.

        Raises:
            ValueError: if either pickle cannot be loaded.
        """
        self.transformer_ = self.transformer_loader(transformer_path)
        self.predictor_ = self.model_loader(model_path)
        self._all_prop = None

    @staticmethod
    def _load_pickle(path, error_message):
        """Unpickle *path*, raising ``ValueError(error_message)`` on failure."""
        # NOTE(security): unpickling can execute arbitrary code. Only point
        # this at model files that come from a trusted source.
        try:
            with open(path, "rb") as handle:
                return pickle.load(handle)
        except Exception as err:
            # ``Exception`` (not a bare ``except``) so KeyboardInterrupt and
            # SystemExit still propagate; the cause is chained for debugging.
            raise ValueError(error_message) from err

    def model_loader(self, path=None):
        """
        Load the trained model.

        Args:
            path: optional pickle path; ``None`` uses ``MODEL_PATH``.

        Returns:
            The unpickled model object.

        Raises:
            ValueError: if the file cannot be opened or unpickled.
        """
        target = self.MODEL_PATH if path is None else path
        return self._load_pickle(target, "Problem loading model..")

    def smiles(self, data):
        """
        Extract all features from the SMILES of a chemical.

        The result of ``Descriptor(data).get_all(as_dataframe=True)`` is
        cached on ``self._all_prop`` and returned.

        Raises:
            ValueError: if the descriptor computation raises ``ValueError``.
        """
        try:
            self._all_prop = Descriptor(data).get_all(as_dataframe=True)
        except ValueError as err:
            raise ValueError("smiles conversion Error") from err
        return self._all_prop

    def molecular_prop(self):
        """
        Return the basic molecular properties of the last processed molecule.

        Returns:
            The ``"Weight"``, ``"TPSA"`` and ``"logP"`` columns of the
            descriptors cached by ``smiles``.

        Raises:
            ValueError: if ``smiles`` has not been called yet.
            KeyError: if one of the columns is missing from the descriptors.
        """
        if self._all_prop is None:
            raise ValueError("No descriptors available; call smiles() first.")
        # NOTE(review): column names are taken from the original code;
        # confirm they match what Descriptor.get_all actually produces.
        return self._all_prop[["Weight", "TPSA", "logP"]]

    def data_cleaner(self, data):
        """
        Remove rows containing missing or infinite values.

        The input frame is left unmodified; a cleaned copy is returned.

        Args:
            data: ``pd.DataFrame`` to clean.

        Returns:
            A new DataFrame without rows holding NaN or +/-inf, cast to
            ``float64``.

        Raises:
            AssertionError: if *data* is not a ``pd.DataFrame``.
        """
        if not isinstance(data, pd.DataFrame):
            # Raised explicitly (same exception type as the former ``assert``)
            # so the check is not stripped when Python runs with ``-O``.
            raise AssertionError("df needs to be a pd.DataFrame")
        without_nan = data.dropna()
        # NaN rows are already gone, so only infinities remain to be filtered.
        # ``axis`` must be passed by keyword on pandas >= 2.0.
        has_inf = without_nan.isin([np.inf, -np.inf]).any(axis=1)
        return without_nan[~has_inf].astype(np.float64)

    def transformer_loader(self, path=None):
        """
        Load the trained transformer.

        Args:
            path: optional pickle path; ``None`` uses ``TRANSFORMER_PATH``.

        Returns:
            The unpickled transformer object.

        Raises:
            ValueError: if the file cannot be opened or unpickled.
        """
        target = self.TRANSFORMER_PATH if path is None else path
        return self._load_pickle(target, "Problem loading Transformer..")

    def transform_data(self, data):
        """
        Keep only the columns selected by the loaded transformer.

        Args:
            data: ``pd.DataFrame`` of descriptors.

        Returns:
            *data* restricted to the columns whose positions are reported by
            ``self.transformer_.get_support(indices=True)``.

        Raises:
            ValueError: if *data* is not a DataFrame, or if the transformer
                fails on it.
        """
        if not isinstance(data, pd.DataFrame):
            raise ValueError("Data Error...")
        try:
            # The transformed array is intentionally discarded: the call only
            # lets the transformer reject incompatible input, while the
            # returned frame keeps the original column labels.
            self.transformer_.transform(data)
            kept = self.transformer_.get_support(indices=True)
            return data[data.columns[kept]]
        except Exception as err:
            raise ValueError("data transformation error...") from err

    def test_data(self, smiles_):
        """
        Run the full preprocessing pipeline for one SMILES input.

        Computes descriptors, cleans them with ``data_cleaner`` and reduces
        the columns with ``transform_data``.

        Returns:
            The cleaned and column-reduced DataFrame.

        Raises:
            ValueError: if any step of the pipeline fails.
        """
        try:
            descriptors = self.smiles(smiles_)
            return self.transform_data(self.data_cleaner(descriptors))
        except Exception as err:
            raise ValueError(" Error occured.") from err

    def final_predict(self, data):
        """
        Return ``self.predictor_.predict(data)`` for already prepared data.
        """
        return self.predictor_.predict(data)