-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_processing.py
More file actions
55 lines (45 loc) · 2.05 KB
/
Copy pathdata_processing.py
File metadata and controls
55 lines (45 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from pathlib import Path
from zipfile36 import ZipFile
import os
import pandas as pd
from sklearn.model_selection import train_test_split
# Pin the process working directory to the folder containing this file, so
# the relative './dataset/' and './example/' locations used by the functions
# in this module resolve no matter where the interpreter was started.
path = os.fspath(Path(__file__).resolve().parent)
os.chdir(path)
def unzip(folder):
    """Extract ``./dataset/<folder>.zip`` into ``./dataset/`` if not done yet.

    Nothing happens when ``./dataset/<folder>`` already exists. Otherwise the
    archive ``./dataset/<folder>.zip`` is opened and all of its members are
    extracted into ``./dataset/``.

    Args:
        folder: Name of the archive without the ``.zip`` suffix; the same
            name is used to check whether extraction already happened.

    Raises:
        Whatever ``ZipFile`` raises when the archive is missing or cannot be
        read; the error is not handled here.
    """
    target_dir = './dataset/' + folder
    if os.path.exists(target_dir):
        return
    # The context manager closes the archive even when extraction fails,
    # so the file handle is never leaked.
    with ZipFile(target_dir + '.zip', 'r') as archive:
        archive.extractall('./dataset/')
def create_dataframe(paths, state):
    """Build a DataFrame of file paths, with class-indicator columns if labelled.

    The result always has a ``path`` column. Unless ``state`` is
    ``'own_test'``, four 0/1 integer columns are added, each set to 1 when
    the path string contains the corresponding marker substring:

    * ``alucan`` - ``'AluCan'``
    * ``glass``  - ``'Glass'``
    * ``hdpe``   - ``'HDPEM'``
    * ``pet``    - ``'PET'``

    Args:
        paths: Iterable of path strings. May be empty, in which case an empty
            frame with the expected columns is returned.
        state: Mode string; ``'own_test'`` suppresses the label columns.

    Returns:
        pandas.DataFrame with one row per entry of ``paths``.
    """
    path_list = list(paths)
    # Naming the column at construction time keeps an empty input valid;
    # assigning ``df.columns`` afterwards fails on a zero-column frame.
    df = pd.DataFrame({'path': path_list})
    if state == 'own_test':
        return df

    label_markers = (
        ('alucan', 'AluCan'),
        ('glass', 'Glass'),
        ('hdpe', 'HDPEM'),
        ('pet', 'PET'),
    )
    for column, marker in label_markers:
        # The marker is searched in the whole path string, not only in the
        # file name. The explicit dtype keeps the column integer when empty.
        df[column] = pd.Series(
            [int(marker in item) for item in path_list],
            dtype='int64',
            index=df.index,
        )
    return df
def _list_image_paths(directory):
    """Return ``directory`` joined with each entry name ``os.listdir`` reports."""
    return [os.path.join(directory, name) for name in os.listdir(directory)]


def preprocessing(state, *, val_size=0.1, random_state=None):
    """Prepare the path/label DataFrames required for the given mode.

    Args:
        state: Selects what is prepared and returned:

            * ``'own_test'`` - list ``./example/`` and return a frame with
              only the ``path`` column.
            * ``'default_test'`` - extract the ``test`` archive if needed and
              return the labelled test frame.
            * ``'train'`` - additionally extract the ``train`` archive,
              shuffle the labelled train frame and split off a validation
              part; return ``(test, train, val)``.
            * any other value - the test data is still extracted and listed,
              but ``None`` is returned.
        val_size: Passed to ``train_test_split`` as ``test_size`` for the
            train/validation split. Defaults to ``0.1``.
        random_state: Seed forwarded to both the shuffle and the split. The
            default ``None`` leaves both unseeded. NOTE(review): a fixed seed
            only gives the same split if ``os.listdir`` also returns the
            entries in the same order - confirm before relying on it.

    Returns:
        A DataFrame, a ``(test, train, val)`` tuple of DataFrames, or
        ``None``, depending on ``state`` as described above.
    """
    if state == 'own_test':
        return create_dataframe(_list_image_paths("./example/"), state)

    unzip(folder='test')
    test = create_dataframe(_list_image_paths("./dataset/test/"), state)
    if state == 'default_test':
        return test
    if state != 'train':
        # Unrecognised states end here with None; kept explicit so that
        # existing callers see no change in behaviour.
        return None

    unzip(folder='train')
    train = create_dataframe(_list_image_paths("./dataset/train/"), state)
    train = train.sample(frac=1, random_state=random_state).reset_index(drop=True)
    # Make an additional split used for validation on every epoch. Only one
    # array is passed: train_test_split returns one train/test pair per
    # input, so passing the frame twice only produced discarded copies.
    train, val = train_test_split(
        train, test_size=val_size, random_state=random_state
    )
    return test, train.reset_index(drop=True), val.reset_index(drop=True)