# IML_Task2_updated/main.py at Task3 · Blueonics/IML_Task2_updated · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from subtask1 import subtask1_predict
from subtask2 import subtask2_predict
from subtask3 import subtask3_predict
from sklearn.preprocessing import StandardScaler
import helper
from zipfile import ZipFile
import csv


# Load the raw training features and the test features.
df = pd.read_csv('C:/Users/Lannan Jiang/PycharmProjects/IML_Task2/train_features.csv')
df_test = pd.read_csv('C:/Users/Lannan Jiang/PycharmProjects/IML_Task2/test_features.csv')

# Load the training labels; the 'pid' column is discarded immediately so that
# only the label columns remain.
labels = pd.read_csv(
    'C:/Users/Lannan Jiang/PycharmProjects/IML_Task2/train_labels.csv'
).drop(columns=['pid'])

# preprocessing
hours = 12  # number of consecutive rows that are grouped into one sample

# Everything from the third column onward is kept as feature data.
X_train = np.asarray(df)[:, 2:]
X_test = np.asarray(df_test)[:, 2:]

# Group the rows into blocks of `hours` rows: (samples, hours, features).
num_samp_train = X_train.shape[0] // hours
num_samp_test = X_test.shape[0] // hours
X_train = np.reshape(X_train, (num_samp_train, hours, -1))
X_test = np.reshape(X_test, (num_samp_test, hours, -1))

# Fill in missing values via the project helper (it also receives the
# originating DataFrame).
X_train_imputed = helper.imputer(X_train, df)
X_test_imputed = helper.imputer(X_test, df_test)
print("imputed shape", X_train_imputed.shape)

# Disabled alternatives that were tried at this stage (kept as a note only):
#   helper.pca_for_time(X_train_imputed, X_test_imputed, n_components=4)
#   helper.average_dim(...) on both imputed arrays

# Normalise the training data, then reuse the returned mean/std for the test
# data so both sets share the same statistics.
X_train_norm, mean, std = helper.batch_norm(X_train_imputed)
X_test_norm = helper.batch_norm(X_test_imputed, mean, std)

# Collapse the two trailing axes so each sample becomes one flat row.
n_train, steps_train, feats_train = X_train_norm.shape[:3]
n_test, steps_test, feats_test = X_test_norm.shape[:3]
X_train_flatten = X_train_norm.reshape((n_train, steps_train * feats_train))
X_test_flatten = X_test_norm.reshape((n_test, steps_test * feats_test))
print("flattened shape", X_train_flatten.shape)

# Disabled alternatives that were tried on the flattened data (note only):
#   StandardScaler().fit_transform(...) on train and test
#   PCA(n_components=200) fitted on train, applied to train and test

# The flattened matrices are passed on to the sub-task models unchanged.
X_train_procs, X_test_procs = X_train_flatten, X_test_flatten

# Run the three sub-task predictors on the same processed train/test matrices.
labels_one = subtask1_predict(X_train_procs, X_test_procs, labels)
labels_two = subtask2_predict(X_train_procs, X_test_procs, labels)
y_pred_three = subtask3_predict(X_train_procs, X_test_procs, labels)

# Build the id column from the first column of the test table.
# np.unique(..., return_index=True) yields the index of the first occurrence
# of every distinct id; sorting those indices restores the order in which the
# ids appear in the file (np.unique alone would return them value-sorted).
extract_ID = np.array(df_test)
_, indices = np.unique(extract_ID[:, 0], return_index=True)
# Let NumPy infer the row count instead of hard-coding it, so the script
# works for any number of distinct ids in the test set.
keys = np.asarray(extract_ID[:, 0][np.sort(indices)]).reshape((-1, 1))

# Column-wise layout of the lookup table: id | subtask 1 | subtask 2 | subtask 3.
values = np.concatenate((labels_one, labels_two, y_pred_three), axis=1)
lut = np.concatenate((keys, values), axis=1)

# write
# Path of the CSV that is produced here and subsequently packed into the zip.
submission_csv = 'C:/Users/Lannan Jiang/PycharmProjects/IML_Task2/submission/submission.csv'

header = ['pid', 'LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST', 'LABEL_Alkalinephos', 'LABEL_Bilirubin_total',
          'LABEL_Lactate',
          'LABEL_TroponinI', 'LABEL_SaO2', 'LABEL_Bilirubin_direct', 'LABEL_EtCO2', 'LABEL_Sepsis', 'LABEL_RRate',
          'LABEL_ABPm', 'LABEL_SpO2',
          'LABEL_Heartrate']

# The context manager guarantees the file is flushed and closed before it is
# zipped; previously the handle was left open, so the archive could be built
# from a partially written file.
with open(submission_csv, 'w', newline='') as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(header)
    # Every value of a row (including the id) is written with three decimals.
    writer.writerows(['{:.3f}'.format(x) for x in row] for row in lut)

# Zip the file that was just written (not whatever 'submission.csv' happens to
# exist in the current working directory) and store it under its bare name.
with ZipFile('C:/Users/Lannan Jiang/PycharmProjects/IML_Task2/submission.zip', 'w') as archive:
    archive.write(submission_csv, arcname='submission.csv')