-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
88 lines (67 loc) · 2.88 KB
/
main.py
File metadata and controls
88 lines (67 loc) · 2.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
__author__ = 'popka'
import numpy as np
import pandas as pd
import matplotlib.pylab as pl
import sklearn.cross_validation as cv
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from DecisionTree import DecisionTree
from sklearn.metrics import mean_squared_error as mse
from GradientBoosting import GradientBoosting
from sklearn.ensemble import GradientBoostingRegressor
# Directory (relative to the working directory) that holds the dataset files.
FOLDER = "data/"

# Known datasets. Order matters: the loading code below selects the parsing
# branch by position in this list (slices and indexes), not by name.
FILES = [
    "iris.txt",
    "bezdekIris.txt",
    "wine.txt",
    "bupa.txt",
    "housing.txt",
    "auto-mpg.txt",
    "spam",  # prefix only; ".train.txt" / ".test.txt" are appended when loading
]

# Dataset used for this run.
FILE = "spam"
# Prepare the features and the target for the selected dataset.
# Result: x_train, x_test, y_train, y_test as NumPy arrays.
if FILE in FILES[:6]:
    # Single-file datasets: load the whole table, pick feature/target columns,
    # then split into train and test parts.
    # housing and auto-mpg are space-separated; the others are comma-separated.
    separator = " " if FILE in FILES[4:6] else ","
    df = pd.read_csv(FOLDER + FILE, sep=separator, header=None)

    if FILE in FILES[:2]:
        # Iris variants: column 4 holds the label; encode it as integer codes.
        df[4] = pd.factorize(df[4])[0]
        X = df[[0, 1, 2, 3]].values
        y = df[4].values
    elif FILE == FILES[2]:
        # wine: target in column 0, features in columns 1..13.
        X = df[list(range(1, 14))].values
        y = df[0].values
    elif FILE == FILES[3]:
        # bupa: features in columns 0..5, target in column 6.
        X = df[[0, 1, 2, 3, 4, 5]].values
        y = df[6].values
    elif FILE == FILES[4]:
        # housing: target in the first column, everything else is a feature.
        X = df[df.columns[1:]].values
        y = df[df.columns[0]].values
    else:
        # auto-mpg: target in the first column; the last column is excluded
        # from the features (presumably a non-numeric field - TODO confirm).
        X = df[df.columns[1:-1]].values
        y = df[df.columns[0]].values

    # NOTE(review): `cv` is sklearn.cross_validation, which newer scikit-learn
    # releases no longer ship; sklearn.model_selection.train_test_split is the
    # replacement. The import is left as-is here.
    x_train, x_test, y_train, y_test = cv.train_test_split(X, y, test_size=0.25)
else:
    # Datasets shipped as separate "<name>.train.txt" / "<name>.test.txt" files:
    # space-separated, target in the first column, features in the rest.
    df_train = pd.read_csv(FOLDER + FILE + ".train.txt", sep=" ", header=None)
    df_test = pd.read_csv(FOLDER + FILE + ".test.txt", sep=" ", header=None)

    x_train = df_train[df_train.columns[1:]].values
    y_train = df_train[df_train.columns[0]].values

    x_test = df_test[df_test.columns[1:]].values
    y_test = df_test[df_test.columns[0]].values
# Fit the project's GradientBoosting model on the training split and print
# the mean squared error of its predictions on the test split.
my_gb = GradientBoosting(n_estimators=100, max_depth=4, shrinkage=0.1, rsm=False)
my_gb.fit(x_train, y_train)
y_predicted = my_gb.predict(x_test)
# A parenthesised single-argument print behaves the same under Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(mse(y_test, y_predicted))
"""
my_gb = GradientBoostingRegressor(n_estimators=10, max_depth=4, learning_rate=0.1)
my_gb.fit(x_train, y_train)
y_predicted = my_gb.predict(x_test)
print mse(y_test, y_predicted)
my_tree = DecisionTree(is_classification=False, rsm=False, max_depth=5)
my_tree.fit(x_train, y_train)
y_predicted = my_tree.predict(x_test)
print mse(y_test, y_predicted)
print mse(y_test, [np.mean(y_train)]*len(y_test))
my_tree = DecisionTree(is_classification=True, rsm=True, max_steps=None)
my_tree.fit(x_train, y_train)
y_predicted = my_tree.predict(x_test)
print mse(y_test, y_predicted)
"""