-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHomework02.Rmd
More file actions
211 lines (198 loc) · 5.9 KB
/
Homework02.Rmd
File metadata and controls
211 lines (198 loc) · 5.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
---
title: "Multivariate Hw02"
author: "Lin Chian Hung"
date: "2018/4/21"
output:
html_document:
toc: true
toc_float:
collapsed: true
smooth_scroll: true
---
# Classification tree
```{r}
fish<-read.csv("~/Desktop/碩一下/多變量/Fishdata.csv")
library(rpart)
fish.control<-rpart.control(minisplit=10,minbucket=3,xval=0)
fish.treeorig<-rpart(Species~
Weight+L1+L2+L3+Height+Width,data=fish,method="class",control=fish.control)
```
Let’s now plot the tree:
```{r}
# 第一種畫法
plot(fish.treeorig)
text(fish.treeorig)
# 第二種畫法
require(rpart.plot)
prp(fish.treeorig, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)
```
Also check out the complexity parameter (CP):
```{r}
printcp(fish.treeorig)
```
```{r}
summary(fish.treeorig)
```
```{r}
fish.prunetree<-prune.rpart(fish.treeorig,cp=0.02)
# 第一種畫法
plot(fish.prunetree)
text(fish.prunetree)
# 第二種畫法
require(rpart.plot)
prp(fish.prunetree, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)
```
```{r}
L21<-fish$L2-fish$L1
L32<-fish$L3-fish$L2
L31<-fish$L3-fish$L1
newfish<-cbind(fish,L21,L32,L31)
newfish.treenew<-rpart(Species~., data=newfish,method="class",parms=list(split="information"),control=fish.control)
printcp(newfish.treenew)
```
```{r}
# 第一種畫法
plot(newfish.treenew)
text(newfish.treenew)
# 第二種畫法
require(rpart.plot)
prp(newfish.treenew, # 模型
faclen=0, # 呈現的變數不要縮寫
fallen.leaves=TRUE, # 讓樹枝以垂直方式呈現
shadow.col="gray", # 最下面的節點塗上陰影
# number of correct classifications / number of observations in that node
extra=2)
```
```{r}
fish.control<-rpart.control(minbucket=3,minsplit=10,xval=148)
newfish.treenewcv<- rpart(Species~., data=newfish,method="class",
parms=list(split='information'),control=fish.control)
printcp(newfish.treenewcv)
```
```{r}
newfish.test<-read.csv("~/Desktop/碩一下/多變量/Fish_test_data.csv")
L31<-newfish.test$L3- newfish.test$L1
L32<-newfish.test$L3- newfish.test$L2
L21<-newfish.test$L2- newfish.test$L1
newfish.test<-cbind(newfish.test,L21,L32,L31)
newfish.tpred<-predict(newfish.treenewcv,newfish.test)
newfish.tpred
```
# Linear Discriminant Analysis
```{r warning=FALSE}
library(MASS)
newfish.lda<-lda(Species~.,data=newfish)
# 有共線性存在
```
```{r}
newfish.lda<-lda(Species~Weight+L1+Height+Width+L21+L32,data=newfish)
newfish.lda
```
```{r}
newfish.ldapred<-predict(newfish.lda,newfish[,-1])
table(newfish$Species,newfish.ldapred$class)
```
```{r}
newfish.ldacv<-lda(Species~Weight+L1+Height+Width+L21+L32,data=newfish,CV=T)
table(newfish$Species,newfish.ldacv$class)
```
```{r}
eqscplot(newfish.ldapred$x,type='n',xlab="1st LD",ylab="2nd LD")
fish.species<-c(rep("B",33),rep("W",5),rep("R",18),rep("Pa",10),rep("S",12),rep("Pi",16),rep("Pe",54))
fish.colors<-c(rep(1,33),rep(2,5),rep(3,18),rep(4,10),rep(5,12),rep(6,16),rep(7,54))
text(newfish.ldapred$x[,1:2],fish.species,col=fish.colors)
```
```{r}
newfish.ldatest<-predict(newfish.lda,newfish.test)
newfish.ldatest$class
```
# Quadratic Discriminant Analysis
Let us examine how to apply QDA to this dataset.
Let us pretend that the multivariate normal assumption is valid (you can check for this by using the R package “MVN”, as shown in the lab of CCA)
Check Normailty
```{r}
library(MVN)
fish1 <- newfish[,2:6]
mvn(fish1,mvnTest = "hz")
```
```{r eval=FALSE}
newfish.qda<-qda(Species~.,data=newfish)
```
```{r}
newfish.q<-read.csv("~/Desktop/碩一下/多變量/QDA.csv")
```
```{r eval = FALSE}
newfish.qda<-qda(Species~.,data=newfish.q)
```
```{r}
newfish.qda<-qda(Species~Weight+L1+Height+Width+L21+L32,data=newfish.q)
newfish.qdapred<-predict(newfish.qda,newfish.q)
table(newfish.q$Species,newfish.qdapred$class)
```
```{r}
predict(newfish.qda,newfish.test)$class
```
```{r}
newfish.qda<-qda(Species~Weight+L1+Height+Width+L21+L32,data=newfish.q,CV=T)
table(newfish.q$Species,newfish.qda$class)
```
# Nearest Neighbor Methdos
k = 3
```{r}
library(class)
newfish.knn<-knn(newfish[,2:10],newfish[,2:10],newfish[,"Species"],k=3,prob=T)
table(newfish$Species,newfish.knn)
```
k = 2
```{r}
newfish.knn<-knn(newfish[,2:10],newfish[,2:10],newfish[,"Species"],k=2,prob=T)
table(newfish$Species,newfish.knn)
```
k = 1
```{r}
newfish.knn<-knn(newfish[,2:10],newfish[,2:10],newfish[,"Species"],k=1,prob=T)
table(newfish$Species,newfish.knn)
```
```{r}
newfish1<-newfish[,c(1,2,3,6,8,9)]
newfish.knncv<-knn.cv(newfish1[,2:6],newfish1[,"Species"],k=1,prob=T)
table(newfish1$Species,newfish.knncv)
```
```{r}
newfish1.test<-newfish.test[,c(1,2,5,7,8)]
newfish.knntest<-knn(newfish1[,2:6],newfish1.test,newfish1[,"Species"],k=1,prob=T)
newfish.knntest
```
# Logistic Discrimination
Again, let us pretend that all assumptions (including “multivariate normal” and “common covariance matrix” over all classes) are valid (need to check that in practice).
In order to use this method in R, we need another package.
```{r}
library(nnet)
newfish.logd<-multinom(Species~.,data=newfish,maxit=250)
newfish.logd
```
```{r}
table(newfish$Species,predict(newfish.logd,newfish))
```
Now we examine the true error rate by cross-validation. To do this, we use another package called “glmnet”:
```{r warning=FALSE}
library(glmnet)
x <- as.matrix(newfish[,-1])
y <- newfish$Species
cvfit <- cv.glmnet(x, y, family='multinomial', type.measure='class', nfolds=148)
predict.value <- predict(cvfit, x, s = "lambda.min", type = "class")
table(predict.value,newfish$Species)
```
```{r}
predict(newfish.logd,newfish.test)
```