-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathlab1.R
More file actions
133 lines (89 loc) · 3.23 KB
/
lab1.R
File metadata and controls
133 lines (89 loc) · 3.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Bivariate data ----
# Pull the two columns of the built-in `cars` data set into plain vectors.
x <- cars[["speed"]]
y <- cars[["dist"]]

# Basic scatterplot with explicit axis labels and a title.
plot(
  x, y,
  main = "dist vs speed for cars data",
  xlab = "speed",
  ylab = "dist"
)

# Same scatterplot restyled: `pch` selects the point symbol (1, 2, 3, ...),
# `col` the colour, and `cex` scales the size of the points.
plot(
  x, y,
  main = "dist vs speed for cars data",
  xlab = "speed",
  ylab = "dist",
  pch = 20,
  col = "red",
  cex = 1.5
)
# Covariance ----
# Using the built-in estimator.
cov.byR <- cov(x, y)
cov.byR
# Compare against shifted inputs ...
cov(x + 3, y - 1)
# ... and against rescaled inputs (including a sign flip).
cov(2 * x, y)
cov(2 * x, 3 * y)
cov(2 * x, -3 * y)
# By hand, from the definition: sum of cross-deviations divided by n - 1.
n <- length(x)
cov.byHand <- sum((x - mean(x)) * (y - mean(y))) / (n - 1)
# Confirm the hand computation agrees with cov() up to numeric tolerance.
all.equal(cov.byR, cov.byHand)
# Correlation ----
# Using the built-in estimator.
cor.byR <- cor(x, y)
cor.byR
# Compare against positively rescaled and shifted inputs ...
cor(2 * x + 3, 3 * y - 1)
all.equal(cor.byR, cor(2 * x + 3, 3 * y - 1))
# ... and against inputs where one variable is multiplied by a negative factor.
cor(2 * x, -3 * y - 1)
# By hand, version 1: covariance divided by the product of the standard
# deviations.
cor.byHand <- cov.byHand / (sd(x) * sd(y))
all.equal(cor.byR, cor.byHand)
# By hand, version 2: from the sums of squares and cross-products.
SXX <- sum((x - mean(x))^2)
SYY <- sum((y - mean(y))^2)
SXY <- sum((x - mean(x)) * (y - mean(y)))
cor.byHand2 <- SXY / sqrt(SXX * SYY)
all.equal(cor.byHand2, cor.byHand)
# More on plotting ----
# Print the raw data set for reference.
cars
# Made-up grouping factor: 25 observations labelled 1 followed by 25
# labelled 2.
cartype <- factor(rep(c(1, 2), each = 25))
cartype
# Combine the two measurements and the grouping factor into one data frame.
newdata <- data.frame(speed = x, dist = y, cartype = cartype)
head(newdata)
tail(newdata)
# Side-by-side boxplots of speed per group; `names` relabels the two boxes.
boxplot(
  speed ~ cartype,
  data = newdata,
  names = c("compact", "sports")
)
# Same boxplots, with one fill colour per group.
boxplot(
  speed ~ cartype,
  data = newdata,
  names = c("compact", "sports"),
  col = c("red", "green")
)
# Scatterplot for two types of cars ----
plot(newdata$speed, newdata$dist, pch = 20)
# with() evaluates the call inside the data frame, so the "$" prefix is not
# needed.
with(newdata, plot(speed, dist, pch = 20, main = "dist vs speed"))
# Now draw each type of car on its own.
with(
  subset(newdata, cartype == 1),
  plot(speed, dist, pch = 20, main = "dist vs speed", col = "red")
)
with(
  subset(newdata, cartype == 2),
  plot(speed, dist, pch = 20, main = "dist vs speed", col = "red")
)
# Every plot() call starts a fresh figure, so the second call replaced the
# first one instead of adding to it. The fix is to open the figure once with
# plot() (with xlim/ylim set explicitly) and add the second group with
# points().
with(
  subset(newdata, cartype == 1),
  plot(
    speed, dist,
    pch = 20, cex = 1.5, col = "red",
    xlim = c(0, 30), ylim = c(0, 150),
    main = "dist vs speed"
  )
)
# points() draws on the existing figure and has no title to set; passing
# `main` to it only raises a '"main" is not a graphical parameter' warning,
# so it is omitted here.
with(
  subset(newdata, cartype == 2),
  points(speed, dist, pch = 2, cex = 1.5, col = "blue")
)
# Add a legend, positioned by the x/y coordinates of its top-left corner.
legend(
  0, 150,
  legend = c("compact", "sports"),
  pch = c(20, 2),
  col = c("red", "blue")
)
# Alternatively, give a keyword such as "topleft", "topright", "bottomleft"
# or "bottomright" to specify the location of the legend.
legend(
  "bottomright",
  legend = c("compact", "sports"),
  pch = c(20, 2),
  col = c("red", "blue")
)
# Linear regression ----
# The model syntax is similar to the one used in 421.
cars.lm <- lm(dist ~ speed, data = newdata)
cars.lm
cars.lm$coefficients
# abline() adds a straight line to the current plot. Called with two numbers,
# the first is the intercept (a) and the second the slope (b) of the line you
# want to draw.
abline(a = cars.lm$coefficients[1], b = cars.lm$coefficients[2])
# An easier way: hand abline() the fitted model itself.
abline(cars.lm, col = "green", lwd = 2)
# abline() can also draw horizontal or vertical reference lines.
abline(h = 50, lty = 2)  # horizontal case
abline(v = 15, lty = 2)  # vertical case
# Fitted values stored on the model object, overlaid as open circles.
cars.lm$fitted.values
points(newdata$speed, cars.lm$fitted.values, pch = 1)
# fitted() is the accessor-function way to get the same values.
fitted(cars.lm)
all.equal(fitted(cars.lm), cars.lm$fitted.values)
# Full regression summary.
summary(cars.lm)
# Calculating two-sided p-values by hand ----
# NOTE(review): 2.601 and 9.464 are presumably the absolute t statistics read
# off the summary(cars.lm) printout, and 48 its residual degrees of freedom;
# confirm against that output.
# lower.tail = FALSE asks pt() for the upper-tail area directly, which avoids
# the cancellation error of 1 - pt(...) when the p-value is very small.
2 * pt(2.601, df = 48, lower.tail = FALSE)
2 * pt(9.464, df = 48, lower.tail = FALSE)
# Keep the summary object so its components can be extracted with `$`.
car.lm.sum <- summary(cars.lm)
# NOTE(review): the original line stopped at a dangling `$`, which does not
# parse; the coefficient table is assumed to be the intended component.
car.lm.sum$coefficients